[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-12 Thread MyDeveloperDay via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG05bea533d1fc: [clang-format] [PR49298] Sort includes pass 
will sort inside raw strings (authored by MyDeveloperDay).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

Files:
  clang/lib/Format/Format.cpp
  clang/unittests/Format/SortIncludesTest.cpp

Index: clang/unittests/Format/SortIncludesTest.cpp
===
--- clang/unittests/Format/SortIncludesTest.cpp
+++ clang/unittests/Format/SortIncludesTest.cpp
@@ -1045,6 +1045,153 @@
   EXPECT_EQ(Unsorted, sort(Unsorted, "input.cpp", 0));
 }
 
+TEST_F(SortIncludesTest, DisableRawStringLiteralSorting) {
+
+  EXPECT_EQ("const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";",
+sort("const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";",
+ "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"x(\n"
+"#include \n"
+"#include \n"
+")x\";",
+sort("const char *t = R\"x(\n"
+ "#include \n"
+ "#include \n"
+ ")x\";",
+ "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"xyz(\n"
+"#include \n"
+"#include \n"
+")xyz\";",
+sort("const char *t = R\"xyz(\n"
+ "#include \n"
+ "#include \n"
+ ")xyz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("#include \n"
+"#include \n"
+"const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";\n"
+"#include \n"
+"#include \n"
+"const char *t = R\"x(\n"
+"#include \n"
+"#include \n"
+")x\";\n"
+"#include \n"
+"#include \n"
+"const char *t = R\"xyz(\n"
+"#include \n"
+"#include \n"
+")xyz\";\n"
+"#include \n"
+"#include ",
+sort("#include \n"
+ "#include \n"
+ "const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";\n"
+ "#include \n"
+ "#include \n"
+ "const char *t = R\"x(\n"
+ "#include \n"
+ "#include \n"
+ ")x\";\n"
+ "#include \n"
+ "#include \n"
+ "const char *t = R\"xyz(\n"
+ "#include \n"
+ "#include \n"
+ ")xyz\";\n"
+ "#include \n"
+ "#include ",
+ "test.cc", 4));
+
+  EXPECT_EQ("const char *t = R\"AMZ029amz(\n"
+"#include \n"
+"#include \n"
+")AMZ029amz\";",
+sort("const char *t = R\"AMZ029amz(\n"
+ "#include \n"
+ "#include \n"
+ ")AMZ029amz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"-AMZ029amz(\n"
+"#include \n"
+"#include \n"
+")-AMZ029amz\";",
+sort("const char *t = R\"-AMZ029amz(\n"
+ "#include \n"
+ "#include \n"
+ ")-AMZ029amz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AMZ029amz-(\n"
+"#include \n"
+"#include \n"
+")AMZ029amz-\";",
+sort("const char *t = R\"AMZ029amz-(\n"
+ "#include \n"
+ "#include \n"
+ ")AMZ029amz-\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AM|029amz-(\n"
+"#include \n"
+"#include \n"
+")AM|029amz-\";",
+sort("const char *t = R\"AM|029amz-(\n"
+ "#include \n"
+ "#include \n"
+ ")AM|029amz-\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AM[029amz-(\n"
+"#include \n"
+"#include \n"
+")AM[029amz-\";",
+sort("const char *t = R\"AM[029amz-(\n"
+ "#include \n"
+ "#include \n"
+ ")AM[029amz-\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AM]029amz-(\n"
+"#include \n"
+"#include \n"
+")AM]029amz-\";",
+sort("const char *t = R\"AM]029amz-(\n"
+ "#include \n"
+ "#include \n"
+ ")AM]029amz-\";",
+ "test.cxx", 0));
+
+#define X "AMZ029amz{}+!%*=_:;',.<>|/?#~-$"
+
+  EXPECT_EQ("const char *t = R\"" X "(\n"
+"#include \n"
+"#include 

[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-10 Thread Marek Kurdej via Phabricator via cfe-commits
curdeius accepted this revision.
curdeius added a comment.
This revision is now accepted and ready to land.

LGTM. Great!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-10 Thread MyDeveloperDay via Phabricator via cfe-commits
MyDeveloperDay updated this revision to Diff 393474.
MyDeveloperDay added a comment.

Thank @HazardyKnusperkeks  you got me over the line, I was able to escape the 
`[` but not the `]` so I used your trick there

  "R\"(([\\[A-Za-z0-9_{}#<>%:;.?*+/^&\\$|~!=,'\\-]|])*)\\("

Add unit tests to cover those cases too


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

Files:
  clang/lib/Format/Format.cpp
  clang/unittests/Format/SortIncludesTest.cpp

Index: clang/unittests/Format/SortIncludesTest.cpp
===
--- clang/unittests/Format/SortIncludesTest.cpp
+++ clang/unittests/Format/SortIncludesTest.cpp
@@ -1045,6 +1045,153 @@
   EXPECT_EQ(Unsorted, sort(Unsorted, "input.cpp", 0));
 }
 
+TEST_F(SortIncludesTest, DisableRawStringLiteralSorting) {
+
+  EXPECT_EQ("const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";",
+sort("const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";",
+ "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"x(\n"
+"#include \n"
+"#include \n"
+")x\";",
+sort("const char *t = R\"x(\n"
+ "#include \n"
+ "#include \n"
+ ")x\";",
+ "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"xyz(\n"
+"#include \n"
+"#include \n"
+")xyz\";",
+sort("const char *t = R\"xyz(\n"
+ "#include \n"
+ "#include \n"
+ ")xyz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("#include \n"
+"#include \n"
+"const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";\n"
+"#include \n"
+"#include \n"
+"const char *t = R\"x(\n"
+"#include \n"
+"#include \n"
+")x\";\n"
+"#include \n"
+"#include \n"
+"const char *t = R\"xyz(\n"
+"#include \n"
+"#include \n"
+")xyz\";\n"
+"#include \n"
+"#include ",
+sort("#include \n"
+ "#include \n"
+ "const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";\n"
+ "#include \n"
+ "#include \n"
+ "const char *t = R\"x(\n"
+ "#include \n"
+ "#include \n"
+ ")x\";\n"
+ "#include \n"
+ "#include \n"
+ "const char *t = R\"xyz(\n"
+ "#include \n"
+ "#include \n"
+ ")xyz\";\n"
+ "#include \n"
+ "#include ",
+ "test.cc", 4));
+
+  EXPECT_EQ("const char *t = R\"AMZ029amz(\n"
+"#include \n"
+"#include \n"
+")AMZ029amz\";",
+sort("const char *t = R\"AMZ029amz(\n"
+ "#include \n"
+ "#include \n"
+ ")AMZ029amz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"-AMZ029amz(\n"
+"#include \n"
+"#include \n"
+")-AMZ029amz\";",
+sort("const char *t = R\"-AMZ029amz(\n"
+ "#include \n"
+ "#include \n"
+ ")-AMZ029amz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AMZ029amz-(\n"
+"#include \n"
+"#include \n"
+")AMZ029amz-\";",
+sort("const char *t = R\"AMZ029amz-(\n"
+ "#include \n"
+ "#include \n"
+ ")AMZ029amz-\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AM|029amz-(\n"
+"#include \n"
+"#include \n"
+")AM|029amz-\";",
+sort("const char *t = R\"AM|029amz-(\n"
+ "#include \n"
+ "#include \n"
+ ")AM|029amz-\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AM[029amz-(\n"
+"#include \n"
+"#include \n"
+")AM[029amz-\";",
+sort("const char *t = R\"AM[029amz-(\n"
+ "#include \n"
+ "#include \n"
+ ")AM[029amz-\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AM]029amz-(\n"
+"#include \n"
+"#include \n"
+")AM]029amz-\";",
+sort("const char *t = R\"AM]029amz-(\n"
+ "#include \n"
+ "#include \n"
+ ")AM]029amz-\";",
+ "test.cxx", 0));
+
+#define X "AMZ029amz{}+!%*=_:;',.<>|/?#~-$"
+
+  EXPECT_EQ("const char *t 

[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-10 Thread Björn Schäpers via Phabricator via cfe-commits
HazardyKnusperkeks added a comment.

In D115168#3185199 , @MyDeveloperDay 
wrote:

> Add support for | but still can't get [] to work in the regex (checked the 
> unit tests for Support/Regex and its not clear if its supported)

How about `"R\"(([A-Za-z0-9_{}#<>%:;.?*+/^&\\$|~!=,'\\-]|]|\[)*)\\("`, not 
putting the [] symbols within the [].

Does it work with std::regex which should also be available?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-10 Thread MyDeveloperDay via Phabricator via cfe-commits
MyDeveloperDay updated this revision to Diff 393448.
MyDeveloperDay added a comment.

Add support for | but still can't get [] to work in the regex (checked the unit 
tests for Support/Regex and its not clear if its supported)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

Files:
  clang/lib/Format/Format.cpp
  clang/unittests/Format/SortIncludesTest.cpp

Index: clang/unittests/Format/SortIncludesTest.cpp
===
--- clang/unittests/Format/SortIncludesTest.cpp
+++ clang/unittests/Format/SortIncludesTest.cpp
@@ -1045,6 +1045,133 @@
   EXPECT_EQ(Unsorted, sort(Unsorted, "input.cpp", 0));
 }
 
+TEST_F(SortIncludesTest, DisableRawStringLiteralSorting) {
+
+  EXPECT_EQ("const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";",
+sort("const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";",
+ "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"x(\n"
+"#include \n"
+"#include \n"
+")x\";",
+sort("const char *t = R\"x(\n"
+ "#include \n"
+ "#include \n"
+ ")x\";",
+ "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"xyz(\n"
+"#include \n"
+"#include \n"
+")xyz\";",
+sort("const char *t = R\"xyz(\n"
+ "#include \n"
+ "#include \n"
+ ")xyz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("#include \n"
+"#include \n"
+"const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";\n"
+"#include \n"
+"#include \n"
+"const char *t = R\"x(\n"
+"#include \n"
+"#include \n"
+")x\";\n"
+"#include \n"
+"#include \n"
+"const char *t = R\"xyz(\n"
+"#include \n"
+"#include \n"
+")xyz\";\n"
+"#include \n"
+"#include ",
+sort("#include \n"
+ "#include \n"
+ "const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";\n"
+ "#include \n"
+ "#include \n"
+ "const char *t = R\"x(\n"
+ "#include \n"
+ "#include \n"
+ ")x\";\n"
+ "#include \n"
+ "#include \n"
+ "const char *t = R\"xyz(\n"
+ "#include \n"
+ "#include \n"
+ ")xyz\";\n"
+ "#include \n"
+ "#include ",
+ "test.cc", 4));
+
+  EXPECT_EQ("const char *t = R\"AMZ029amz(\n"
+"#include \n"
+"#include \n"
+")AMZ029amz\";",
+sort("const char *t = R\"AMZ029amz(\n"
+ "#include \n"
+ "#include \n"
+ ")AMZ029amz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"-AMZ029amz(\n"
+"#include \n"
+"#include \n"
+")-AMZ029amz\";",
+sort("const char *t = R\"-AMZ029amz(\n"
+ "#include \n"
+ "#include \n"
+ ")-AMZ029amz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AMZ029amz-(\n"
+"#include \n"
+"#include \n"
+")AMZ029amz-\";",
+sort("const char *t = R\"AMZ029amz-(\n"
+ "#include \n"
+ "#include \n"
+ ")AMZ029amz-\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AM|029amz-(\n"
+"#include \n"
+"#include \n"
+")AM|029amz-\";",
+sort("const char *t = R\"AM|029amz-(\n"
+ "#include \n"
+ "#include \n"
+ ")AM|029amz-\";",
+ "test.cxx", 0));
+
+#define X "AMZ029amz{}+!%*=_:;',.<>|/?#~-$"
+
+  EXPECT_EQ("const char *t = R\"" X "(\n"
+"#include \n"
+"#include \n"
+")" X "\";",
+sort("const char *t = R\"" X "(\n"
+ "#include \n"
+ "#include \n"
+ ")" X "\";",
+ "test.cxx", 0));
+
+#undef X
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: clang/lib/Format/Format.cpp
===
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -2586,12 +2586,31 @@
   bool MainIncludeFound = false;
   bool FormattingOff = false;
 
+  llvm::Regex RawStringRegex(
+  "R\"([A-Za-z0-9_{}#<>%:;.?*+/^&\\$|~!=,'\\-]*)\\(");
+  SmallVector 

[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-09 Thread Marek Kurdej via Phabricator via cfe-commits
curdeius added inline comments.



Comment at: clang/lib/Format/Format.cpp:2590
+  llvm::Regex RawStringRegex(
+  "R\"([A-Za-z0-9_{}#<>%:;.?*+/^&\\$|~!=,'\\-]*)\\(");
+  SmallVector RawStringMatches;

MyDeveloperDay wrote:
> curdeius wrote:
> > I'm picky but you missed `[` and `]`, no?
> I couldn't get them to work, even if I escaped them.. let me go look at the 
> Regex unit tests
You can try putting`]` at the beginning (just after the opening`[`). It 
*should* work.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-09 Thread MyDeveloperDay via Phabricator via cfe-commits
MyDeveloperDay added inline comments.



Comment at: clang/lib/Format/Format.cpp:2590
+  llvm::Regex RawStringRegex(
+  "R\"([A-Za-z0-9_{}#<>%:;.?*+/^&\\$|~!=,'\\-]*)\\(");
+  SmallVector RawStringMatches;

curdeius wrote:
> I'm picky but you missed `[` and `]`, no?
I couldn't get them to work, even if I escaped them.. let me go look at the 
Regex unit tests


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-09 Thread Marek Kurdej via Phabricator via cfe-commits
curdeius added inline comments.



Comment at: clang/lib/Format/Format.cpp:2590
+  llvm::Regex RawStringRegex(
+  "R\"([A-Za-z0-9_{}#<>%:;.?*+/^&\\$|~!=,'\\-]*)\\(");
+  SmallVector RawStringMatches;

I'm picky but you missed `[` and `]`, no?



Comment at: clang/lib/Format/Format.cpp:2604
+// Skip past until we think we are at the rawstring literal close.
+if (RawStringRegex.match(Trimmed, )) {
+  std::string CharSequence = RawStringMatches[1].str();

Will this match the first or the last occurrence? If it's the last then it's 
ok. Otherwise, we should change it. Also, please test something along the lines 
of:
```
R"x(...)x" ... on the same line R"y(
#include "..."
)y"
```



Comment at: clang/unittests/Format/SortIncludesTest.cpp:1150
+
+  // Missing |`
+#define X "AMZ029amz{}+!%*=_:;',.<>/?#~-$"

Can't `|` be tested? Same for `[` and `]`?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-09 Thread Björn Schäpers via Phabricator via cfe-commits
HazardyKnusperkeks added inline comments.



Comment at: clang/unittests/Format/SortIncludesTest.cpp:1162
+ "test.cxx", 0));
+}
+

You should add a `#undef X`.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-09 Thread MyDeveloperDay via Phabricator via cfe-commits
MyDeveloperDay updated this revision to Diff 393188.
MyDeveloperDay added a comment.

Add a suitable amount of C++ madness, (to the limit of what I could coax the 
regex to accept)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

Files:
  clang/lib/Format/Format.cpp
  clang/unittests/Format/SortIncludesTest.cpp

Index: clang/unittests/Format/SortIncludesTest.cpp
===
--- clang/unittests/Format/SortIncludesTest.cpp
+++ clang/unittests/Format/SortIncludesTest.cpp
@@ -1045,6 +1045,122 @@
   EXPECT_EQ(Unsorted, sort(Unsorted, "input.cpp", 0));
 }
 
+TEST_F(SortIncludesTest, DisableRawStringLiteralSorting) {
+
+  EXPECT_EQ("const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";",
+sort("const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";",
+ "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"x(\n"
+"#include \n"
+"#include \n"
+")x\";",
+sort("const char *t = R\"x(\n"
+ "#include \n"
+ "#include \n"
+ ")x\";",
+ "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"xyz(\n"
+"#include \n"
+"#include \n"
+")xyz\";",
+sort("const char *t = R\"xyz(\n"
+ "#include \n"
+ "#include \n"
+ ")xyz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("#include \n"
+"#include \n"
+"const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";\n"
+"#include \n"
+"#include \n"
+"const char *t = R\"x(\n"
+"#include \n"
+"#include \n"
+")x\";\n"
+"#include \n"
+"#include \n"
+"const char *t = R\"xyz(\n"
+"#include \n"
+"#include \n"
+")xyz\";\n"
+"#include \n"
+"#include ",
+sort("#include \n"
+ "#include \n"
+ "const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";\n"
+ "#include \n"
+ "#include \n"
+ "const char *t = R\"x(\n"
+ "#include \n"
+ "#include \n"
+ ")x\";\n"
+ "#include \n"
+ "#include \n"
+ "const char *t = R\"xyz(\n"
+ "#include \n"
+ "#include \n"
+ ")xyz\";\n"
+ "#include \n"
+ "#include ",
+ "test.cc", 4));
+
+  EXPECT_EQ("const char *t = R\"AMZ029amz(\n"
+"#include \n"
+"#include \n"
+")AMZ029amz\";",
+sort("const char *t = R\"AMZ029amz(\n"
+ "#include \n"
+ "#include \n"
+ ")AMZ029amz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"-AMZ029amz(\n"
+"#include \n"
+"#include \n"
+")-AMZ029amz\";",
+sort("const char *t = R\"-AMZ029amz(\n"
+ "#include \n"
+ "#include \n"
+ ")-AMZ029amz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AMZ029amz-(\n"
+"#include \n"
+"#include \n"
+")AMZ029amz-\";",
+sort("const char *t = R\"AMZ029amz-(\n"
+ "#include \n"
+ "#include \n"
+ ")AMZ029amz-\";",
+ "test.cxx", 0));
+
+  // Missing |`
+#define X "AMZ029amz{}+!%*=_:;',.<>/?#~-$"
+
+  EXPECT_EQ("const char *t = R\"" X "(\n"
+"#include \n"
+"#include \n"
+")" X "\";",
+sort("const char *t = R\"" X "(\n"
+ "#include \n"
+ "#include \n"
+ ")" X "\";",
+ "test.cxx", 0));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: clang/lib/Format/Format.cpp
===
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -2586,12 +2586,31 @@
   bool MainIncludeFound = false;
   bool FormattingOff = false;
 
+  llvm::Regex RawStringRegex(
+  "R\"([A-Za-z0-9_{}#<>%:;.?*+/^&\\$|~!=,'\\-]*)\\(");
+  SmallVector RawStringMatches;
+  std::string RawStringTermination = ")\"";
+
   for (;;) {
 auto Pos = Code.find('\n', SearchFrom);
 StringRef Line =
 Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
 
 StringRef Trimmed = Line.trim();
+
+// #includes inside raw string literals need to be ignored.
+// or we will sort the contents 

[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-08 Thread MyDeveloperDay via Phabricator via cfe-commits
MyDeveloperDay added a comment.

`So, you missed the digits and the characters: _{}[]#<>%:;.?*+-/^&|~!=,'.`

The standard is nuts sometimes... let me add that madness in ;-)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-08 Thread MyDeveloperDay via Phabricator via cfe-commits
MyDeveloperDay added inline comments.



Comment at: clang/lib/Format/Format.cpp:2605-2607
+  if (!CharSequence.empty()) {
+RawStringTermination = ")" + CharSequence + "\"";
+  }

curdeius wrote:
> Since `CharSequence` is empty, you might want to remove the if altogether and 
> create `RawStringTermination` unconditionally.
> 
> Oh, actually, as it's used in the for loop, you *have to* reassign 
> `RawStringTermination`, otherwise the code like:
> ```
> R"x(...
> ...)x"; // RawStringTermination is "x"
> R"(...
> ...); // RawStringTermination would still be "x"
> #include  // not sorted but it should
> #include  // not sorted but it should
> ```
> will misbehave.
> Please create a test case for this.
Correct I need to remove the `if`


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-08 Thread Marek Kurdej via Phabricator via cfe-commits
curdeius requested changes to this revision.
curdeius added inline comments.
This revision now requires changes to proceed.



Comment at: clang/lib/Format/Format.cpp:2589
 
+  llvm::Regex RawStringRegex("R\"([A-Za-z]*)\\(");
+  SmallVector RawStringMatches;

A raw string literal is:
```
prefix(optional) R"d-char-sequence(optional) 
(r-char-sequence(optional))d-char-sequence(optional)"  

d-char-sequence -   A sequence of one or more d-chars, at most 16 
characters long
d-char  -   A character from the basic source character set (until 
C++23)basic character set (since C++23), except parentheses, backslash and 
spaces
```
So, you missed the digits and the characters: `_{}[]#<>%:;.?*+-/^&|~!=,'`.
Please add a test case.
Mind the need to escape the square brackets `[]` and the minus sign `-` in the 
regexp (the latter can be put at the beginning or at the end too).

Cf. https://en.cppreference.com/w/cpp/language/string_literal, 
https://godbolt.org/z/rb61WzMcs



Comment at: clang/lib/Format/Format.cpp:2605-2607
+  if (!CharSequence.empty()) {
+RawStringTermination = ")" + CharSequence + "\"";
+  }

Since `CharSequence` is empty, you might want to remove the if altogether and 
create `RawStringTermination` unconditionally.

Oh, actually, as it's used in the for loop, you *have to* reassign 
`RawStringTermination`, otherwise the code like:
```
R"x(...
...)x"; // RawStringTermination is "x"
R"(...
...); // RawStringTermination would still be "x"
#include  // not sorted but it should
#include  // not sorted but it should
```
will misbehave.
Please create a test case for this.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-08 Thread MyDeveloperDay via Phabricator via cfe-commits
MyDeveloperDay updated this revision to Diff 392674.
MyDeveloperDay added a comment.

Allow for option Raw String CharSequences


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

Files:
  clang/lib/Format/Format.cpp
  clang/unittests/Format/SortIncludesTest.cpp

Index: clang/unittests/Format/SortIncludesTest.cpp
===
--- clang/unittests/Format/SortIncludesTest.cpp
+++ clang/unittests/Format/SortIncludesTest.cpp
@@ -1045,6 +1045,79 @@
   EXPECT_EQ(Unsorted, sort(Unsorted, "input.cpp", 0));
 }
 
+TEST_F(SortIncludesTest, DisableRawStringLiteralSorting) {
+
+  EXPECT_EQ("const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";",
+sort("const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";",
+ "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"x(\n"
+"#include \n"
+"#include \n"
+")x\";",
+sort("const char *t = R\"x(\n"
+ "#include \n"
+ "#include \n"
+ ")x\";",
+ "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"xyz(\n"
+"#include \n"
+"#include \n"
+")xyz\";",
+sort("const char *t = R\"xyz(\n"
+ "#include \n"
+ "#include \n"
+ ")xyz\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("#include \n"
+"#include \n"
+"const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";\n"
+"#include \n"
+"#include \n"
+"const char *t = R\"x(\n"
+"#include \n"
+"#include \n"
+")x\";\n"
+"#include \n"
+"#include \n"
+"const char *t = R\"xyz(\n"
+"#include \n"
+"#include \n"
+")xyz\";\n"
+"#include \n"
+"#include ",
+sort("#include \n"
+ "#include \n"
+ "const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";\n"
+ "#include \n"
+ "#include \n"
+ "const char *t = R\"x(\n"
+ "#include \n"
+ "#include \n"
+ ")x\";\n"
+ "#include \n"
+ "#include \n"
+ "const char *t = R\"xyz(\n"
+ "#include \n"
+ "#include \n"
+ ")xyz\";\n"
+ "#include \n"
+ "#include ",
+ "test.cc", 4));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: clang/lib/Format/Format.cpp
===
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -2586,12 +2586,32 @@
   bool MainIncludeFound = false;
   bool FormattingOff = false;
 
+  llvm::Regex RawStringRegex("R\"([A-Za-z]*)\\(");
+  SmallVector RawStringMatches;
+  std::string RawStringTermination = ")\"";
+
   for (;;) {
 auto Pos = Code.find('\n', SearchFrom);
 StringRef Line =
 Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
 
 StringRef Trimmed = Line.trim();
+
+// #includes inside raw string literals need to be ignored.
+// or we will sort the contents of the string.
+// Skip past until we think we are at the rawstring literal close.
+if (RawStringRegex.match(Trimmed, )) {
+  std::string CharSequence = RawStringMatches[1].str();
+  if (!CharSequence.empty()) {
+RawStringTermination = ")" + CharSequence + "\"";
+  }
+  FormattingOff = true;
+}
+
+if (Trimmed.contains(RawStringTermination)) {
+  FormattingOff = false;
+}
+
 if (Trimmed == "// clang-format off" || Trimmed == "/* clang-format off */")
   FormattingOff = true;
 else if (Trimmed == "// clang-format on" ||
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-06 Thread Björn Schäpers via Phabricator via cfe-commits
HazardyKnusperkeks added inline comments.



Comment at: clang/lib/Format/Format.cpp:2598
+// Skip past until we think we we the rawstring literal close.
+if (Trimmed.contains("R\"(")) {
+  FormattingOff = true;

curdeius wrote:
> HazardyKnusperkeks wrote:
> > curdeius wrote:
> > > This won't be enough for raw literals with custom delimiter e.g. 
> > > `R"xyz(...)xyz"`.
> > > But well, without any code parsing, this function will always be easily 
> > > outsmarted I guess.
> > > Maybe we can use regexp here and save the delimiter? (And then check it 
> > > below)
> > And then we need to check first for closing, since we can close and reopen 
> > on the same line, or not?
> It's done below anyway (i.e. `Trimmed` is still the same line), nope?
Yes but if we save the delimiter we will save the new one, before checking for 
the old one to close.
I was just thinking we must close the old one correctly before "opening" the 
new one. But after rethinking about it, that shouldn't matter.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-06 Thread Marek Kurdej via Phabricator via cfe-commits
curdeius added inline comments.



Comment at: clang/lib/Format/Format.cpp:2598
+// Skip past until we think we we the rawstring literal close.
+if (Trimmed.contains("R\"(")) {
+  FormattingOff = true;

HazardyKnusperkeks wrote:
> curdeius wrote:
> > This won't be enough for raw literals with custom delimiter e.g. 
> > `R"xyz(...)xyz"`.
> > But well, without any code parsing, this function will always be easily 
> > outsmarted I guess.
> > Maybe we can use regexp here and save the delimiter? (And then check it 
> > below)
> And then we need to check first for closing, since we can close and reopen on 
> the same line, or not?
It's done below anyway (i.e. `Trimmed` is still the same line), nope?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-06 Thread Björn Schäpers via Phabricator via cfe-commits
HazardyKnusperkeks added inline comments.



Comment at: clang/lib/Format/Format.cpp:2598
+// Skip past until we think we we the rawstring literal close.
+if (Trimmed.contains("R\"(")) {
+  FormattingOff = true;

curdeius wrote:
> This won't be enough for raw literals with custom delimiter e.g. 
> `R"xyz(...)xyz"`.
> But well, without any code parsing, this function will always be easily 
> outsmarted I guess.
> Maybe we can use regexp here and save the delimiter? (And then check it below)
And then we need to check first for closing, since we can close and reopen on 
the same line, or not?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-06 Thread Marek Kurdej via Phabricator via cfe-commits
curdeius added inline comments.



Comment at: clang/lib/Format/Format.cpp:2597
+// or we will sort the contents of the string.
+// Skip past until we think we we the rawstring literal close.
+if (Trimmed.contains("R\"(")) {

"we we"?



Comment at: clang/lib/Format/Format.cpp:2598
+// Skip past until we think we we the rawstring literal close.
+if (Trimmed.contains("R\"(")) {
+  FormattingOff = true;

This won't be enough for raw literals with custom delimiter e.g. 
`R"xyz(...)xyz"`.
But well, without any code parsing, this function will always be easily 
outsmarted I guess.
Maybe we can use regexp here and save the delimiter? (And then check it below)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115168/new/

https://reviews.llvm.org/D115168

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115168: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

2021-12-06 Thread MyDeveloperDay via Phabricator via cfe-commits
MyDeveloperDay created this revision.
MyDeveloperDay added reviewers: HazardyKnusperkeks, curdeius, owenpan, njames93.
MyDeveloperDay added projects: clang, clang-format.
MyDeveloperDay requested review of this revision.

https://bugs.llvm.org/show_bug.cgi?id=49298

clang-format does not respect raw string literals when sorting includes

  const char *RawStr = R"(
  #include "headerB.h"
  #include "headerA.h"
  )";

Running clang-format over with SortIncludes enabled transforms this code to:

  const char *RawStr = R"(
  #include "headerA.h"
  #include "headerB.h"
  )";

The following code tries to minimize this impact during IncludeSorting, by 
treating R"( and )" as equivalent of // clang-format off/on


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D115168

Files:
  clang/lib/Format/Format.cpp
  clang/unittests/Format/SortIncludesTest.cpp


Index: clang/unittests/Format/SortIncludesTest.cpp
===
--- clang/unittests/Format/SortIncludesTest.cpp
+++ clang/unittests/Format/SortIncludesTest.cpp
@@ -1045,6 +1045,37 @@
   EXPECT_EQ(Unsorted, sort(Unsorted, "input.cpp", 0));
 }
 
+TEST_F(SortIncludesTest, DisableRawStringLiteralSorting) {
+
+  EXPECT_EQ("const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";",
+sort("const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("#include \n"
+"#include \n"
+"const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";\n"
+"#include \n"
+"#include ",
+sort("#include \n"
+ "#include \n"
+ "const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";\n"
+ "#include \n"
+ "#include ",
+ "test.cc", 2));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: clang/lib/Format/Format.cpp
===
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -2590,8 +2590,18 @@
 auto Pos = Code.find('\n', SearchFrom);
 StringRef Line =
 Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
-
 StringRef Trimmed = Line.trim();
+
+// #includes inside raw string literals need to be ignored.
+// or we will sort the contents of the string.
+// Skip past until we think we we the rawstring literal close.
+if (Trimmed.contains("R\"(")) {
+  FormattingOff = true;
+}
+if (Trimmed.contains(")\"")) {
+  FormattingOff = false;
+}
+
 if (Trimmed == "// clang-format off" || Trimmed == "/* clang-format off 
*/")
   FormattingOff = true;
 else if (Trimmed == "// clang-format on" ||


Index: clang/unittests/Format/SortIncludesTest.cpp
===
--- clang/unittests/Format/SortIncludesTest.cpp
+++ clang/unittests/Format/SortIncludesTest.cpp
@@ -1045,6 +1045,37 @@
   EXPECT_EQ(Unsorted, sort(Unsorted, "input.cpp", 0));
 }
 
+TEST_F(SortIncludesTest, DisableRawStringLiteralSorting) {
+
+  EXPECT_EQ("const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";",
+sort("const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";",
+ "test.cxx", 0));
+
+  EXPECT_EQ("#include \n"
+"#include \n"
+"const char *t = R\"(\n"
+"#include \n"
+"#include \n"
+")\";\n"
+"#include \n"
+"#include ",
+sort("#include \n"
+ "#include \n"
+ "const char *t = R\"(\n"
+ "#include \n"
+ "#include \n"
+ ")\";\n"
+ "#include \n"
+ "#include ",
+ "test.cc", 2));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: clang/lib/Format/Format.cpp
===
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -2590,8 +2590,18 @@
 auto Pos = Code.find('\n', SearchFrom);
 StringRef Line =
 Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
-
 StringRef Trimmed = Line.trim();
+
+// #includes inside raw string literals need to be ignored.
+// or we will sort the contents of the string.
+// Skip past until we think we we the rawstring literal close.
+if (Trimmed.contains("R\"(")) {
+  FormattingOff = true;
+}
+if (Trimmed.contains(")\"")) {
+  FormattingOff = false;
+}
+
 if (Trimmed == "// clang-format off" || Trimmed == "/*