zinovy.nis updated this revision to Diff 144066.
zinovy.nis added a comment.

- Optimized `containsEscapedCharacters` so that it no longer re-creates a
`std::bitset` (built implicitly inside `StringRef::find_first_of`) for each literal.
- Merged the two passes that tested for disallowed characters into a single pass.


https://reviews.llvm.org/D45932

Files:
  clang-tidy/modernize/RawStringLiteralCheck.cpp
  clang-tidy/modernize/RawStringLiteralCheck.h
  test/clang-tidy/modernize-raw-string-literal.cpp

Index: test/clang-tidy/modernize-raw-string-literal.cpp
===================================================================
--- test/clang-tidy/modernize-raw-string-literal.cpp
+++ test/clang-tidy/modernize-raw-string-literal.cpp
@@ -40,6 +40,8 @@
 char const *const Us("goink\\\037");
 char const *const HexNonPrintable("\\\x03");
 char const *const Delete("\\\177");
+char const *const MultibyteSnowman("\xE2\x98\x83");
+// CHECK-FIXES: {{^}}char const *const MultibyteSnowman("\xE2\x98\x83");{{$}}
 
 char const *const TrailingSpace("A line \\with space. \n");
 char const *const TrailingNewLine("A single \\line.\n");
Index: clang-tidy/modernize/RawStringLiteralCheck.h
===================================================================
--- clang-tidy/modernize/RawStringLiteralCheck.h
+++ clang-tidy/modernize/RawStringLiteralCheck.h
@@ -11,6 +11,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
 
 #include "../ClangTidy.h"
+#include <bitset>
 
 namespace clang {
 namespace tidy {
@@ -35,6 +36,7 @@
       const StringLiteral *Literal, StringRef Replacement);
 
   std::string DelimiterStem;
+  std::bitset<1 << CHAR_BIT> DisallowedChars;
   const bool ReplaceShorterLiterals;
 };
 
Index: clang-tidy/modernize/RawStringLiteralCheck.cpp
===================================================================
--- clang-tidy/modernize/RawStringLiteralCheck.cpp
+++ clang-tidy/modernize/RawStringLiteralCheck.cpp
@@ -41,29 +41,16 @@
   return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
 }
 
-bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
-                               const StringLiteral *Literal) {
+bool containsEscapedCharacters(
+    const MatchFinder::MatchResult &Result, const StringLiteral *Literal,
+    const std::bitset<1 << CHAR_BIT> &DisallowedChars) {
   // FIXME: Handle L"", u8"", u"" and U"" literals.
   if (!Literal->isAscii())
     return false;
 
-  StringRef Bytes = Literal->getBytes();
-  // Non-printing characters disqualify this literal:
-  // \007 = \a bell
-  // \010 = \b backspace
-  // \011 = \t horizontal tab
-  // \012 = \n new line
-  // \013 = \v vertical tab
-  // \014 = \f form feed
-  // \015 = \r carriage return
-  // \177 = delete
-  if (Bytes.find_first_of(StringRef("\000\001\002\003\004\005\006\a"
-                                    "\b\t\n\v\f\r\016\017"
-                                    "\020\021\022\023\024\025\026\027"
-                                    "\030\031\032\033\034\035\036\037"
-                                    "\177",
-                                    33)) != StringRef::npos)
-    return false;
+  for (const unsigned char C : Literal->getBytes())
+    if (DisallowedChars.test(C))
+      return false;
 
   CharSourceRange CharRange = Lexer::makeFileCharRange(
       CharSourceRange::getTokenRange(Literal->getSourceRange()),
@@ -102,7 +89,28 @@
                                              ClangTidyContext *Context)
     : ClangTidyCheck(Name, Context),
       DelimiterStem(Options.get("DelimiterStem", "lit")),
-      ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {}
+      ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {
+  // Non-printing characters are disallowed:
+  // \007 = \a bell
+  // \010 = \b backspace
+  // \011 = \t horizontal tab
+  // \012 = \n new line
+  // \013 = \v vertical tab
+  // \014 = \f form feed
+  // \015 = \r carriage return
+  // \177 = delete
+  for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a"
+                                         "\b\t\n\v\f\r\016\017"
+                                         "\020\021\022\023\024\025\026\027"
+                                         "\030\031\032\033\034\035\036\037"
+                                         "\177",
+                                         33))
+    DisallowedChars.set(C);
+
+  // Upper ASCII are disallowed too.
+  for (unsigned char C = 0xFFu; C >= 0x80u; --C)
+    DisallowedChars.set(C);
+}
 
 void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Options) {
   ClangTidyCheck::storeOptions(Options);
@@ -124,7 +132,7 @@
   if (Literal->getLocStart().isMacroID())
     return;
 
-  if (containsEscapedCharacters(Result, Literal)) {
+  if (containsEscapedCharacters(Result, Literal, DisallowedChars)) {
     std::string Replacement = asRawStringLiteral(Literal, DelimiterStem);
     if (ReplaceShorterLiterals ||
         Replacement.length() <=
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to