[PATCH] D39806: [clang-format] Support python-style comments in text protos

Krasimir Georgiev via Phabricator via cfe-commits Fri, 10 Nov 2017 04:50:49 -0800

This revision was automatically updated to reflect the committed changes.
Closed by commit rL317886: [clang-format] Support python-style comments in text 
protos (authored by krasimir).


Repository:
  rL LLVM

https://reviews.llvm.org/D39806

Files:
  cfe/trunk/lib/Format/BreakableToken.cpp
  cfe/trunk/lib/Format/FormatTokenLexer.cpp
  cfe/trunk/lib/Format/FormatTokenLexer.h
  cfe/trunk/lib/Format/UnwrappedLineParser.cpp
  cfe/trunk/unittests/Format/FormatTestComments.cpp

Index: cfe/trunk/lib/Format/FormatTokenLexer.h
===================================================================
--- cfe/trunk/lib/Format/FormatTokenLexer.h
+++ cfe/trunk/lib/Format/FormatTokenLexer.h
@@ -73,6 +73,8 @@
   // nested template parts by balancing curly braces.
   void handleTemplateStrings();
 
+  void tryParsePythonComment();
+
   bool tryMerge_TMacro();
 
   bool tryMergeConflictMarkers();
Index: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
===================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp
@@ -56,7 +56,7 @@
 };
 
 static bool isLineComment(const FormatToken &FormatTok) {
-  return FormatTok.is(tok::comment) && FormatTok.TokenText.startswith("//");
+  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
 }
 
 // Checks if \p FormatTok is a line comment that continues the line comment
Index: cfe/trunk/lib/Format/FormatTokenLexer.cpp
===================================================================
--- cfe/trunk/lib/Format/FormatTokenLexer.cpp
+++ cfe/trunk/lib/Format/FormatTokenLexer.cpp
@@ -50,6 +50,8 @@
       tryParseJSRegexLiteral();
       handleTemplateStrings();
     }
+    if (Style.Language == FormatStyle::LK_TextProto)
+      tryParsePythonComment();
     tryMergePreviousTokens();
     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
       FirstInLineIndex = Tokens.size() - 1;
@@ -330,6 +332,27 @@
   resetLexer(SourceMgr.getFileOffset(loc));
 }
 
+void FormatTokenLexer::tryParsePythonComment() {
+  FormatToken *HashToken = Tokens.back();
+  if (HashToken->isNot(tok::hash))
+    return;
+  // Turn the remainder of this line into a comment.
+  const char *CommentBegin =
+      Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#"
+  size_t From = CommentBegin - Lex->getBuffer().begin();
+  size_t To = Lex->getBuffer().find_first_of('\n', From);
+  if (To == StringRef::npos)
+    To = Lex->getBuffer().size();
+  size_t Len = To - From;
+  HashToken->Type = TT_LineComment;
+  HashToken->Tok.setKind(tok::comment);
+  HashToken->TokenText = Lex->getBuffer().substr(From, Len);
+  SourceLocation Loc = To < Lex->getBuffer().size()
+                           ? Lex->getSourceLocation(CommentBegin + Len)
+                           : SourceMgr.getLocForEndOfFile(ID);
+  resetLexer(SourceMgr.getFileOffset(Loc));
+}
+
 bool FormatTokenLexer::tryMerge_TMacro() {
   if (Tokens.size() < 4)
     return false;
Index: cfe/trunk/lib/Format/BreakableToken.cpp
===================================================================
--- cfe/trunk/lib/Format/BreakableToken.cpp
+++ cfe/trunk/lib/Format/BreakableToken.cpp
@@ -40,9 +40,15 @@
   }
 }
 
-static StringRef getLineCommentIndentPrefix(StringRef Comment) {
-  static const char *const KnownPrefixes[] = {"///<", "//!<", "///", "//",
-                                              "//!"};
+static StringRef getLineCommentIndentPrefix(StringRef Comment,
+                                            const FormatStyle &Style) {
+  static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//",
+                                                    "//!"};
+  static const char *const KnownTextProtoPrefixes[] = {"//", "#"};
+  ArrayRef<const char *> KnownPrefixes(KnownCStylePrefixes);
+  if (Style.Language == FormatStyle::LK_TextProto)
+    KnownPrefixes = KnownTextProtoPrefixes;
+
   StringRef LongestPrefix;
   for (StringRef KnownPrefix : KnownPrefixes) {
     if (Comment.startswith(KnownPrefix)) {
@@ -732,7 +738,8 @@
        CurrentTok = CurrentTok->Next) {
     LastLineTok = LineTok;
     StringRef TokenText(CurrentTok->TokenText);
-    assert(TokenText.startswith("//"));
+    assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
+           "unsupported line comment prefix, '//' and '#' are supported");
     size_t FirstLineIndex = Lines.size();
     TokenText.split(Lines, "\n");
     Content.resize(Lines.size());
@@ -745,8 +752,9 @@
       // We need to trim the blanks in case this is not the first line in a
       // multiline comment. Then the indent is included in Lines[i].
       StringRef IndentPrefix =
-          getLineCommentIndentPrefix(Lines[i].ltrim(Blanks));
-      assert(IndentPrefix.startswith("//"));
+          getLineCommentIndentPrefix(Lines[i].ltrim(Blanks), Style);
+      assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
+             "unsupported line comment prefix, '//' and '#' are supported");
       OriginalPrefix[i] = Prefix[i] = IndentPrefix;
       if (Lines[i].size() > Prefix[i].size() &&
           isAlphanumeric(Lines[i][Prefix[i].size()])) {
@@ -760,6 +768,9 @@
           Prefix[i] = "///< ";
         else if (Prefix[i] == "//!<")
           Prefix[i] = "//!< ";
+        else if (Prefix[i] == "#" &&
+                 Style.Language == FormatStyle::LK_TextProto)
+          Prefix[i] = "# ";
       }
 
       Tokens[i] = LineTok;
Index: cfe/trunk/unittests/Format/FormatTestComments.cpp
===================================================================
--- cfe/trunk/unittests/Format/FormatTestComments.cpp
+++ cfe/trunk/unittests/Format/FormatTestComments.cpp
@@ -62,6 +62,12 @@
     return Style;
   }
 
+  FormatStyle getTextProtoStyleWithColumns(unsigned ColumnLimit) {
+    FormatStyle Style = getGoogleStyle(FormatStyle::FormatStyle::LK_TextProto);
+    Style.ColumnLimit = ColumnLimit;
+    return Style;
+  }
+
   void verifyFormat(llvm::StringRef Code,
                     const FormatStyle &Style = getLLVMStyle()) {
     EXPECT_EQ(Code.str(), format(test::messUp(Code), Style));
@@ -2872,6 +2878,85 @@
                    "    A = B;",
                    getLLVMStyleWithColumns(40)));
 }
+
+TEST_F(FormatTestComments, PythonStyleComments) {
+  // Keeps a space after '#'.
+  EXPECT_EQ("# comment\n"
+            "key: value",
+            format("#comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("# comment\n"
+            "key: value",
+            format("# comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Breaks long comment.
+  EXPECT_EQ("# comment comment\n"
+            "# comment\n"
+            "key: value",
+            format("# comment comment comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Indents comments.
+  EXPECT_EQ("data {\n"
+            "  # comment comment\n"
+            "  # comment\n"
+            "  key: value\n"
+            "}",
+            format("data {\n"
+                   "# comment comment comment\n"
+                   "key: value}",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("data {\n"
+            "  # comment comment\n"
+            "  # comment\n"
+            "  key: value\n"
+            "}",
+            format("data {# comment comment comment\n"
+                   "key: value}",
+                   getTextProtoStyleWithColumns(20)));
+  // Reflows long comments.
+  EXPECT_EQ("# comment comment\n"
+            "# comment comment\n"
+            "key: value",
+            format("# comment comment comment\n"
+                   "# comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Breaks trailing comments.
+  EXPECT_EQ("k: val  # comment\n"
+            "        # comment\n"
+            "a: 1",
+            format("k:val#comment comment\n"
+                   "a:1",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("id {\n"
+            "  k: val  # comment\n"
+            "          # comment\n"
+            "  # line line\n"
+            "  a: 1\n"
+            "}",
+            format("id {k:val#comment comment\n"
+                   "# line line\n"
+                   "a:1}",
+                   getTextProtoStyleWithColumns(20)));
+  // Aligns trailing comments.
+  EXPECT_EQ("k: val  # commen1\n"
+            "        # commen2\n"
+            "        # commen3\n"
+            "# commen4\n"
+            "a: 1  # commen5\n"
+            "      # commen6\n"
+            "      # commen7",
+            format("k:val#commen1 commen2\n"
+                   " # commen3\n"
+                   "# commen4\n"
+                   "a:1#commen5 commen6\n"
+                   " #commen7",
+                   getTextProtoStyleWithColumns(20)));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D39806: [clang-format] Support python-style comments in text protos

Reply via email to