[PATCH] Calculate and store ColumnWidth instead of CodePointCount in FormatTokens.

Alexander Kornienko Thu, 05 Sep 2013 17:58:42 -0700

Hi klimek, djasper,

This fixes various issues with the handling of mixed tabs and spaces, e.g.
when realigning block comments.


http://llvm-reviews.chandlerc.com/D1608

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/ContinuationIndenter.cpp
  lib/Format/Format.cpp
  lib/Format/FormatToken.cpp
  lib/Format/FormatToken.h
  lib/Format/TokenAnnotator.cpp
  unittests/Format/FormatTest.cpp

Index: lib/Format/BreakableToken.cpp
===================================================================
--- lib/Format/BreakableToken.cpp
+++ lib/Format/BreakableToken.cpp
@@ -337,8 +337,10 @@
   // if leading tabs are intermixed with spaces, that is not a high priority.
 
   // Adjust the start column uniformly accross all lines.
-  StartOfLineColumn[LineIndex] =
-      std::max<int>(0, Whitespace.size() + IndentDelta);
+  StartOfLineColumn[LineIndex] = std::max<int>(
+      0,
+      encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
+          IndentDelta);
 }
 
 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -210,7 +210,7 @@
                                State.NextToken->WhitespaceRange.getEnd()) -
                            SourceMgr.getSpellingColumnNumber(
                                State.NextToken->WhitespaceRange.getBegin());
-    State.Column += WhitespaceLength + State.NextToken->CodePointCount;
+    State.Column += WhitespaceLength + State.NextToken->ColumnWidth;
     State.NextToken = State.NextToken->Next;
     return 0;
   }
@@ -261,11 +261,11 @@
                  State.Line->StartsDefinition))) {
       State.Column = State.Stack.back().Indent;
     } else if (Current.Type == TT_ObjCSelectorName) {
-      if (State.Stack.back().ColonPos > Current.CodePointCount) {
-        State.Column = State.Stack.back().ColonPos - Current.CodePointCount;
+      if (State.Stack.back().ColonPos > Current.ColumnWidth) {
+        State.Column = State.Stack.back().ColonPos - Current.ColumnWidth;
       } else {
         State.Column = State.Stack.back().Indent;
-        State.Stack.back().ColonPos = State.Column + Current.CodePointCount;
+        State.Stack.back().ColonPos = State.Column + Current.ColumnWidth;
       }
     } else if (Current.is(tok::l_square) && Current.Type != TT_ObjCMethodExpr &&
                Current.Type != TT_LambdaLSquare) {
@@ -311,7 +311,7 @@
     if (!Current.isTrailingComment())
       State.Stack.back().LastSpace = State.Column;
     if (Current.isMemberAccess())
-      State.Stack.back().LastSpace += Current.CodePointCount;
+      State.Stack.back().LastSpace += Current.ColumnWidth;
     State.StartOfLineLevel = State.ParenLevel;
     State.LowestLevelOnLine = State.ParenLevel;
 
@@ -347,8 +347,8 @@
       State.Stack.back().VariablePos = State.Column;
       // Move over * and & if they are bound to the variable name.
       const FormatToken *Tok = &Previous;
-      while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) {
-        State.Stack.back().VariablePos -= Tok->CodePointCount;
+      while (Tok && State.Stack.back().VariablePos >= Tok->ColumnWidth) {
+        State.Stack.back().VariablePos -= Tok->ColumnWidth;
         if (Tok->SpacesRequiredBefore != 0)
           break;
         Tok = Tok->Previous;
@@ -365,12 +365,12 @@
     if (Current.Type == TT_ObjCSelectorName &&
         State.Stack.back().ColonPos == 0) {
       if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
-          State.Column + Spaces + Current.CodePointCount)
+          State.Column + Spaces + Current.ColumnWidth)
         State.Stack.back().ColonPos =
             State.Stack.back().Indent + Current.LongestObjCSelectorName;
       else
         State.Stack.back().ColonPos =
-            State.Column + Spaces + Current.CodePointCount;
+            State.Column + Spaces + Current.ColumnWidth;
     }
 
     if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr &&
@@ -440,7 +440,7 @@
         std::min(State.LowestLevelOnLine, State.ParenLevel);
   if (Current.isMemberAccess())
     State.Stack.back().StartOfFunctionCall =
-        Current.LastInChainOfCalls ? 0 : State.Column + Current.CodePointCount;
+        Current.LastInChainOfCalls ? 0 : State.Column + Current.ColumnWidth;
   if (Current.Type == TT_CtorInitializerColon) {
     // Indent 2 from the column, so:
     // SomeClass::SomeClass()
@@ -596,7 +596,7 @@
     State.StartOfStringLiteral = 0;
   }
 
-  State.Column += Current.CodePointCount;
+  State.Column += Current.ColumnWidth;
   State.NextToken = State.NextToken->Next;
   unsigned Penalty = breakProtrudingToken(Current, State, DryRun);
   if (State.Column > getColumnLimit(State)) {
@@ -623,7 +623,7 @@
     State.Stack[i].BreakBeforeParameter = true;
 
   unsigned ColumnsUsed =
-      State.Column - Current.CodePointCount + Current.FirstLineColumnWidth;
+      State.Column - Current.ColumnWidth + Current.FirstLineColumnWidth;
   // We can only affect layout of the first and the last line, so the penalty
   // for all other lines is constant, and we ignore it.
   State.Column = Current.LastLineColumnWidth;
@@ -640,7 +640,7 @@
     return 0;
 
   llvm::OwningPtr<BreakableToken> Token;
-  unsigned StartColumn = State.Column - Current.CodePointCount;
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
 
   if (Current.is(tok::string_literal) &&
       Current.Type != TT_ImplicitStringLiteral) {
@@ -661,11 +661,8 @@
     Token.reset(new BreakableStringLiteral(
         Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
   } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
-    unsigned OriginalStartColumn =
-        SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
-        1;
     Token.reset(new BreakableBlockComment(
-        Current, StartColumn, OriginalStartColumn, !Current.Previous,
+        Current, StartColumn, Current.OriginalStartColumn, !Current.Previous,
         State.Line->InPPDirective, Encoding, Style));
   } else if (Current.Type == TT_LineComment &&
              (Current.Previous == NULL ||
@@ -768,7 +765,7 @@
   if (Current.getNextNonComment() &&
       Current.getNextNonComment()->is(tok::string_literal))
     return true; // Implicit concatenation.
-  if (State.Column + Current.CodePointCount + Current.UnbreakableTailLength >
+  if (State.Column + Current.ColumnWidth + Current.UnbreakableTailLength >
       Style.ColumnLimit)
     return true; // String will be split.
   return false;
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -602,7 +602,7 @@
       FormatTok->WhitespaceRange =
           SourceRange(GreaterLocation, GreaterLocation);
       FormatTok->TokenText = ">";
-      FormatTok->CodePointCount = 1;
+      FormatTok->ColumnWidth = 1;
       GreaterStashed = false;
       return FormatTok;
     }
@@ -677,8 +677,12 @@
     }
 
     // Now FormatTok is the next non-whitespace token.
-    FormatTok->CodePointCount =
-        encoding::getCodePointCount(FormatTok->TokenText, Encoding);
+    FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
+        FormatTok->TokenText, 0, Style.TabWidth, Encoding);
+
+    // FIXME: Add the ColumnWidth to Column.
+    FormatTok->WhitespaceRange = SourceRange(
+        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
 
     if (FormatTok->isOneOf(tok::string_literal, tok::comment)) {
       StringRef Text = FormatTok->TokenText;
@@ -691,10 +695,15 @@
             Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
             Encoding);
       }
+      if (Text.startswith("/*")) {
+        unsigned Column = SourceMgr.getSpellingColumnNumber(
+                              FormatTok->getStartOfNonWhitespace()) -
+                          1;
+        StringRef PrecedingText = StringRef(Text.data() - Column, Column);
+        FormatTok->OriginalStartColumn = encoding::columnWidthWithTabs(
+            PrecedingText, 0, Style.TabWidth, Encoding);
+      }
     }
-    // FIXME: Add the CodePointCount to Column.
-    FormatTok->WhitespaceRange = SourceRange(
-        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
     return FormatTok;
   }
 
Index: lib/Format/FormatToken.cpp
===================================================================
--- lib/Format/FormatToken.cpp
+++ lib/Format/FormatToken.cpp
@@ -42,7 +42,7 @@
   // Calculate the number of code points we have to format this list. As the
   // first token is already placed, we have to subtract it.
   unsigned RemainingCodePoints = Style.ColumnLimit - State.Column +
-                                 State.NextToken->Previous->CodePointCount;
+                                 State.NextToken->Previous->ColumnWidth;
 
   // Find the best ColumnFormat, i.e. the best number of columns to use.
   const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
@@ -82,7 +82,7 @@
 static unsigned CodePointsBetween(const FormatToken *Begin,
                                   const FormatToken *End) {
   assert(End->TotalLength >= Begin->TotalLength);
-  return End->TotalLength - Begin->TotalLength + Begin->CodePointCount;
+  return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
 }
 
 void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
Index: lib/Format/FormatToken.h
===================================================================
--- lib/Format/FormatToken.h
+++ lib/Format/FormatToken.h
@@ -83,13 +83,14 @@
 struct FormatToken {
   FormatToken()
       : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
-        CodePointCount(0), FirstLineColumnWidth(0), LastLineColumnWidth(0),
-        IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
-        BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
-        CanBreakBefore(false), ClosesTemplateDeclaration(false),
-        ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0),
-        UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0),
-        LongestObjCSelectorName(0), FakeRParens(0), LastInChainOfCalls(false),
+        OriginalStartColumn(0), ColumnWidth(0), FirstLineColumnWidth(0),
+        LastLineColumnWidth(0), IsFirst(false), MustBreakBefore(false),
+        IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown),
+        SpacesRequiredBefore(0), CanBreakBefore(false),
+        ClosesTemplateDeclaration(false), ParameterCount(0),
+        PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0),
+        BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0),
+        FakeRParens(0), LastInChainOfCalls(false),
         PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
         Next(NULL) {}
 
@@ -113,9 +114,13 @@
   /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
   unsigned LastNewlineOffset;
 
-  /// \brief The length of the non-whitespace parts of the token in CodePoints.
+  /// \brief Contains the 0-based column number where this token started in the
+  /// original code. Calculated only for block comments.
+  unsigned OriginalStartColumn;
+
+  /// \brief The width of the non-whitespace parts of the token in columns.
   /// We need this to correctly measure number of columns a token spans.
-  unsigned CodePointCount;
+  unsigned ColumnWidth;
 
   /// \brief Contains the number of code points in the first line of a
   /// multi-line string literal or comment. Zero if there's no newline in the
Index: lib/Format/TokenAnnotator.cpp
===================================================================
--- lib/Format/TokenAnnotator.cpp
+++ lib/Format/TokenAnnotator.cpp
@@ -326,10 +326,10 @@
                  Line.First->Type == TT_ObjCMethodSpecifier) {
         Tok->Type = TT_ObjCMethodExpr;
         Tok->Previous->Type = TT_ObjCSelectorName;
-        if (Tok->Previous->CodePointCount >
+        if (Tok->Previous->ColumnWidth >
             Contexts.back().LongestObjCSelectorName) {
           Contexts.back().LongestObjCSelectorName =
-              Tok->Previous->CodePointCount;
+              Tok->Previous->ColumnWidth;
         }
         if (Contexts.back().FirstObjCSelectorName == NULL)
           Contexts.back().FirstObjCSelectorName = Tok->Previous;
@@ -1000,7 +1000,7 @@
 }
 
 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
-  Line.First->TotalLength = Line.First->CodePointCount;
+  Line.First->TotalLength = Line.First->ColumnWidth;
   if (!Line.First->Next)
     return;
   FormatToken *Current = Line.First->Next;
@@ -1037,7 +1037,7 @@
       Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
     else
       Current->TotalLength = Current->Previous->TotalLength +
-                             Current->CodePointCount +
+                             Current->ColumnWidth +
                              Current->SpacesRequiredBefore;
     // FIXME: Only calculate this if CanBreakBefore is true once static
     // initializers etc. are sorted out.
@@ -1073,7 +1073,7 @@
       UnbreakableTailLength = 0;
     } else {
       UnbreakableTailLength +=
-          Current->CodePointCount + Current->SpacesRequiredBefore;
+          Current->ColumnWidth + Current->SpacesRequiredBefore;
     }
     Current = Current->Previous;
   }
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -5671,32 +5671,32 @@
   // FIXME: To correctly count mixed whitespace we need to
   // also correctly count mixed whitespace in front of the comment.
 
-  // Tab.TabWidth = 8;
-  // Tab.IndentWidth = 8;
-  // EXPECT_EQ("/*\n"
-  //           "\t      a\t\tcomment\n"
-  //           "\t      in multiple lines\n"
-  //           "       */",
-  //           format("   /*\t \t \n"
-  //                  " \t \t a\t\tcomment\t \t\n"
-  //                  " \t \t in multiple lines\t\n"
-  //                  " \t  */",
-  //                  Tab));
-  // Tab.UseTab = false;
-  // EXPECT_EQ("/*\n"
-  //           "              a\t\tcomment\n"
-  //           "              in multiple lines\n"
-  //           "       */",
-  //           format("   /*\t \t \n"
-  //                  " \t \t a\t\tcomment\t \t\n"
-  //                  " \t \t in multiple lines\t\n"
-  //                  " \t  */",
-  //                  Tab));
-  // EXPECT_EQ("/* some\n"
-  //           "   comment */",
-  //          format(" \t \t /* some\n"
-  //                 " \t \t    comment */",
-  //                 Tab));
+  Tab.TabWidth = 8;
+  Tab.IndentWidth = 8;
+  EXPECT_EQ("/*\n"
+            "\t      a\t\tcomment\n"
+            "\t      in multiple lines\n"
+            "       */",
+            format("   /*\t \t \n"
+                   " \t \t a\t\tcomment\t \t\n"
+                   " \t \t in multiple lines\t\n"
+                   " \t  */",
+                   Tab));
+  Tab.UseTab = false;
+  EXPECT_EQ("/*\n"
+            "              a\t\tcomment\n"
+            "              in multiple lines\n"
+            "       */",
+            format("   /*\t \t \n"
+                   " \t \t a\t\tcomment\t \t\n"
+                   " \t \t in multiple lines\t\n"
+                   " \t  */",
+                   Tab));
+  EXPECT_EQ("/* some\n"
+            "   comment */",
+           format(" \t \t /* some\n"
+                  " \t \t    comment */",
+                  Tab));
 
   EXPECT_EQ("{\n"
             "  /*\n"

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

[PATCH] Calculate and store ColumnWidth instead of CodePointCount in FormatTokens.

Reply via email to