Typz created this revision.
Herald added a subscriber: klimek.

This patch tries to improve the optimizer a bit, to avoid splitting
tokens (e.g. comments/strings) if only there are only few characters
beyond the ColumnLimit.

Previously, comments/strings would be split whenever they went beyond
the ColumnLimit, without consideration for the PenaltyBreakComment/
String and PenaltyExcessCharacter. With this patch, the
OptimizingLineFormatter will also consider not splitting each token,
so that these 'small' offenders get a chance:

  // With ColumnLimit=20
  int a; // the
         // comment
  
  // With ColumnLimit=20 and PenaltyExcessCharacter = 10
  int a; // the comment

This patch does not fully optimize the reflowing, as it will only try
reflowing the whole comment or not reflowing it at all (instead of
trying each split, to allow some lines of overflow ColumnLimit even
when reflowing).


https://reviews.llvm.org/D33589

Files:
  lib/Format/ContinuationIndenter.cpp
  lib/Format/ContinuationIndenter.h
  lib/Format/FormatToken.cpp
  lib/Format/FormatToken.h
  lib/Format/UnwrappedLineFormatter.cpp
  unittests/Format/FormatTest.cpp

Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -8542,6 +8542,36 @@
   EXPECT_EQ("#pragma option -C -A", format("#pragma    option   -C   -A"));
 }
 
+TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) {
+  FormatStyle Style = getLLVMStyle();
+  Style.ColumnLimit = 20;
+
+  verifyFormat("int a; // the\n"
+			   "       // comment", Style);
+  verifyFormat("int a; /* first line\n"
+               "        * second\n"
+			   "        * line third\n"
+               "        * line\n"
+               "        */", Style);
+
+  Style.PenaltyExcessCharacter = 30;
+  verifyFormat("int a; // the comment", Style);
+  EXPECT_EQ("int a; // the\n"
+            "       // comment aa",
+            format("int a; // the comment aa", Style));
+  verifyFormat("int a; /* first line\n"
+               "        * second line\n"
+			   "        * third line\n"
+               "        */", Style);
+  EXPECT_EQ("int a; /* first line\n"
+            "        * second\n"
+			"        * line third\n"
+            "        * line\n"
+            "        */",
+            format("int a; /* first line second line third line */",
+				   Style));
+}
+
 #define EXPECT_ALL_STYLES_EQUAL(Styles)                                        \
   for (size_t i = 1; i < Styles.size(); ++i)                                   \
   EXPECT_EQ(Styles[0], Styles[i]) << "Style #" << i << " of " << Styles.size() \
Index: lib/Format/UnwrappedLineFormatter.cpp
===================================================================
--- lib/Format/UnwrappedLineFormatter.cpp
+++ lib/Format/UnwrappedLineFormatter.cpp
@@ -590,7 +590,8 @@
           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
       unsigned Penalty = 0;
       formatChildren(State, Newline, /*DryRun=*/false, Penalty);
-      Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
+      Indenter->addTokenToState(State, Newline, /*BreakToken=*/true,
+                                /*DryRun=*/false);
     }
     return 0;
   }
@@ -611,7 +612,8 @@
     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
     while (State.NextToken) {
       formatChildren(State, /*Newline=*/false, DryRun, Penalty);
-      Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
+      Indenter->addTokenToState(State, /*Newline=*/false, /*BreakToken=*/true,
+                                DryRun);
     }
     return Penalty;
   }
@@ -658,10 +660,11 @@
   /// \brief An edge in the solution space from \c Previous->State to \c State,
   /// inserting a newline dependent on the \c NewLine.
   struct StateNode {
-    StateNode(const LineState &State, bool NewLine, StateNode *Previous)
-        : State(State), NewLine(NewLine), Previous(Previous) {}
+    StateNode(const LineState &State, bool NewLine, bool BreakToken, StateNode *Previous)
+        : State(State), NewLine(NewLine), BreakToken(BreakToken), Previous(Previous) {}
     LineState State;
     bool NewLine;
+    bool BreakToken;
     StateNode *Previous;
   };
 
@@ -691,7 +694,7 @@
 
     // Insert start element into queue.
     StateNode *Node =
-        new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
+        new (Allocator.Allocate()) StateNode(InitialState, false, true, nullptr);
     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
     ++Count;
 
@@ -718,9 +721,17 @@
 
       FormatDecision LastFormat = Node->State.NextToken->Decision;
       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
-        addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
+        addNextStateToQueue(Penalty, Node, /*NewLine=*/false,
+                            /*BreakToken=*/true, &Count, &Queue);
       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
-        addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
+        addNextStateToQueue(Penalty, Node, /*NewLine=*/true,
+                            /*BreakToken=*/true, &Count, &Queue);
+      if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
+        addNextStateToQueue(Penalty, Node, /*NewLine=*/false,
+                            /*BreakToken=*/false, &Count, &Queue);
+      if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
+        addNextStateToQueue(Penalty, Node, /*NewLine=*/true,
+                            /*BreakToken=*/false, &Count, &Queue);
     }
 
     if (Queue.empty()) {
@@ -745,18 +756,20 @@
   /// Assume the current state is \p PreviousNode and has been reached with a
   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
-                           bool NewLine, unsigned *Count, QueueType *Queue) {
+                           bool NewLine, bool BreakToken, unsigned *Count, QueueType *Queue) {
     if (NewLine && !Indenter->canBreak(PreviousNode->State))
       return;
     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
       return;
+    if (!BreakToken && !Indenter->canSplit(PreviousNode->State))
+      return;
 
     StateNode *Node = new (Allocator.Allocate())
-        StateNode(PreviousNode->State, NewLine, PreviousNode);
+        StateNode(PreviousNode->State, NewLine, BreakToken, PreviousNode);
     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
       return;
 
-    Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
+    Penalty += Indenter->addTokenToState(Node->State, NewLine, BreakToken, true);
 
     Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
     ++(*Count);
@@ -775,7 +788,7 @@
          I != E; ++I) {
       unsigned Penalty = 0;
       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
-      Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
+      Penalty += Indenter->addTokenToState(State, (*I)->NewLine, (*I)->BreakToken, false);
 
       DEBUG({
         printLineState((*I)->Previous->State);
Index: lib/Format/FormatToken.h
===================================================================
--- lib/Format/FormatToken.h
+++ lib/Format/FormatToken.h
@@ -534,7 +534,7 @@
   /// already been set thereby deciding on the first line break.
   virtual unsigned formatAfterToken(LineState &State,
                                     ContinuationIndenter *Indenter,
-                                    bool DryRun) {
+                                    bool BreakToken, bool DryRun) {
     return 0;
   }
 
@@ -553,7 +553,7 @@
   void precomputeFormattingInfos(const FormatToken *Token) override;
 
   unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
-                            bool DryRun) override;
+                            bool BreakToken, bool DryRun) override;
 
   unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
                            bool DryRun) override;
Index: lib/Format/FormatToken.cpp
===================================================================
--- lib/Format/FormatToken.cpp
+++ lib/Format/FormatToken.cpp
@@ -73,7 +73,7 @@
 
 unsigned CommaSeparatedList::formatAfterToken(LineState &State,
                                               ContinuationIndenter *Indenter,
-                                              bool DryRun) {
+                                              bool BreakToken, bool DryRun) {
   if (State.NextToken == nullptr || !State.NextToken->Previous)
     return 0;
 
@@ -125,7 +125,8 @@
     }
 
     // Place token using the continuation indenter and store the penalty.
-    Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
+    Penalty += Indenter->addTokenToState(State, NewLine, BreakToken, DryRun,
+                                         ExtraSpaces);
   }
   return Penalty;
 }
Index: lib/Format/ContinuationIndenter.h
===================================================================
--- lib/Format/ContinuationIndenter.h
+++ lib/Format/ContinuationIndenter.h
@@ -56,15 +56,18 @@
   /// \brief Returns \c true, if a line break after \p State is mandatory.
   bool mustBreak(const LineState &State);
 
+  /// \brief Returns \c true, if the first token after \p State can be split.
+  bool canSplit(LineState &State);
+
   /// \brief Appends the next token to \p State and updates information
   /// necessary for indentation.
   ///
   /// Puts the token on the current line if \p Newline is \c false and adds a
   /// line break and necessary indentation otherwise.
   ///
   /// If \p DryRun is \c false, also creates and stores the required
   /// \c Replacement.
-  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
+  unsigned addTokenToState(LineState &State, bool Newline, bool BreakToken, bool DryRun,
                            unsigned ExtraSpaces = 0);
 
   /// \brief Get the column limit for this line. This is the style's column
@@ -74,7 +77,8 @@
 private:
   /// \brief Mark the next token as consumed in \p State and modify its stacks
   /// accordingly.
-  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
+  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline,
+                                bool BreakToken);
 
   /// \brief Update 'State' according to the next token's fake left parentheses.
   void moveStatePastFakeLParens(LineState &State, bool Newline);
@@ -97,7 +101,7 @@
   /// column limit violation in all lines except for the last one. The penalty
   /// for the column limit violation in the last line (and in single line
   /// tokens) is handled in \c addNextStateToQueue.
-  unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
+  unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, bool BreakToken,
                                 bool DryRun);
 
   /// \brief Appends the next token to \p State and updates information
Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -92,7 +92,7 @@
   State.IgnoreStackForComparison = false;
 
   // The first token has already been indented and thus consumed.
-  moveStateToNextToken(State, DryRun, /*Newline=*/false);
+  moveStateToNextToken(State, DryRun, /*Newline=*/false, /*BreakToken*/true);
   return State;
 }
 
@@ -291,8 +291,14 @@
   return false;
 }
 
+bool ContinuationIndenter::canSplit(LineState & State)
+{
+    return !State.Stack.back().NoLineBreak &&
+           !State.Stack.back().NoLineBreakInOperand;
+}
+
 unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
-                                               bool DryRun,
+                                               bool BreakToken, bool DryRun,
                                                unsigned ExtraSpaces) {
   const FormatToken &Current = *State.NextToken;
 
@@ -313,7 +319,7 @@
       assert(EndColumn >= StartColumn);
       State.Column += EndColumn - StartColumn;
     }
-    moveStateToNextToken(State, DryRun, /*Newline=*/false);
+    moveStateToNextToken(State, DryRun, /*Newline=*/false, /*BreakToken=*/true);
     return 0;
   }
 
@@ -323,7 +329,7 @@
   else
     addTokenOnCurrentLine(State, DryRun, ExtraSpaces);
 
-  return moveStateToNextToken(State, DryRun, Newline) + Penalty;
+  return moveStateToNextToken(State, DryRun, Newline, BreakToken) + Penalty;
 }
 
 void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
@@ -761,7 +767,8 @@
 }
 
 unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
-                                                    bool DryRun, bool Newline) {
+                                                    bool DryRun, bool Newline,
+                                                    bool BreakToken) {
   assert(State.Stack.size());
   const FormatToken &Current = *State.NextToken;
 
@@ -876,7 +883,7 @@
   State.NextToken = State.NextToken->Next;
   unsigned Penalty = 0;
   if (CanBreakProtrudingToken)
-    Penalty = breakProtrudingToken(Current, State, DryRun);
+    Penalty = breakProtrudingToken(Current, State, BreakToken, DryRun);
   if (State.Column > getColumnLimit(State)) {
     unsigned ExcessCharacters = State.Column - getColumnLimit(State);
     Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
@@ -890,7 +897,7 @@
   // after the "{" is already done and both options are tried and evaluated.
   // FIXME: This is ugly, find a better way.
   if (Previous && Previous->Role)
-    Penalty += Previous->Role->formatAfterToken(State, this, DryRun);
+    Penalty += Previous->Role->formatAfterToken(State, this, BreakToken, DryRun);
 
   return Penalty;
 }
@@ -1134,7 +1141,7 @@
 
 unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
                                                     LineState &State,
-                                                    bool DryRun) {
+                                                    bool BreakToken, bool DryRun) {
   // Don't break multi-line tokens other than block comments. Instead, just
   // update the state.
   if (Current.isNot(TT_BlockComment) && Current.IsMultiline)
@@ -1267,6 +1274,12 @@
         break;
       }
 
+      if (!BreakToken) {
+        Penalty += Style.PenaltyExcessCharacter *
+                   (RemainingTokenColumns - RemainingSpace);
+        break;
+      }
+
       unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
           LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to