Addressed comments, added a style option for the new behavior.
Hi djasper,
http://llvm-reviews.chandlerc.com/D1640
CHANGE SINCE LAST DIFF
http://llvm-reviews.chandlerc.com/D1640?vs=4231&id=4283#toc
Files:
include/clang/Format/Format.h
lib/Format/BreakableToken.cpp
lib/Format/BreakableToken.h
lib/Format/ContinuationIndenter.cpp
lib/Format/Format.cpp
lib/Format/WhitespaceManager.cpp
unittests/Format/FormatTest.cpp
Index: include/clang/Format/Format.h
===================================================================
--- include/clang/Format/Format.h
+++ include/clang/Format/Format.h
@@ -166,6 +166,15 @@
/// \brief If \c true, always break before multiline string literals.
bool AlwaysBreakBeforeMultilineStrings;
+ /// \brief If \c true, break _T("aaabbb") as _T("aaa") _T("bbb") as opposed to
+ /// _T("aaa" "bbb").
+ ///
+ /// This options can be used to avoid Visual C++'s error C2308:
+ /// "concatenating mismatched strings" when compiling in Unicode mode, if
+ /// a string literal is broken inside the _T() macro. (This Visual C++'s
+ /// behavior is not C++11 compliant.)
+ bool BreakOutsideOf_TMacro;
+
/// \brief If \c true, \c IndentWidth consecutive spaces will be replaced
/// with tab characters.
bool UseTab;
@@ -232,19 +241,26 @@
R.AllowAllParametersOfDeclarationOnNextLine &&
AllowShortIfStatementsOnASingleLine ==
R.AllowShortIfStatementsOnASingleLine &&
+ AllowShortLoopsOnASingleLine == R.AllowShortLoopsOnASingleLine &&
AlwaysBreakTemplateDeclarations ==
R.AlwaysBreakTemplateDeclarations &&
AlwaysBreakBeforeMultilineStrings ==
R.AlwaysBreakBeforeMultilineStrings &&
BinPackParameters == R.BinPackParameters &&
+ BreakBeforeBinaryOperators == R.BreakBeforeBinaryOperators &&
BreakBeforeBraces == R.BreakBeforeBraces &&
+ BreakConstructorInitializersBeforeComma ==
+ R.BreakConstructorInitializersBeforeComma &&
+ BreakOutsideOf_TMacro == R.BreakOutsideOf_TMacro &&
ColumnLimit == R.ColumnLimit &&
ConstructorInitializerAllOnOneLineOrOnePerLine ==
R.ConstructorInitializerAllOnOneLineOrOnePerLine &&
DerivePointerBinding == R.DerivePointerBinding &&
ExperimentalAutoDetectBinPacking ==
R.ExperimentalAutoDetectBinPacking &&
IndentCaseLabels == R.IndentCaseLabels &&
+ IndentFunctionDeclarationAfterType ==
+ R.IndentFunctionDeclarationAfterType &&
IndentWidth == R.IndentWidth &&
MaxEmptyLinesToKeep == R.MaxEmptyLinesToKeep &&
NamespaceIndentation == R.NamespaceIndentation &&
@@ -257,13 +273,10 @@
PointerBindsToType == R.PointerBindsToType &&
SpacesBeforeTrailingComments == R.SpacesBeforeTrailingComments &&
Cpp11BracedListStyle == R.Cpp11BracedListStyle &&
- Standard == R.Standard && UseTab == R.UseTab &&
- IndentFunctionDeclarationAfterType ==
- R.IndentFunctionDeclarationAfterType &&
- SpacesInParentheses == R.SpacesInParentheses &&
+ Standard == R.Standard && TabWidth == R.TabWidth &&
+ UseTab == R.UseTab && SpacesInParentheses == R.SpacesInParentheses &&
SpaceInEmptyParentheses == R.SpaceInEmptyParentheses &&
- SpacesInCStyleCastParentheses ==
- R.SpacesInCStyleCastParentheses &&
+ SpacesInCStyleCastParentheses == R.SpacesInCStyleCastParentheses &&
SpaceAfterControlStatementKeyword ==
R.SpaceAfterControlStatementKeyword;
}
Index: lib/Format/BreakableToken.cpp
===================================================================
--- lib/Format/BreakableToken.cpp
+++ lib/Format/BreakableToken.cpp
@@ -82,20 +82,18 @@
}
static BreakableToken::Split getStringSplit(StringRef Text,
- unsigned ContentStartColumn,
+ unsigned UsedColumns,
unsigned ColumnLimit,
unsigned TabWidth,
encoding::Encoding Encoding) {
// FIXME: Reduce unit test case.
if (Text.empty())
return BreakableToken::Split(StringRef::npos, 0);
- if (ColumnLimit <= ContentStartColumn)
+ if (ColumnLimit <= UsedColumns)
return BreakableToken::Split(StringRef::npos, 0);
- unsigned MaxSplit =
- std::min<unsigned>(ColumnLimit - ContentStartColumn,
- encoding::columnWidthWithTabs(Text, ContentStartColumn,
- TabWidth, Encoding) -
- 1);
+ unsigned MaxSplit = std::min<unsigned>(
+ ColumnLimit - UsedColumns,
+ encoding::columnWidthWithTabs(Text, UsedColumns, TabWidth, Encoding) - 1);
StringRef::size_type SpaceOffset = 0;
StringRef::size_type SlashOffset = 0;
StringRef::size_type WordStartOffset = 0;
@@ -107,9 +105,8 @@
Chars += Advance;
} else {
Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
- Chars += encoding::columnWidthWithTabs(Text.substr(0, Advance),
- ContentStartColumn + Chars,
- TabWidth, Encoding);
+ Chars += encoding::columnWidthWithTabs(
+ Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
}
if (Chars > MaxSplit)
@@ -158,19 +155,19 @@
Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
}
-BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
- unsigned StartColumn,
- bool InPPDirective,
- encoding::Encoding Encoding,
- const FormatStyle &Style)
- : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective,
+BreakableStringLiteral::BreakableStringLiteral(
+ const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
+ StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective,
Encoding, Style) {}
BreakableToken::Split
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit) const {
- return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit,
- Style.TabWidth, Encoding);
+ return getStringSplit(Line.substr(TailOffset),
+ StartColumn + Prefix.size() + Postfix.size(),
+ ColumnLimit, Style.TabWidth, Encoding);
}
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
Index: lib/Format/BreakableToken.h
===================================================================
--- lib/Format/BreakableToken.h
+++ lib/Format/BreakableToken.h
@@ -110,6 +110,7 @@
/// \p StartColumn specifies the column in which the token will start
/// after formatting.
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
+ StringRef Prefix, StringRef Postfix,
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -633,32 +633,66 @@
return 0;
}
+static bool getRawStringLiteralPrefixPostfix(StringRef Text,
+ StringRef &Prefix,
+ StringRef &Postfix) {
+ if (Text.startswith(Prefix = "R\"") || Text.startswith(Prefix = "uR\"") ||
+ Text.startswith(Prefix = "UR\"") || Text.startswith(Prefix = "u8R\"") ||
+ Text.startswith(Prefix = "LR\"")) {
+ size_t ParenPos = Text.find('(');
+ if (ParenPos != StringRef::npos) {
+ StringRef Delimiter =
+ Text.substr(Prefix.size(), ParenPos - Prefix.size());
+ Prefix = Text.substr(0, ParenPos + 1);
+ Postfix = Text.substr(Text.size() - 2 - Delimiter.size());
+ return Postfix.front() == ')' && Postfix.back() == '"' &&
+ Postfix.substr(1).startswith(Delimiter);
+ }
+ }
+ return false;
+}
+
unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
LineState &State,
bool DryRun) {
// Don't break multi-line tokens other than block comments. Instead, just
// update the state.
if (Current.Type != TT_BlockComment && Current.IsMultiline)
return addMultilineToken(Current, State);
- if (!Current.isOneOf(tok::string_literal, tok::comment))
+ if (!Current.isOneOf(tok::string_literal, tok::wide_string_literal,
+ tok::utf8_string_literal, tok::utf16_string_literal,
+ tok::utf32_string_literal, tok::comment))
return 0;
llvm::OwningPtr<BreakableToken> Token;
unsigned StartColumn = State.Column - Current.ColumnWidth;
- if (Current.is(tok::string_literal) &&
+ if (Current.isOneOf(tok::string_literal, tok::wide_string_literal,
+ tok::utf8_string_literal, tok::utf16_string_literal,
+ tok::utf32_string_literal) &&
Current.Type != TT_ImplicitStringLiteral) {
- // Only break up default narrow strings.
- if (!Current.TokenText.startswith("\""))
- return 0;
// Exempts unterminated string literals from line breaking. The user will
// likely want to terminate the string before any line breaking is done.
if (Current.IsUnterminatedLiteral)
return 0;
- Token.reset(new BreakableStringLiteral(
- Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
+ StringRef Text = Current.TokenText;
+ StringRef Prefix;
+ StringRef Postfix;
+ // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
+ if ((Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")")) ||
+ (Text.endswith(Postfix = "\"") &&
+ (Text.startswith(Prefix = "\"") || Text.startswith(Prefix = "u\"") ||
+ Text.startswith(Prefix = "U\"") || Text.startswith(Prefix = "u8\"") ||
+ Text.startswith(Prefix = "L\""))) ||
+ getRawStringLiteralPrefixPostfix(Text, Prefix, Postfix)) {
+ Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
+ Postfix, State.Line->InPPDirective,
+ Encoding, Style));
+ } else {
+ return 0;
+ }
} else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
Token.reset(new BreakableBlockComment(
Current, StartColumn, Current.OriginalColumn, !Current.Previous,
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -176,6 +176,7 @@
LLVMStyle.BreakBeforeBinaryOperators = false;
LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
LLVMStyle.BreakConstructorInitializersBeforeComma = false;
+ LLVMStyle.BreakOutsideOf_TMacro = false;
LLVMStyle.ColumnLimit = 80;
LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
LLVMStyle.ConstructorInitializerIndentWidth = 4;
@@ -218,6 +219,7 @@
GoogleStyle.BreakBeforeBinaryOperators = false;
GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
GoogleStyle.BreakConstructorInitializersBeforeComma = false;
+ GoogleStyle.BreakOutsideOf_TMacro = false;
GoogleStyle.ColumnLimit = 80;
GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
GoogleStyle.ConstructorInitializerIndentWidth = 4;
@@ -591,13 +593,51 @@
assert(Tokens.empty());
do {
Tokens.push_back(getNextToken());
+ maybeJoinPreviousTokens();
} while (Tokens.back()->Tok.isNot(tok::eof));
return Tokens;
}
IdentifierTable &getIdentTable() { return IdentTable; }
private:
+ void maybeJoinPreviousTokens() {
+ if (!Style.BreakOutsideOf_TMacro)
+ return;
+
+ if (Tokens.size() < 4)
+ return;
+ FormatToken *Last = Tokens.back();
+ if (!Last->is(tok::r_paren))
+ return;
+
+ FormatToken *String = Tokens[Tokens.size() - 2];
+ if (!String->is(tok::string_literal) || String->IsMultiline)
+ return;
+
+ if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
+ return;
+
+ FormatToken *Macro = Tokens[Tokens.size() - 4];
+ if (Macro->TokenText != "_T")
+ return;
+
+ const char *Start = Macro->TokenText.data();
+ const char *End = Last->TokenText.data() + Last->TokenText.size();
+ String->TokenText = StringRef(Start, End - Start);
+ String->IsFirst = Macro->IsFirst;
+ String->LastNewlineOffset = Macro->LastNewlineOffset;
+ String->WhitespaceRange = Macro->WhitespaceRange;
+ String->OriginalColumn = Macro->OriginalColumn;
+ String->ColumnWidth = encoding::columnWidthWithTabs(
+ String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
+
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.back() = String;
+ }
+
FormatToken *getNextToken() {
if (GreaterStashed) {
// Create a synthesized second '>' token.
@@ -1135,7 +1175,8 @@
CharSourceRange LineRange = CharSourceRange::getCharRange(
First->WhitespaceRange.getBegin().getLocWithOffset(
First->LastNewlineOffset),
- Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1));
+ Last->getStartOfNonWhitespace().getLocWithOffset(
+ Last->TokenText.size() - 1));
return touchesRanges(LineRange);
}
Index: lib/Format/WhitespaceManager.cpp
===================================================================
--- lib/Format/WhitespaceManager.cpp
+++ lib/Format/WhitespaceManager.cpp
@@ -49,11 +49,9 @@
void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
bool InPPDirective) {
- Changes.push_back(
- Change(false, Tok.WhitespaceRange, /*Spaces=*/0,
- SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1,
- Tok.NewlinesBefore, "", "", Tok.Tok.getKind(),
- InPPDirective && !Tok.IsFirst));
+ Changes.push_back(Change(false, Tok.WhitespaceRange, /*Spaces=*/0,
+ Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
+ Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst));
}
void WhitespaceManager::replaceWhitespaceInToken(
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -5464,6 +5464,84 @@
format("#define A \"some text other\";", AlignLeft));
}
+TEST_F(FormatTest, BreaksWideStringLiterals) {
+ EXPECT_EQ(
+ "u8\"utf8 string \"\n"
+ "u8\"literal\";",
+ format("u8\"utf8 string literal\";", getGoogleStyleWithColumns(16)));
+ EXPECT_EQ(
+ "u\"utf16 string \"\n"
+ "u\"literal\";",
+ format("u\"utf16 string literal\";", getGoogleStyleWithColumns(16)));
+ EXPECT_EQ(
+ "U\"utf32 string \"\n"
+ "U\"literal\";",
+ format("U\"utf32 string literal\";", getGoogleStyleWithColumns(16)));
+ EXPECT_EQ("L\"wide string \"\n"
+ "L\"literal\";",
+ format("L\"wide string literal\";", getGoogleStyleWithColumns(16)));
+}
+
+TEST_F(FormatTest, BreaksRawStringLiterals) {
+ EXPECT_EQ("R\"x(raw )x\"\n"
+ "R\"x(literal)x\";",
+ format("R\"x(raw literal)x\";", getGoogleStyleWithColumns(15)));
+ EXPECT_EQ("uR\"x(raw )x\"\n"
+ "uR\"x(literal)x\";",
+ format("uR\"x(raw literal)x\";", getGoogleStyleWithColumns(16)));
+ EXPECT_EQ("u8R\"x(raw )x\"\n"
+ "u8R\"x(literal)x\";",
+ format("u8R\"x(raw literal)x\";", getGoogleStyleWithColumns(17)));
+ EXPECT_EQ("LR\"x(raw )x\"\n"
+ "LR\"x(literal)x\";",
+ format("LR\"x(raw literal)x\";", getGoogleStyleWithColumns(16)));
+ EXPECT_EQ("UR\"x(raw )x\"\n"
+ "UR\"x(literal)x\";",
+ format("UR\"x(raw literal)x\";", getGoogleStyleWithColumns(16)));
+}
+
+TEST_F(FormatTest, BreaksStringLiteralsWithin_TMacro) {
+ FormatStyle Style = getLLVMStyleWithColumns(20);
+ Style.BreakOutsideOf_TMacro = true;
+ EXPECT_EQ(
+ "_T(\"aaaaaaaaaaaaaa\")\n"
+ "_T(\"aaaaaaaaaaaaaa\")\n"
+ "_T(\"aaaaaaaaaaaa\")",
+ format(" _T(\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\")", Style));
+ EXPECT_EQ("f(x, _T(\"aaaaaaaaa\")\n"
+ " _T(\"aaaaaa\"),\n"
+ " z);",
+ format("f(x, _T(\"aaaaaaaaaaaaaaa\"), z);", Style));
+
+ // FIXME: Handle embedded spaces in one iteration.
+ // EXPECT_EQ("_T(\"aaaaaaaaaaaaa\")\n"
+ // "_T(\"aaaaaaaaaaaaa\")\n"
+ // "_T(\"aaaaaaaaaaaaa\")\n"
+ // "_T(\"a\")",
+ // format(" _T ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )",
+ // getLLVMStyleWithColumns(20)));
+ EXPECT_EQ(
+ "_T ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )",
+ format(" _T ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )", Style));
+
+ Style.BreakOutsideOf_TMacro = false;
+ EXPECT_EQ(
+ "_T(\"aaaaaaaaaaaaaaa\"\n"
+ " \"aaaaaaaaaaaaaaa\"\n"
+ " \"aaaaaaaaaa\")",
+ format(" _T(\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\")", Style));
+ EXPECT_EQ("f(x, _T(\"aaaaaaaaaa\"\n"
+ " \"aaaaa\"),\n"
+ " z);",
+ format("f(x, _T(\"aaaaaaaaaaaaaaa\"), z);", Style));
+
+ EXPECT_EQ(
+ "_T(\"aaaaaaaaaaaaaaa\"\n"
+ " \"aaaaaaaaaaaaaaa\"\n"
+ " \"aaaaaaaaaa\")",
+ format(" _T ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )", Style));
+}
+
TEST_F(FormatTest, DontSplitStringLiteralsWithEscapedNewlines) {
EXPECT_EQ(
"aaaaaaaaaaa = \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n"
@@ -5515,17 +5593,6 @@
}
TEST_F(FormatTest, SkipsUnknownStringLiterals) {
- EXPECT_EQ(
- "u8\"unsupported literal\";",
- format("u8\"unsupported literal\";", getGoogleStyleWithColumns(15)));
- EXPECT_EQ("u\"unsupported literal\";",
- format("u\"unsupported literal\";", getGoogleStyleWithColumns(15)));
- EXPECT_EQ("U\"unsupported literal\";",
- format("U\"unsupported literal\";", getGoogleStyleWithColumns(15)));
- EXPECT_EQ("L\"unsupported literal\";",
- format("L\"unsupported literal\";", getGoogleStyleWithColumns(15)));
- EXPECT_EQ("R\"x(raw literal)x\";",
- format("R\"x(raw literal)x\";", getGoogleStyleWithColumns(15)));
verifyFormat("string a = \"unterminated;");
EXPECT_EQ("function(\"unterminated,\n"
" OtherParameter);",
@@ -5622,7 +5689,10 @@
"\"00000000\"\n"
"\"1\"",
format("\"test\\000000000001\"", getLLVMStyleWithColumns(10)));
- EXPECT_EQ("R\"(\\x\\x00)\"\n",
+ // FIXME: We probably don't need to care about escape sequences in raw
+ // literals.
+ EXPECT_EQ("R\"(\\x)\"\n"
+ "R\"(\\x00)\"\n",
format("R\"(\\x\\x00)\"\n", getGoogleStyleWithColumns(7)));
}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits