bmharper removed rL LLVM as the repository for this revision. bmharper updated this revision to Diff 68675. bmharper added a comment.
Fixed the two issues pointed out by djasper in his most recent comments: 1. Only calculate ScopeLevel when necessary. 2. Instead of calculating ScopeLevel by inspecting {[(< and their closing pairs, calculate it by combining IndentLevel and Nesting Level. It's not quite as simple as just making ScopeLevel = IndentLevel + NestingLevel, but at least we don't have yet another function computing scope depth from scratch. https://reviews.llvm.org/D21279 Files: lib/Format/WhitespaceManager.cpp lib/Format/WhitespaceManager.h unittests/Format/FormatTest.cpp
Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -9250,12 +9250,11 @@ "};", Alignment); - // FIXME: Should align all three assignments verifyFormat( "int i = 1;\n" "SomeType a = SomeFunction(looooooooooooooooooooooongParameterA,\n" " loooooooooooooooooooooongParameterB);\n" - "int j = 2;", + "int j = 2;", Alignment); verifyFormat("template <typename T, typename T_0 = very_long_type_name_0,\n" @@ -9337,7 +9336,52 @@ "unsigned oneTwoThree = 123;\n" "int oneTwo = 12;", Alignment)); + // Function prototype alignment + verifyFormat("int a();\n" + "double b();", + Alignment); + verifyFormat("int a(int x);\n" + "double b();", + Alignment); + unsigned OldColumnLimit = Alignment.ColumnLimit; + // We need to set ColumnLimit to zero, in order to stress nested alignments, + // otherwise the function parameters will be re-flowed onto a single line. + Alignment.ColumnLimit = 0; + EXPECT_EQ("int a(int x,\n" + " float y);\n" + "double b(int x,\n" + " double y);", + format("int a(int x,\n" + " float y);\n" + "double b(int x,\n" + " double y);", + Alignment)); + // This ensures that function parameters of function declarations are + // correctly indented when their owning functions are indented. + // The failure case here is for 'double y' to not be indented enough. + // This is the "split function parameter alignment" issue mentioned in + // WhitespaceManager.cpp + EXPECT_EQ("double a(int x);\n" + "int b(int x,\n" + " double y);", + format("double a(int x);\n" + "int b(int x,\n" + " double y);", + Alignment)); + Alignment.ColumnLimit = OldColumnLimit; + // Ensure function pointers don't screw up recursive alignment + verifyFormat("int a(int x, void (*fp)(int y));\n" + "double b();", + Alignment); Alignment.AlignConsecutiveAssignments = true; + // Ensure recursive alignment is broken by function braces, so that the + // "a = 1" does not align with subsequent assignments inside the function + // body. + verifyFormat("int func(int a = 1) {\n" + " int b = 2;\n" + " int cc = 3;\n" + "}", + Alignment); verifyFormat("float something = 2000;\n" "double another = 911;\n" "int i = 1, j = 10;\n" @@ -9347,6 +9391,28 @@ verifyFormat("int oneTwoThree = {0}; // comment\n" "unsigned oneTwo = 0; // comment", Alignment); + // Make sure that scope is correctly tracked, in the absence of braces + verifyFormat("for (int i = 0; i < n; i++)\n" + " j = i;\n" + "double x = 1;\n", + Alignment); + verifyFormat("if (int i = 0)\n" + " j = i;\n" + "double x = 1;\n", + Alignment); + // Ensure operator[] and operator() are comprehended + verifyFormat("struct test {\n" + " long long int foo();\n" + " int operator[](int a);\n" + " double bar();\n" + "};\n", + Alignment); + verifyFormat("struct test {\n" + " long long int foo();\n" + " int operator()(int a);\n" + " double bar();\n" + "};\n", + Alignment); EXPECT_EQ("void SomeFunction(int parameter = 0) {\n" " int const i = 1;\n" " int * j = 2;\n" @@ -9448,17 +9514,16 @@ Alignment); Alignment.AlignConsecutiveAssignments = false; - // FIXME: Should align all three declarations verifyFormat( "int i = 1;\n" "SomeType a = SomeFunction(looooooooooooooooooooooongParameterA,\n" " loooooooooooooooooooooongParameterB);\n" - "int j = 2;", + "int j = 2;", Alignment); // Test interactions with ColumnLimit and AlignConsecutiveAssignments: // We expect declarations and assignments to align, as long as it doesn't - // exceed the column limit, starting a new alignemnt sequence whenever it + // exceed the column limit, starting a new alignment sequence whenever it // happens. Alignment.AlignConsecutiveAssignments = true; Alignment.ColumnLimit = 30; Index: lib/Format/WhitespaceManager.h =================================================================== --- lib/Format/WhitespaceManager.h +++ lib/Format/WhitespaceManager.h @@ -106,11 +106,11 @@ /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out /// trailing comments and escaped newlines. Change(bool CreateReplacement, SourceRange OriginalWhitespaceRange, - unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, - unsigned NewlinesBefore, StringRef PreviousLinePostfix, - StringRef CurrentLinePrefix, tok::TokenKind Kind, - bool ContinuesPPDirective, bool IsStartOfDeclName, - bool IsInsideToken); + unsigned IndentLevel, unsigned NestingLevel, int Spaces, + unsigned StartOfTokenColumn, unsigned NewlinesBefore, + StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, + tok::TokenKind Kind, TokenType Type, bool ContinuesPPDirective, + bool IsStartOfDeclName, bool IsInsideToken); bool CreateReplacement; // Changes might be in the middle of a token, so we cannot just keep the @@ -125,14 +125,20 @@ // FIXME: Currently this is not set correctly for breaks inside comments, as // the \c BreakableToken is still doing its own alignment. tok::TokenKind Kind; + // Same comment applies as for Kind + TokenType Type; bool ContinuesPPDirective; bool IsStartOfDeclName; // The number of nested blocks the token is in. This is used to add tabs // only for the indentation, and not for alignment, when // UseTab = US_ForIndentation. unsigned IndentLevel; + // The nesting level of this token, i.e. the number of surrounding (), + // [], {} or <>. + unsigned NestingLevel; + // The number of spaces in front of the token or broken part of the token. // This will be adapted when aligning tokens. // Can be negative to retain information about the initial relative offset @@ -162,14 +168,22 @@ // the alignment process. const Change *StartOfBlockComment; int IndentationOffset; + + // This is formed by combining IndentLevel and NestingLevel, in order to + // produce a scope depth number that is useful for performing consecutive + // alignment of declarations and assignments. + int ScopeLevel; }; private: /// \brief Calculate \c IsTrailingComment, \c TokenLength for the last tokens /// or token parts in a line and \c PreviousEndOfTokenColumn and /// \c EscapedNewlineColumn for the first tokens or token parts in a line. void calculateLineBreakInformation(); + /// \brief Calculate \c ScopeLevel for all \c Changes. + void calculateScopeLevel(); + /// \brief Align consecutive assignments over all \c Changes. void alignConsecutiveAssignments(); @@ -202,6 +216,10 @@ void appendIndentText(std::string &Text, unsigned IndentLevel, unsigned Spaces, unsigned WhitespaceStartColumn); + /// \brief Determine whether this token is considered as the start of a + /// declaration name. + static bool IsStartOfDeclName(const FormatToken &Tok); + SmallVector<Change, 16> Changes; const SourceManager &SourceMgr; tooling::Replacements Replaces; Index: lib/Format/WhitespaceManager.cpp =================================================================== --- lib/Format/WhitespaceManager.cpp +++ lib/Format/WhitespaceManager.cpp @@ -27,20 +27,22 @@ WhitespaceManager::Change::Change( bool CreateReplacement, SourceRange OriginalWhitespaceRange, - unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, - unsigned NewlinesBefore, StringRef PreviousLinePostfix, - StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective, + unsigned IndentLevel, unsigned NestingLevel, int Spaces, + unsigned StartOfTokenColumn, unsigned NewlinesBefore, + StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, + tok::TokenKind Kind, TokenType Type, bool ContinuesPPDirective, bool IsStartOfDeclName, bool IsInsideToken) : CreateReplacement(CreateReplacement), OriginalWhitespaceRange(OriginalWhitespaceRange), StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), PreviousLinePostfix(PreviousLinePostfix), - CurrentLinePrefix(CurrentLinePrefix), Kind(Kind), + CurrentLinePrefix(CurrentLinePrefix), Kind(Kind), Type(Type), ContinuesPPDirective(ContinuesPPDirective), IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel), - Spaces(Spaces), IsInsideToken(IsInsideToken), IsTrailingComment(false), - TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), - StartOfBlockComment(nullptr), IndentationOffset(0) {} + NestingLevel(NestingLevel), Spaces(Spaces), IsInsideToken(IsInsideToken), + IsTrailingComment(false), TokenLength(0), PreviousEndOfTokenColumn(0), + EscapedNewlineColumn(0), StartOfBlockComment(nullptr), + IndentationOffset(0), ScopeLevel(0) {} void WhitespaceManager::reset() { Changes.clear(); @@ -56,22 +58,21 @@ Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue; Changes.push_back( Change(/*CreateReplacement=*/true, Tok.WhitespaceRange, IndentLevel, - Spaces, StartOfTokenColumn, Newlines, "", "", Tok.Tok.getKind(), - InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/false)); + Tok.NestingLevel, Spaces, StartOfTokenColumn, Newlines, "", "", + Tok.Tok.getKind(), Tok.Type, InPPDirective && !Tok.IsFirst, + IsStartOfDeclName(Tok), /*IsInsideToken=*/false)); } void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, bool InPPDirective) { if (Tok.Finalized) return; Changes.push_back(Change( /*CreateReplacement=*/false, Tok.WhitespaceRange, /*IndentLevel=*/0, + Tok.NestingLevel, /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "", - Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/false)); + Tok.Tok.getKind(), Tok.Type, InPPDirective && !Tok.IsFirst, + IsStartOfDeclName(Tok), /*IsInsideToken=*/false)); } void WhitespaceManager::replaceWhitespaceInToken( @@ -81,20 +82,22 @@ if (Tok.Finalized) return; SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); - Changes.push_back(Change( - true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), - IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix, - CurrentPrefix, Tok.is(TT_LineComment) ? tok::comment : tok::unknown, - InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/Newlines == 0)); + Changes.push_back( + Change(true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), + IndentLevel, Tok.NestingLevel, Spaces, std::max(0, Spaces), + Newlines, PreviousPostfix, CurrentPrefix, + Tok.is(TT_LineComment) ? tok::comment : tok::unknown, Tok.Type, + InPPDirective && !Tok.IsFirst, IsStartOfDeclName(Tok), + /*IsInsideToken=*/Newlines == 0)); } const tooling::Replacements &WhitespaceManager::generateReplacements() { if (Changes.empty()) return Replaces; std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr)); + if (Style.AlignConsecutiveAssignments || Style.AlignConsecutiveDeclarations) + calculateScopeLevel(); calculateLineBreakInformation(); alignConsecutiveDeclarations(); alignConsecutiveAssignments(); @@ -160,59 +163,147 @@ } } +struct TokenTypeAndLevel { + TokenType Type; + int ScopeLevel; +}; + +// Here we combine IndentLevel and NestingLevel to produce ScopeLevel, +// which is used to detect where to break consecutive alignment. +void WhitespaceManager::calculateScopeLevel() { + unsigned IndentLevel = 0; + for (auto &Change : Changes) { + // IndentLevel is only set on the first token of the line + if (Change.NewlinesBefore != 0) + IndentLevel = Change.IndentLevel; + + unsigned NestingLevel = Change.NestingLevel; + + // NestingLevel is raised on the opening paren/square, and remains raised + // until AFTER the closing paren/square. This means that with a statement + // like: + // + // for (int i = 0; ...) + // int x = 1;. + // + // The "int x = 1" line is going to have the same NestingLevel as + // the tokens inside the parentheses of the "for" statement. + // In order to break this continuity, we force the closing paren/square to + // reduce it's nesting level. + // If we don't do this, then "x = 1" ends up getting aligned to "i = 1" + switch (Change.Kind) { + case tok::r_paren: + case tok::r_square: + NestingLevel--; + break; + default: + break; + } + + Change.ScopeLevel = IndentLevel + NestingLevel; + } +} + // Align a single sequence of tokens, see AlignTokens below. template <typename F> static void AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, SmallVector<WhitespaceManager::Change, 16> &Changes) { bool FoundMatchOnLine = false; int Shift = 0; + + // ScopeStack keeps track of the current scope depth. + // We only run the "Matches" function on tokens from the outer-most scope. + // However, we do need to adjust some special tokens within the entire + // Start..End range, regardless of their scope. This special rule applies + // only to function parameters which are split across multiple lines, and + // who's function names have been shifted for declaration alignment's sake. + // See "split function parameter alignment" in FormatTest.cpp for an example. + SmallVector<TokenTypeAndLevel, 16> ScopeStack; + for (unsigned i = Start; i != End; ++i) { - if (Changes[i].NewlinesBefore > 0) { - FoundMatchOnLine = false; + if (ScopeStack.size() != 0 && + Changes[i].ScopeLevel < ScopeStack.back().ScopeLevel) + ScopeStack.pop_back(); + + if (i != Start) { + if (Changes[i].ScopeLevel > Changes[i - 1].ScopeLevel) { + ScopeStack.push_back({Changes[i - 1].Type, Changes[i].ScopeLevel}); + if (i > Start + 1 && Changes[i - 2].Type == TT_FunctionDeclarationName) + ScopeStack.back().Type = Changes[i - 2].Type; + } + } + + bool InsideNestedScope = ScopeStack.size() != 0; + + if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) { Shift = 0; + FoundMatchOnLine = false; } // If this is the first matching token to be aligned, remember by how many // spaces it has to be shifted, so the rest of the changes on the line are // shifted by the same amount - if (!FoundMatchOnLine && Matches(Changes[i])) { + if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) { FoundMatchOnLine = true; Shift = Column - Changes[i].StartOfTokenColumn; Changes[i].Spaces += Shift; } + // This is for function parameters that are split across multiple lines, + // as mentioned in the ScopeStack comment. + if (InsideNestedScope && Changes[i].NewlinesBefore > 0 && + ScopeStack.back().Type == TT_FunctionDeclarationName) + Changes[i].Spaces += Shift; + assert(Shift >= 0); Changes[i].StartOfTokenColumn += Shift; if (i + 1 != Changes.size()) Changes[i + 1].PreviousEndOfTokenColumn += Shift; } } -// Walk through all of the changes and find sequences of matching tokens to -// align. To do so, keep track of the lines and whether or not a matching token -// was found on a line. If a matching token is found, extend the current -// sequence. If the current line cannot be part of a sequence, e.g. because -// there is an empty line before it or it contains only non-matching tokens, -// finalize the previous sequence. +// Walk through a subset of the changes, starting at StartAt, and find +// sequences of matching tokens to align. To do so, keep track of the lines and +// whether or not a matching token was found on a line. If a matching token is +// found, extend the current sequence. If the current line cannot be part of a +// sequence, e.g. because there is an empty line before it or it contains only +// non-matching tokens, finalize the previous sequence. +// The value returned is the token on which we stopped, either because we +// exhausted all items inside Changes, or because we hit a scope level higher +// than our initial scope. +// This function is recursive. Each invocation processes only the scope level +// equal to the initial level, which is the level of Changes[StartAt]. +// If we encounter a scope level greater than the initial level, then we call +// ourselves recursively, thereby avoiding the pollution of the current state +// with the alignment requirements of the nested sub-level. This recursive +// behavior is necessary for aligning function prototypes that have one or more +// arguments. +// If this function encounters a scope level less than the initial level, +// it exits. +// There is a non-obvious subtlety in the recursive behavior: Even though we +// defer processing of nested levels to recursive invocations of this +// function, when it comes time to align a sequence of tokens, we run the +// alignment on the entire sequence, including the nested levels. +// When doing so, most of the nested tokens are skipped, because their +// alignment was already handled by the recursive invocations of this function. +// However, the special exception is that we do NOT skip function parameters +// that are split across multiple lines. See the test case in FormatTest.cpp +// that mentions "split function parameter alignment" for an example of this. template <typename F> -static void AlignTokens(const FormatStyle &Style, F &&Matches, - SmallVector<WhitespaceManager::Change, 16> &Changes) { +static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, + SmallVector<WhitespaceManager::Change, 16> &Changes, + unsigned StartAt) { unsigned MinColumn = 0; unsigned MaxColumn = UINT_MAX; // Line number of the start and the end of the current token sequence. unsigned StartOfSequence = 0; unsigned EndOfSequence = 0; - // Keep track of the nesting level of matching tokens, i.e. the number of - // surrounding (), [], or {}. We will only align a sequence of matching - // token that share the same scope depth. - // - // FIXME: This could use FormatToken::NestingLevel information, but there is - // an outstanding issue wrt the brace scopes. - unsigned NestingLevelOfLastMatch = 0; - unsigned NestingLevel = 0; + // Measure the ScopeLevel (i.e. depth of (), [], {}) of the first token, and + // abort when we hit any token in a higher scope than the starting one. + int ScopeLevel = StartAt < Changes.size() ? Changes[StartAt].ScopeLevel : -1; // Keep track of the number of commas before the matching tokens, we will only // align a sequence of matching tokens if they are preceded by the same number @@ -240,7 +331,10 @@ EndOfSequence = 0; }; - for (unsigned i = 0, e = Changes.size(); i != e; ++i) { + unsigned i = StartAt; + for (unsigned e = Changes.size(); + i != e && Changes[i].ScopeLevel >= ScopeLevel; ++i) { + if (Changes[i].NewlinesBefore != 0) { CommasBeforeMatch = 0; EndOfSequence = i; @@ -254,31 +348,23 @@ if (Changes[i].Kind == tok::comma) { ++CommasBeforeMatch; - } else if (Changes[i].Kind == tok::r_brace || - Changes[i].Kind == tok::r_paren || - Changes[i].Kind == tok::r_square) { - --NestingLevel; - } else if (Changes[i].Kind == tok::l_brace || - Changes[i].Kind == tok::l_paren || - Changes[i].Kind == tok::l_square) { - // We want sequences to skip over child scopes if possible, but not the - // other way around. - NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel); - ++NestingLevel; + } else if (Changes[i].ScopeLevel != ScopeLevel) { + // Call AlignTokens recursively, skipping over this scope block. + unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i); + i = StoppedAt - 1; + continue; } if (!Matches(Changes[i])) continue; // If there is more than one matching token per line, or if the number of // preceding commas, or the scope depth, do not match anymore, end the // sequence. - if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch || - NestingLevel != NestingLevelOfLastMatch) + if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch) AlignCurrentSequence(); CommasBeforeLastMatch = CommasBeforeMatch; - NestingLevelOfLastMatch = NestingLevel; FoundMatchOnLine = true; if (StartOfSequence == 0) @@ -301,8 +387,10 @@ MaxColumn = std::min(MaxColumn, ChangeMaxColumn); } - EndOfSequence = Changes.size(); + unsigned StoppedAt = i; + EndOfSequence = i; AlignCurrentSequence(); + return StoppedAt; } void WhitespaceManager::alignConsecutiveAssignments() { @@ -321,7 +409,7 @@ return C.Kind == tok::equal; }, - Changes); + Changes, 0); } void WhitespaceManager::alignConsecutiveDeclarations() { @@ -334,9 +422,8 @@ // const char* const* v1; // float const* v2; // SomeVeryLongType const& v3; - AlignTokens(Style, [](Change const &C) { return C.IsStartOfDeclName; }, - Changes); + Changes, 0); } void WhitespaceManager::alignTrailingComments() { @@ -372,8 +459,7 @@ unsigned CommentColumn = SourceMgr.getSpellingColumnNumber( Changes[i].OriginalWhitespaceRange.getEnd()); for (unsigned j = i + 1; j != e; ++j) { - if (Changes[j].Kind == tok::comment || - Changes[j].Kind == tok::unknown) + if (Changes[j].Kind == tok::comment || Changes[j].Kind == tok::unknown) // Skip over comments and unknown tokens. "unknown tokens are used for // the continuation of multiline comments. continue; @@ -494,8 +580,7 @@ } } -void WhitespaceManager::storeReplacement(SourceRange Range, - StringRef Text) { +void WhitespaceManager::storeReplacement(SourceRange Range, StringRef Text) { unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) - SourceMgr.getFileOffset(Range.getBegin()); // Don't create a replacement, if it does not change anything. @@ -574,5 +659,10 @@ } } +bool WhitespaceManager::IsStartOfDeclName(const FormatToken &Tok) { + return Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName) || + Tok.is(tok::kw_operator); +} + } // namespace format } // namespace clang
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits