[PATCH] D18551: Added Fixer implementation and fix() interface in clang-format for removing redundant code.

Eric Liu via cfe-commits Tue, 29 Mar 2016 07:23:43 -0700

ioeric created this revision.
ioeric added a reviewer: djasper.
ioeric added a subscriber: cfe-commits.
Herald added a subscriber: klimek.


After applying replacements, redundant code like extra commas or empty 
namespaces
might be introduced. Fixer can detect and remove any redundant code introduced 
by replacements.
The current implementation only handles redundant commas.

http://reviews.llvm.org/D18551

Files:
  include/clang/Format/Format.h
  lib/Format/Format.cpp
  lib/Format/FormatToken.h
  unittests/Format/FormatTest.cpp

Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -11233,6 +11233,117 @@
 
 #endif // _MSC_VER
 
+class FixTest : public ::testing::Test {
+protected:
+  std::string fix(llvm::StringRef Code,
+                  const std::vector<tooling::Range> &Ranges,
+                  const FormatStyle &Style = getLLVMStyle()) {
+    DEBUG(llvm::errs() << "---\n");
+    DEBUG(llvm::errs() << Code << "\n\n");
+    tooling::Replacements Replaces = format::fix(Style, Code, Ranges);
+
+    std::string Result = applyAllReplacements(Code, Replaces);
+    EXPECT_NE("", Result);
+    DEBUG(llvm::errs() << "\n" << Result << "\n\n");
+    return Result;
+  }
+};
+
+TEST_F(FixTest, CtorInitializationSimpleRedundantComma) {
+  std::string Code = "class A {\nA() : , {} };";
+  std::string Expected = "class A {\nA()   {} };";
+  std::vector<tooling::Range> Ranges;
+  Ranges.push_back(tooling::Range(17, 0));
+  Ranges.push_back(tooling::Range(19, 0));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  Code = "class A {\nA() : x(1), {} };";
+  Expected = "class A {\nA() : x(1) {} };";
+  Ranges.clear();
+  Ranges.push_back(tooling::Range(23, 0));
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, CtorInitializationBracesInParens) {
+  std::string Code = "class A {\nA() : x({1}),, {} };";
+  std::string Expected = "class A {\nA() : x({1}) {} };";
+  std::vector<tooling::Range> Ranges;
+  Ranges.push_back(tooling::Range(24, 0));
+  Ranges.push_back(tooling::Range(26, 0));
+  std::string Result = fix(Code, Ranges);
+  DEBUG(llvm::errs() << "\n" << Result << "\n");
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, RedundantCommaNotInAffectedRanges) {
+  std::string Code =
+      "class A {\nA() : x({1}), /* comment */, { int x = 0; } };";
+  std::string Expected =
+      "class A {\nA() : x({1}), /* comment */, { int x = 0; } };";
+  // Set the affected range to be "int x = 0", which does not intercept the
+  // constructor initialization list.
+  std::vector<tooling::Range> Ranges(1, tooling::Range(42, 9));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  Code = "class A {\nA() : x(1), {} };";
+  Expected = "class A {\nA() : x(1), {} };";
+  // No range. Fixer should do nothing.
+  Ranges.clear();
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, CtorInitializationCommentAroundCommas) {
+  // Remove redundant commas and comment between them.
+  std::string Code = "class A {\nA() : x({1}), /* comment */, {} };";
+  std::string Expected = "class A {\nA() : x({1})  {} };";
+  std::vector<tooling::Range> Ranges;
+  Ranges.push_back(tooling::Range(25, 0));
+  Ranges.push_back(tooling::Range(40, 0));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  // Remove trailing comma and comment.
+  Code = "class A {\nA() : x({1}), // comment\n{} };";
+  Expected = "class A {\nA() : x({1}) \n{} };";
+  Ranges = std::vector<tooling::Range>(1, tooling::Range(25, 0));
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  // Remove trailing comma, but leave the comment.
+  Code = "class A {\nA() : x({1}), // comment\n , y(1),{} };";
+  Expected = "class A {\nA() : x({1}), // comment\n  y(1){} };";
+  Ranges = std::vector<tooling::Range>(1, tooling::Range(38, 0));
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  // Remove trailing comma and the comment before it.
+  Code = "class A {\nA() : x({1}), \n/* comment */, y(1),{} };";
+  Expected = "class A {\nA() : x({1}), \n y(1){} };";
+  Ranges = std::vector<tooling::Range>(1, tooling::Range(40, 0));
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  // Remove trailing comma and the comment after it.
+  Code = "class A {\nA() : , // comment\n y(1),{} };";
+  Expected = "class A {\nA() :  \n y(1){} };";
+  Ranges = std::vector<tooling::Range>(1, tooling::Range(17, 0));
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, SkipImbalancedParentheses) {
+  std::string Code = "class A {\nA() : x((),, {} };";
+  std::string Expected = "class A {\nA() : x((),, {} };";
+  std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
+  std::string Result = fix(Code, Ranges);
+  DEBUG(llvm::errs() << "\n" << Result << "\n");
+  EXPECT_EQ(Expected, Result);
+}
+
 class ReplacementTest : public ::testing::Test {
 protected:
   tooling::Replacement createReplacement(SourceLocation Start, unsigned Length,
Index: lib/Format/FormatToken.h
===================================================================
--- lib/Format/FormatToken.h
+++ lib/Format/FormatToken.h
@@ -279,6 +279,9 @@
   /// changes.
   bool Finalized = false;
 
+  // \brief Fixer will set this to true if the token is going to be deleted.
+  bool Deleted = false;
+
   bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
   bool is(TokenType TT) const { return Type == TT; }
   bool is(const IdentifierInfo *II) const {
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -1443,6 +1443,368 @@
   }
 }
 
+class Fixer : public UnwrappedLineConsumer {
+public:
+  Fixer(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
+        ArrayRef<CharSourceRange> Ranges)
+      : Style(Style), ID(ID), SourceMgr(SourceMgr),
+        Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
+        Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
+          DummyToken.Tok.setKind(tok::unknown);
+        }
+
+  tooling::Replacements fix(bool *IncompleteFormat) {
+    tooling::Replacements Result;
+    FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
+
+    UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
+                               *this);
+    Parser.parse();
+    assert(UnwrappedLines.rbegin()->empty());
+
+    TokenAnnotator Annotator(Style, Tokens.getKeywords());
+    for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
+         ++Run) {
+      DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
+      for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
+        AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
+        Annotator.annotate(*AnnotatedLines.back());
+      }
+      // FIXME: in the current implementation the granularity of affected range
+      // is an annotated line. However, this is not sufficient. Furthermore,
+      // redundant code introduced by replacements does not necessarily
+      // intercept with ranges of replacements that result in the redundancy.
+      // To determine if some redundant code is actually introduced by
+      // replacements(e.g. deletions), we need to come up with a more
+      // sophisticated way of computing affected ranges.
+      computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
+
+      tooling::Replacements RunResult = fix(Tokens, IncompleteFormat);
+      DEBUG({
+        llvm::dbgs() << "Replacements for run " << Run << ":\n";
+        for (tooling::Replacements::iterator I = RunResult.begin(),
+                                             E = RunResult.end();
+             I != E; ++I) {
+          llvm::dbgs() << I->toString() << "\n";
+        }
+      });
+      for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+        delete AnnotatedLines[i];
+      }
+      Result.insert(RunResult.begin(), RunResult.end());
+    }
+    return Result;
+  }
+
+  tooling::Replacements fix(FormatTokenLexer &Tokens, bool *IncompleteFormat) {
+    reset();
+
+    while (CurrentToken->isNot(tok::eof)) {
+      switch (CurrentToken->Tok.getKind()) {
+      case tok::colon:
+        if (CurrentToken->Type == TT_CtorInitializerColon) {
+          checkConstructorInitList();
+        } else {
+          nextToken();
+        }
+        break;
+      default:
+        nextToken();
+        break;
+      }
+    }
+
+    tooling::Replacements Fixes;
+    for (const auto *Tok : RedundantTokens) {
+      Fixes.insert(tooling::Replacement(SourceMgr, Tok->Tok.getLocation(),
+                                        Tok->Tok.getLength(), ""));
+    }
+    return Fixes;
+  }
+
+  void checkConstructorInitList() {
+    assert(CurrentToken->Type == TT_CtorInitializerColon &&
+           "Expect TT_CtorInitializerColon Token!");
+    FormatToken *CtorInitColonTok = CurrentToken;
+    nextToken();
+    bool IsListEmpty = true;
+    bool Done = false;
+    // This vector stores comments between the last token not deleted and the
+    // current token.
+    SmallVector<FormatToken *, 1> Comments;
+    while (!Done && CurrentToken->isNot(tok::eof)) {
+      switch (CurrentToken->Tok.getKind()) {
+      case tok::comma:
+        if (LastTokenNotDeleted->isOneOf(tok::comma, tok::colon)) {
+          deleteToken(CurrentToken);
+          // If there is a new line before the deleted comma, the comment may
+          // belong to the previous token.
+          if (!CurrentToken->HasUnescapedNewline) {
+            for (auto *Comment : Comments) {
+              deleteToken(Comment);
+            }
+          }
+        }
+        break;
+      case tok::l_paren:
+        // We need to skip a pair of parentheses here because it is possible
+        // that "({ ... })" appears in the initialization list, and we do not
+        // want to return when we get the "{" in the parentheses.
+        skipParens();
+        break;
+      case tok::l_brace:
+        if (LastTokenNotDeleted->is(tok::comma)) {
+          deleteToken(LastTokenNotDeleted);
+          for (auto *Comment : Comments) {
+            deleteToken(Comment);
+          }
+          // FIXME: LastTokenNotDeleted should be the token before it now, but
+          // we do not have a pointer to it. Now we make it point to DummyToken
+          // in this case since we are finishing the checking.
+          LastTokenNotDeleted = &DummyToken;
+        }
+        Done = true;
+        break;
+      case tok::comment:
+        // If the last deleted token is followed by a comment "//...", then we
+        // delete the comment as well.
+        if (LastToken->Deleted && CurrentToken->TokenText.startswith("//")) {
+          deleteToken(CurrentToken);
+        } else {
+          Comments.push_back(CurrentToken);
+        }
+        break;
+      default:
+        IsListEmpty = false;
+        break;
+      }
+      if (!Done) {
+        if (CurrentToken->isNot(tok::comment)) {
+          Comments.clear();
+        }
+        nextToken();
+      }
+    }
+    if (IsListEmpty) {
+      // FIXME: LastTokenNotDeleted might also be affected. But since we are at
+      // the end of the check. This can be ignored now.
+      deleteToken(CtorInitColonTok);
+    }
+  }
+
+private:
+  void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
+    assert(!UnwrappedLines.empty());
+    UnwrappedLines.back().push_back(TheLine);
+  }
+
+  void finishRun() override {
+    UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
+  }
+
+  void reset() {
+    CurrentLine = 0;
+    LastToken = nullptr;
+    // Make LastTokenNotDeleted point somewhere in the beginning so that we
+    // don't need to check nullptr every time we access it.
+    LastTokenNotDeleted = &DummyToken;
+    CurrentToken = AnnotatedLines[CurrentLine]->First;
+  }
+
+  inline void deleteToken(FormatToken *Tok) {
+    // FIXME: we need better way to determine wether to delete this token.
+    if (!AnnotatedLines[CurrentLine]->Affected) return;
+    Tok->Deleted = true;
+    RedundantTokens.insert(Tok);
+  }
+
+  void nextToken() {
+    if (CurrentToken->is(tok::eof)) return;
+    llvm::errs() << "Current token is " << CurrentToken->Tok.getName() << "\n";
+    LastToken = CurrentToken;
+    if (!LastToken->Deleted && LastToken->isNot(tok::comment)) {
+      LastTokenNotDeleted = LastToken;
+    }
+    if (CurrentToken->Next) {
+      CurrentToken = CurrentToken->Next;
+      return;
+    }
+
+    ++CurrentLine;
+    assert(CurrentLine < AnnotatedLines.size());
+    CurrentToken = AnnotatedLines[CurrentLine]->First;
+    assert(CurrentToken);
+  }
+
+  void skipParens() {
+    assert(CurrentToken->is(tok::l_paren));
+    nextToken();
+    while (CurrentToken->isNot(tok::eof)) {
+      switch (CurrentToken->Tok.getKind()) {
+      case tok::l_paren:
+        skipParens();
+        break;
+      case tok::r_paren:
+        return;
+      default:
+        break;
+      }
+      nextToken();
+    }
+  }
+
+  // FIXME: these functions for computing affected lines are copied from
+  // Formatter. Refactor to reduce duplication - make these functions non-member
+  // function?
+  //
+  // Determines which lines are affected by the SourceRanges given as input.
+  // Returns \c true if at least one line between I and E or one of their
+  // children is affected.
+  bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
+                            SmallVectorImpl<AnnotatedLine *>::iterator E) {
+    bool SomeLineAffected = false;
+    const AnnotatedLine *PreviousLine = nullptr;
+    while (I != E) {
+      AnnotatedLine *Line = *I;
+      Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
+
+      // If a line is part of a preprocessor directive, it needs to be formatted
+      // if any token within the directive is affected.
+      if (Line->InPPDirective) {
+        FormatToken *Last = Line->Last;
+        SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
+        while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
+          Last = (*PPEnd)->Last;
+          ++PPEnd;
+        }
+
+        if (affectsTokenRange(*Line->First, *Last,
+                              /*IncludeLeadingNewlines=*/false)) {
+          SomeLineAffected = true;
+          markAllAsAffected(I, PPEnd);
+        }
+        I = PPEnd;
+        continue;
+      }
+
+      if (nonPPLineAffected(Line, PreviousLine))
+        SomeLineAffected = true;
+
+      PreviousLine = Line;
+      ++I;
+    }
+    return SomeLineAffected;
+  }
+
+  // Returns true if the range from 'First' to 'Last' intersects with one of the
+  // input ranges.
+  bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
+                         bool IncludeLeadingNewlines) {
+    SourceLocation Start = First.WhitespaceRange.getBegin();
+    if (!IncludeLeadingNewlines)
+      Start = Start.getLocWithOffset(First.LastNewlineOffset);
+    SourceLocation End = Last.getStartOfNonWhitespace();
+    End = End.getLocWithOffset(Last.TokenText.size());
+    CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
+    return affectsCharSourceRange(Range);
+  }
+
+  // Returns true if one of the input ranges intersect the leading empty lines
+  // before 'Tok'.
+  bool affectsLeadingEmptyLines(const FormatToken &Tok) {
+    CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
+        Tok.WhitespaceRange.getBegin(),
+        Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
+    return affectsCharSourceRange(EmptyLineRange);
+  }
+
+  // Returns true if 'Range' intersects with one of the input ranges.
+  bool affectsCharSourceRange(const CharSourceRange &Range) {
+    for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
+                                                          E = Ranges.end();
+         I != E; ++I) {
+      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
+          !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
+        return true;
+    }
+    return false;
+  }
+
+  // Determines whether 'Line' is affected by the SourceRanges given as input.
+  // Returns \c true if line or one if its children is affected.
+  bool nonPPLineAffected(AnnotatedLine *Line,
+                         const AnnotatedLine *PreviousLine) {
+    bool SomeLineAffected = false;
+    Line->ChildrenAffected =
+        computeAffectedLines(Line->Children.begin(), Line->Children.end());
+    if (Line->ChildrenAffected)
+      SomeLineAffected = true;
+
+    // Stores whether one of the line's tokens is directly affected.
+    bool SomeTokenAffected = false;
+    // Stores whether we need to look at the leading newlines of the next token
+    // in order to determine whether it was affected.
+    bool IncludeLeadingNewlines = false;
+
+    // Stores whether the first child line of any of this line's tokens is
+    // affected.
+    bool SomeFirstChildAffected = false;
+
+    for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
+      // Determine whether 'Tok' was affected.
+      if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
+        SomeTokenAffected = true;
+
+      // Determine whether the first child of 'Tok' was affected.
+      if (!Tok->Children.empty() && Tok->Children.front()->Affected)
+        SomeFirstChildAffected = true;
+
+      IncludeLeadingNewlines = Tok->Children.empty();
+    }
+
+    // Was this line moved, i.e. has it previously been on the same line as an
+    // affected line?
+    bool LineMoved = PreviousLine && PreviousLine->Affected &&
+                     Line->First->NewlinesBefore == 0;
+
+    bool IsContinuedComment =
+        Line->First->is(tok::comment) && Line->First->Next == nullptr &&
+        Line->First->NewlinesBefore < 2 && PreviousLine &&
+        PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
+
+    if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
+        IsContinuedComment) {
+      Line->Affected = true;
+      SomeLineAffected = true;
+    }
+    return SomeLineAffected;
+  }
+
+  // Marks all lines between I and E as well as all their children as affected.
+  void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
+                         SmallVectorImpl<AnnotatedLine *>::iterator E) {
+    while (I != E) {
+      (*I)->Affected = true;
+      markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
+      ++I;
+    }
+  }
+
+  FormatStyle Style;
+  FileID ID;
+  SourceManager &SourceMgr;
+  SmallVector<CharSourceRange, 8> Ranges;
+  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
+  encoding::Encoding Encoding;
+  SmallVector<AnnotatedLine *, 16> AnnotatedLines;
+  size_t CurrentLine;
+  FormatToken DummyToken;
+  FormatToken *LastToken;
+  FormatToken *LastTokenNotDeleted;
+  FormatToken *CurrentToken;
+  std::set<FormatToken *> RedundantTokens;
+};
+
 class Formatter : public UnwrappedLineConsumer {
 public:
   Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
@@ -2058,6 +2420,36 @@
   return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
 }
 
+tooling::Replacements fix(const FormatStyle &Style, StringRef Code,
+                          ArrayRef<tooling::Range> Ranges, StringRef FileName,
+                          bool *IncompleteFormat) {
+  if (Style.DisableFormat)
+    return tooling::Replacements();
+
+  IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
+      new vfs::InMemoryFileSystem);
+  FileManager Files(FileSystemOptions(), InMemoryFileSystem);
+  DiagnosticsEngine Diagnostics(
+      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
+      new DiagnosticOptions);
+  SourceManager SourceMgr(Diagnostics, Files);
+  InMemoryFileSystem->addFile(
+      FileName, 0, llvm::MemoryBuffer::getMemBuffer(
+                       Code, FileName, /*RequiresNullTerminator=*/false));
+  FileID ID = SourceMgr.createFileID(Files.getFile(FileName), SourceLocation(),
+                                     clang::SrcMgr::C_User);
+  SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
+  std::vector<CharSourceRange> CharRanges;
+  for (const tooling::Range &Range : Ranges) {
+    SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
+    SourceLocation End = Start.getLocWithOffset(Range.getLength());
+    CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
+  }
+
+  Fixer Fix(Style, SourceMgr, ID, CharRanges);
+  return Fix.fix(IncompleteFormat);
+}
+
 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
   LangOptions LangOpts;
   LangOpts.CPlusPlus = 1;
Index: include/clang/Format/Format.h
===================================================================
--- include/clang/Format/Format.h
+++ include/clang/Format/Format.h
@@ -807,6 +807,14 @@
                                StringRef FileName = "<stdin>",
                                bool *IncompleteFormat = nullptr);
 
+/// \brief Reformats the given \p Ranges in \p Code.
+///
+/// Otherwise identical to the reformat() function using a file ID.
+tooling::Replacements fix(const FormatStyle &Style, StringRef Code,
+                          ArrayRef<tooling::Range> Ranges,
+                          StringRef FileName = "<stdin>",
+                          bool *IncompleteFormat = nullptr);
+
 /// \brief Returns the ``LangOpts`` that the formatter expects you to set.
 ///
 /// \param Style determines specific settings for lexing mode.

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D18551: Added Fixer implementation and fix() interface in clang-format for removing redundant code.

Reply via email to