[PATCH] D26909: [ClangFormat] Only insert #include into the #include block in the beginning of the file.

2016-12-02 Thread Eric Liu via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL288493: [ClangFormat] Only insert #include into the #include 
block in the beginning of… (authored by ioeric).

Changed prior to commit:
  https://reviews.llvm.org/D26909?vs=80044&id=80045#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D26909

Files:
  cfe/trunk/include/clang/Format/Format.h
  cfe/trunk/lib/Format/Format.cpp
  cfe/trunk/unittests/Format/CleanupTest.cpp

Index: cfe/trunk/include/clang/Format/Format.h
===
--- cfe/trunk/include/clang/Format/Format.h
+++ cfe/trunk/include/clang/Format/Format.h
@@ -786,7 +786,14 @@
 /// This also supports inserting/deleting C++ #include directives:
 /// - If a replacement has offset UINT_MAX, length 0, and a replacement text
 ///   that is an #include directive, this will insert the #include into the
-///   correct block in the \p Code.
+///   correct block in the \p Code. When searching for points to insert new
+///   header, this ignores #include's after the #include block(s) in the
+///   beginning of a file to avoid inserting headers into code sections where
+///   new #include's should not be added by default. These code sections
+///   include:
+/// - raw string literals (containing #include).
+/// - #if blocks.
+/// - Special #include's among declarations (e.g. functions).
 /// - If a replacement has offset UINT_MAX, length 1, and a replacement text
 ///   that is the name of the header to be removed, the header will be removed
 ///   from \p Code if it exists.
Index: cfe/trunk/lib/Format/Format.cpp
===
--- cfe/trunk/lib/Format/Format.cpp
+++ cfe/trunk/lib/Format/Format.cpp
@@ -1514,10 +1514,23 @@
   return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1;
 }
 
-void skipComments(Lexer &Lex, Token &Tok) {
-  while (Tok.is(tok::comment))
-if (Lex.LexFromRawLexer(Tok))
-  return;
+// Returns the offset after skipping a sequence of tokens, matched by \p
+// GetOffsetAfterSequence, from the start of the code.
+// \p GetOffsetAfterSequence should be a function that matches a sequence of
+// tokens and returns an offset after the sequence.
+unsigned getOffsetAfterTokenSequence(
+StringRef FileName, StringRef Code, const FormatStyle &Style,
+std::function
+GetOffsetAfterSequense) {
+  std::unique_ptr Env =
+  Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+  const SourceManager &SourceMgr = Env->getSourceManager();
+  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
+getFormattingLangOpts(Style));
+  Token Tok;
+  // Get the first token.
+  Lex.LexFromRawLexer(Tok);
+  return GetOffsetAfterSequense(SourceMgr, Lex, Tok);
 }
 
 // Check if a sequence of tokens is like "# ". If it is,
@@ -1527,44 +1540,96 @@
   bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
  Tok.is(tok::raw_identifier) &&
  Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
- Tok.is(tok::raw_identifier);
+ tok::raw_identifier;
   if (Matched)
 Lex.LexFromRawLexer(Tok);
   return Matched;
 }
 
+void skipComments(Lexer &Lex, Token &Tok) {
+  while (Tok.is(tok::comment))
+if (Lex.LexFromRawLexer(Tok))
+  return;
+}
+
+// Returns the offset after header guard directives and any comments
+// before/after header guards. If no header guard presents in the code, this
+// will returns the offset after skipping all comments from the start of the
+// code.
 unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
StringRef Code,
const FormatStyle &Style) {
-  std::unique_ptr Env =
-  Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
-  const SourceManager &SourceMgr = Env->getSourceManager();
-  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
-getFormattingLangOpts(Style));
-  Token Tok;
-  // Get the first token.
-  Lex.LexFromRawLexer(Tok);
-  skipComments(Lex, Tok);
-  unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation());
-  if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
-skipComments(Lex, Tok);
-if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
-  return SourceMgr.getFileOffset(Tok.getLocation());
+  return getOffsetAfterTokenSequence(
+  FileName, Code, Style,
+  [](const SourceManager &SM, Lexer &Lex, Token Tok) {
+skipComments(Lex, Tok);
+unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
+if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
+  skipComments(Lex, Tok);
+  if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
+return SM.getFileOffset(Tok.getLocation(

[PATCH] D26909: [ClangFormat] Only insert #include into the #include block in the beginning of the file.

2016-12-02 Thread Eric Liu via Phabricator via cfe-commits
ioeric updated this revision to Diff 80044.
ioeric marked an inline comment as done.
ioeric added a comment.

- Addressed comment.


https://reviews.llvm.org/D26909

Files:
  include/clang/Format/Format.h
  lib/Format/Format.cpp
  unittests/Format/CleanupTest.cpp

Index: unittests/Format/CleanupTest.cpp
===
--- unittests/Format/CleanupTest.cpp
+++ unittests/Format/CleanupTest.cpp
@@ -839,6 +839,93 @@
   EXPECT_EQ(Expected, apply(Code, Replaces));
 }
 
+TEST_F(CleanUpReplacementsTest, NoInsertionAfterCode) {
+  std::string Code = "#include \"a.h\"\n"
+ "void f() {}\n"
+ "#include \"b.h\"\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "void f() {}\n"
+ "#include \"b.h\"\n";
+  tooling::Replacements Replaces = toReplacements(
+  {createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, NoInsertionInStringLiteral) {
+  std::string Code = "#include \"a.h\"\n"
+ "const char[] = R\"(\n"
+ "#include \"b.h\"\n"
+ ")\";\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "const char[] = R\"(\n"
+ "#include \"b.h\"\n"
+ ")\";\n";
+  tooling::Replacements Replaces =
+  toReplacements({createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, NoInsertionAfterOtherDirective) {
+  std::string Code = "#include \"a.h\"\n"
+ "#ifdef X\n"
+ "#include \"b.h\"\n"
+ "#endif\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "#ifdef X\n"
+ "#include \"b.h\"\n"
+ "#endif\n";
+  tooling::Replacements Replaces = toReplacements(
+  {createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, CanInsertAfterLongSystemInclude) {
+  std::string Code = "#include \"a.h\"\n"
+ "// comment\n\n"
+ "#include \n";
+  std::string Expected = "#include \"a.h\"\n"
+ "// comment\n\n"
+ "#include \n"
+ "#include \n";
+  tooling::Replacements Replaces =
+  toReplacements({createInsertion("#include ")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, CanInsertAfterComment) {
+  std::string Code = "#include \"a.h\"\n"
+ "// Comment\n"
+ "\n"
+ "/* Comment */\n"
+ "// Comment\n"
+ "\n"
+ "#include \"b.h\"\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "// Comment\n"
+ "\n"
+ "/* Comment */\n"
+ "// Comment\n"
+ "\n"
+ "#include \"b.h\"\n"
+ "#include \"c.h\"\n";
+  tooling::Replacements Replaces =
+  toReplacements({createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, CanDeleteAfterCode) {
+  std::string Code = "#include \"a.h\"\n"
+ "void f() {}\n"
+ "#include \"b.h\"\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "void f() {}\n";
+  tooling::Replacements Replaces = toReplacements({createDeletion("\"b.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: lib/Format/Format.cpp
===
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -1514,10 +1514,23 @@
   return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1;
 }
 
-void skipComments(Lexer &Lex, Token &Tok) {
-  while (Tok.is(tok::comment))
-if (Lex.LexFromRawLexer(Tok))
-  return;
+// Returns the offset after skipping a sequence of tokens, matched by \p
+// GetOffsetAfterSequence, from the start of the code.
+// \p GetOffsetAfterSequence should be a function that matches a sequence of
+// tokens and returns an offset after the sequence.
+unsigned getOffsetAfterTokenSequence(
+StringRef FileName, StringRef Code, const FormatStyle &Style,
+std::function
+GetOffsetAfterSequense) {
+  std::unique_ptr Env =
+  Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+  const SourceManager &SourceMgr = Env->getSourceManager();
+  Lexer Lex(Env->getFileID(), Source

[PATCH] D26909: [ClangFormat] Only insert #include into the #include block in the beginning of the file.

2016-12-02 Thread Daniel Jasper via Phabricator via cfe-commits
djasper accepted this revision.
djasper added a comment.
This revision is now accepted and ready to land.

Looks good.




Comment at: unittests/Format/CleanupTest.cpp:898
+
+TEST_F(CleanUpReplacementsTest, CanInsertAfterComment) {
+  std::string Code = "#include \"a.h\"\n"

Can you add a similar test or update this one that empty lines are also 
acceptable?


https://reviews.llvm.org/D26909



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D26909: [ClangFormat] Only insert #include into the #include block in the beginning of the file.

2016-12-02 Thread Eric Liu via Phabricator via cfe-commits
ioeric added inline comments.



Comment at: lib/Format/Format.cpp:1521
+// tokens and returns an offset after the sequence.
+unsigned getOffsetAfterTokenSequence(
+StringRef FileName, StringRef Code, const FormatStyle &Style,

djasper wrote:
> I am somewhat hesitant to put more and more code here. Can we move some of 
> this to a separate file? Is there a reasonable separation?
Yeah, it makes sense to separate these.

I'll do the refactoring in a followup patch to make the current change clear.



Comment at: lib/Format/Format.cpp:1601
+
+// Returns the offset of the last #include directive after which a new
+// #include can be inserted. If no such #include in the code, this returns the

djasper wrote:
> I might be useful to precisely describe (here or elsewhere) what is 
> determined here, not from an implementation standpoint, but from what a user 
> can expect.
Added some more comments here and in the public interface.


https://reviews.llvm.org/D26909



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D26909: [ClangFormat] Only insert #include into the #include block in the beginning of the file.

2016-12-02 Thread Eric Liu via Phabricator via cfe-commits
ioeric updated this revision to Diff 80042.
ioeric marked an inline comment as done.
ioeric added a comment.

- Updated comments.


https://reviews.llvm.org/D26909

Files:
  include/clang/Format/Format.h
  lib/Format/Format.cpp
  unittests/Format/CleanupTest.cpp

Index: unittests/Format/CleanupTest.cpp
===
--- unittests/Format/CleanupTest.cpp
+++ unittests/Format/CleanupTest.cpp
@@ -839,6 +839,89 @@
   EXPECT_EQ(Expected, apply(Code, Replaces));
 }
 
+TEST_F(CleanUpReplacementsTest, NoInsertionAfterCode) {
+  std::string Code = "#include \"a.h\"\n"
+ "void f() {}\n"
+ "#include \"b.h\"\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "void f() {}\n"
+ "#include \"b.h\"\n";
+  tooling::Replacements Replaces = toReplacements(
+  {createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, NoInsertionInStringLiteral) {
+  std::string Code = "#include \"a.h\"\n"
+ "const char[] = R\"(\n"
+ "#include \"b.h\"\n"
+ ")\";\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "const char[] = R\"(\n"
+ "#include \"b.h\"\n"
+ ")\";\n";
+  tooling::Replacements Replaces =
+  toReplacements({createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, NoInsertionAfterOtherDirective) {
+  std::string Code = "#include \"a.h\"\n"
+ "#ifdef X\n"
+ "#include \"b.h\"\n"
+ "#endif\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "#ifdef X\n"
+ "#include \"b.h\"\n"
+ "#endif\n";
+  tooling::Replacements Replaces = toReplacements(
+  {createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, CanInsertAfterLongSystemInclude) {
+  std::string Code = "#include \"a.h\"\n"
+ "// comment\n\n"
+ "#include \n";
+  std::string Expected = "#include \"a.h\"\n"
+ "// comment\n\n"
+ "#include \n"
+ "#include \n";
+  tooling::Replacements Replaces =
+  toReplacements({createInsertion("#include ")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, CanInsertAfterComment) {
+  std::string Code = "#include \"a.h\"\n"
+ "// Comment\n"
+ "/* Comment */\n"
+ "// Comment\n"
+ "#include \"b.h\"\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "// Comment\n"
+ "/* Comment */\n"
+ "// Comment\n"
+ "#include \"b.h\"\n"
+ "#include \"c.h\"\n";
+  tooling::Replacements Replaces =
+  toReplacements({createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, CanDeleteAfterCode) {
+  std::string Code = "#include \"a.h\"\n"
+ "void f() {}\n"
+ "#include \"b.h\"\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "void f() {}\n";
+  tooling::Replacements Replaces = toReplacements({createDeletion("\"b.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: lib/Format/Format.cpp
===
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -1514,10 +1514,23 @@
   return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1;
 }
 
-void skipComments(Lexer &Lex, Token &Tok) {
-  while (Tok.is(tok::comment))
-if (Lex.LexFromRawLexer(Tok))
-  return;
+// Returns the offset after skipping a sequence of tokens, matched by \p
+// GetOffsetAfterSequence, from the start of the code.
+// \p GetOffsetAfterSequence should be a function that matches a sequence of
+// tokens and returns an offset after the sequence.
+unsigned getOffsetAfterTokenSequence(
+StringRef FileName, StringRef Code, const FormatStyle &Style,
+std::function
+GetOffsetAfterSequense) {
+  std::unique_ptr Env =
+  Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+  const SourceManager &SourceMgr = Env->getSourceManager();
+  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
+getFormattingLangOpts(Style));
+  Token Tok;
+  // Get the f

[PATCH] D26909: [ClangFormat] Only insert #include into the #include block in the beginning of the file.

2016-12-02 Thread Daniel Jasper via Phabricator via cfe-commits
djasper added inline comments.



Comment at: lib/Format/Format.cpp:1521
+// tokens and returns an offset after the sequence.
+unsigned getOffsetAfterTokenSequence(
+StringRef FileName, StringRef Code, const FormatStyle &Style,

I am somewhat hesitant to put more and more code here. Can we move some of this 
to a separate file? Is there a reasonable separation?



Comment at: lib/Format/Format.cpp:1601
+
+// Returns the offset of the last #include directive after which a new
+// #include can be inserted. If no such #include in the code, this returns the

I might be useful to precisely describe (here or elsewhere) what is determined 
here, not from an implementation standpoint, but from what a user can expect.


https://reviews.llvm.org/D26909



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D26909: [ClangFormat] Only insert #include into the #include block in the beginning of the file.

2016-12-02 Thread Eric Liu via Phabricator via cfe-commits
ioeric updated this revision to Diff 80039.
ioeric added a comment.

- Merge branch 'master' of http://llvm.org/git/clang into insert
- Still delete #include's in code.


https://reviews.llvm.org/D26909

Files:
  lib/Format/Format.cpp
  unittests/Format/CleanupTest.cpp

Index: unittests/Format/CleanupTest.cpp
===
--- unittests/Format/CleanupTest.cpp
+++ unittests/Format/CleanupTest.cpp
@@ -839,6 +839,89 @@
   EXPECT_EQ(Expected, apply(Code, Replaces));
 }
 
+TEST_F(CleanUpReplacementsTest, NoInsertionAfterCode) {
+  std::string Code = "#include \"a.h\"\n"
+ "void f() {}\n"
+ "#include \"b.h\"\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "void f() {}\n"
+ "#include \"b.h\"\n";
+  tooling::Replacements Replaces = toReplacements(
+  {createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, NoInsertionInStringLiteral) {
+  std::string Code = "#include \"a.h\"\n"
+ "const char[] = R\"(\n"
+ "#include \"b.h\"\n"
+ ")\";\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "const char[] = R\"(\n"
+ "#include \"b.h\"\n"
+ ")\";\n";
+  tooling::Replacements Replaces =
+  toReplacements({createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, NoInsertionAfterOtherDirective) {
+  std::string Code = "#include \"a.h\"\n"
+ "#ifdef X\n"
+ "#include \"b.h\"\n"
+ "#endif\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "#ifdef X\n"
+ "#include \"b.h\"\n"
+ "#endif\n";
+  tooling::Replacements Replaces = toReplacements(
+  {createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, CanInsertAfterLongSystemInclude) {
+  std::string Code = "#include \"a.h\"\n"
+ "// comment\n\n"
+ "#include \n";
+  std::string Expected = "#include \"a.h\"\n"
+ "// comment\n\n"
+ "#include \n"
+ "#include \n";
+  tooling::Replacements Replaces =
+  toReplacements({createInsertion("#include ")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, CanInsertAfterComment) {
+  std::string Code = "#include \"a.h\"\n"
+ "// Comment\n"
+ "/* Comment */\n"
+ "// Comment\n"
+ "#include \"b.h\"\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "// Comment\n"
+ "/* Comment */\n"
+ "// Comment\n"
+ "#include \"b.h\"\n"
+ "#include \"c.h\"\n";
+  tooling::Replacements Replaces =
+  toReplacements({createInsertion("#include \"c.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, CanDeleteAfterCode) {
+  std::string Code = "#include \"a.h\"\n"
+ "void f() {}\n"
+ "#include \"b.h\"\n";
+  std::string Expected = "#include \"a.h\"\n"
+ "void f() {}\n";
+  tooling::Replacements Replaces = toReplacements({createDeletion("\"b.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: lib/Format/Format.cpp
===
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -1514,10 +1514,23 @@
   return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1;
 }
 
-void skipComments(Lexer &Lex, Token &Tok) {
-  while (Tok.is(tok::comment))
-if (Lex.LexFromRawLexer(Tok))
-  return;
+// Returns the offset after skipping a sequence of tokens, matched by \p
+// GetOffsetAfterSequence, from the start of the code.
+// \p GetOffsetAfterSequence should be a function that matches a sequence of
+// tokens and returns an offset after the sequence.
+unsigned getOffsetAfterTokenSequence(
+StringRef FileName, StringRef Code, const FormatStyle &Style,
+std::function
+GetOffsetAfterSequense) {
+  std::unique_ptr Env =
+  Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+  const SourceManager &SourceMgr = Env->getSourceManager();
+  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
+getFormattingLangOpts(Style));
+  Token Tok;
+  // Ge