Re: r271883 - [clang-format] make header guard identification stricter (with Lexer).
Hi Mehdi, No, it is not intended... thanks for pointing that out! I have fixed it at rL272465. Thanks, Eric On Fri, Jun 10, 2016 at 10:42 PM Mehdi Amini wrote: > Hi Eric, > > > On Jun 6, 2016, at 4:00 AM, Eric Liu via cfe-commits < > cfe-commits@lists.llvm.org> wrote: > > > > Author: ioeric > > Date: Mon Jun 6 06:00:13 2016 > > New Revision: 271883 > > > > URL: http://llvm.org/viewvc/llvm-project?rev=271883&view=rev > > Log: > > [clang-format] make header guard identification stricter (with Lexer). > > > > Summary: make header guard identification stricter with Lexer. > > > > Reviewers: djasper > > > > Subscribers: klimek, cfe-commits > > > > Differential Revision: http://reviews.llvm.org/D20959 > > > > Modified: > >cfe/trunk/lib/Format/Format.cpp > >cfe/trunk/unittests/Format/CleanupTest.cpp > > > > Modified: cfe/trunk/lib/Format/Format.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=271883&r1=271882&r2=271883&view=diff > > > == > > --- cfe/trunk/lib/Format/Format.cpp (original) > > +++ cfe/trunk/lib/Format/Format.cpp Mon Jun 6 06:00:13 2016 > > @@ -1436,6 +1436,49 @@ inline bool isHeaderInsertion(const tool > > > llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText()); > > } > > > > +void skipComments(Lexer &Lex, Token &Tok) { > > + while (Tok.is(tok::comment)) > > +if (Lex.LexFromRawLexer(Tok)) > > + return; > > +} > > + > > +// Check if a sequence of tokens is like "# ". If > it is, > > +// \p Tok will be the token after this directive; otherwise, it can be > any token > > +// after the given \p Tok (including \p Tok). > > +bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token > &Tok) { > > + bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && > > + Tok.is(tok::raw_identifier) && > > + Tok.getRawIdentifier() == Name && > !Lex.LexFromRawLexer(Tok) && > > + Tok.is(tok::raw_identifier); > > + if (Matched) > > +Lex.LexFromRawLexer(Tok); > > + return Matched; > > +} > > + > > +unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, > > + StringRef Code, > > + FormatStyle Style) { > > > FormatStyle is 360B, did you really intended to pass it by value here? > > -- > Mehdi > > > > + std::unique_ptr Env = > > + Environment::CreateVirtualEnvironment(Code, FileName, > /*Ranges=*/{}); > > + const SourceManager &SourceMgr = Env->getSourceManager(); > > + Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), > SourceMgr, > > +getFormattingLangOpts(Style)); > > + Token Tok; > > + // Get the first token. > > + Lex.LexFromRawLexer(Tok); > > + skipComments(Lex, Tok); > > + unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation()); > > + if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { > > +skipComments(Lex, Tok); > > +if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) > > + return SourceMgr.getFileOffset(Tok.getLocation()); > > + } > > + return AfterComments; > > +} > > + > > +// FIXME: we also need to insert a '\n' at the end of the code if we > have an > > +// insertion with offset Code.size(), and there is no '\n' at the end > of the > > +// code. > > // FIXME: do not insert headers into conditional #include blocks, e.g. > #includes > > // surrounded by compile condition "#if...". > > // FIXME: do not insert existing headers. > > @@ -1469,20 +1512,6 @@ fixCppIncludeInsertions(StringRef Code, > > StringRef FileName = Replaces.begin()->getFilePath(); > > IncludeCategoryManager Categories(Style, FileName); > > > > - std::unique_ptr Env = > > - Environment::CreateVirtualEnvironment(Code, FileName, > /*Ranges=*/{}); > > - const SourceManager &SourceMgr = Env->getSourceManager(); > > - Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), > SourceMgr, > > -getFormattingLangOpts(Style)); > > - Token Tok; > > - // All new headers should be inserted after this offset. > > - int MinInsertOffset = Code.size(); > > - while (!Lex.LexFromRawLexer(Tok)) { > > -if (Tok.isNot(tok::comment)) { > > - MinInsertOffset = SourceMgr.getFileOffset(Tok.getLocation()); > > - break; > > -} > > - } > > // Record the offset of the end of the last include in each category. > > std::map CategoryEndOffsets; > > // All possible priorities. > > @@ -1491,26 +1520,25 @@ fixCppIncludeInsertions(StringRef Code, > > for (const auto &Category : Style.IncludeCategories) > > Priorities.insert(Category.Priority); > > int FirstIncludeOffset = -1; > > - bool HeaderGuardFound = false; > > + // All new headers should be inserted after this offset. > > + unsigned MinInsertOffset = > > + getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style); > > StringRef Trimm
Re: r271883 - [clang-format] make header guard identification stricter (with Lexer).
Hi Eric, > On Jun 6, 2016, at 4:00 AM, Eric Liu via cfe-commits > wrote: > > Author: ioeric > Date: Mon Jun 6 06:00:13 2016 > New Revision: 271883 > > URL: http://llvm.org/viewvc/llvm-project?rev=271883&view=rev > Log: > [clang-format] make header guard identification stricter (with Lexer). > > Summary: make header guard identification stricter with Lexer. > > Reviewers: djasper > > Subscribers: klimek, cfe-commits > > Differential Revision: http://reviews.llvm.org/D20959 > > Modified: >cfe/trunk/lib/Format/Format.cpp >cfe/trunk/unittests/Format/CleanupTest.cpp > > Modified: cfe/trunk/lib/Format/Format.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=271883&r1=271882&r2=271883&view=diff > == > --- cfe/trunk/lib/Format/Format.cpp (original) > +++ cfe/trunk/lib/Format/Format.cpp Mon Jun 6 06:00:13 2016 > @@ -1436,6 +1436,49 @@ inline bool isHeaderInsertion(const tool > llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText()); > } > > +void skipComments(Lexer &Lex, Token &Tok) { > + while (Tok.is(tok::comment)) > +if (Lex.LexFromRawLexer(Tok)) > + return; > +} > + > +// Check if a sequence of tokens is like "# ". If it > is, > +// \p Tok will be the token after this directive; otherwise, it can be any > token > +// after the given \p Tok (including \p Tok). > +bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token > &Tok) { > + bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && > + Tok.is(tok::raw_identifier) && > + Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) > && > + Tok.is(tok::raw_identifier); > + if (Matched) > +Lex.LexFromRawLexer(Tok); > + return Matched; > +} > + > +unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, > + StringRef Code, > + FormatStyle Style) { FormatStyle is 360B, did you really intended to pass it by value here? -- Mehdi > + std::unique_ptr Env = > + Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); > + const SourceManager &SourceMgr = Env->getSourceManager(); > + Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), > SourceMgr, > +getFormattingLangOpts(Style)); > + Token Tok; > + // Get the first token. > + Lex.LexFromRawLexer(Tok); > + skipComments(Lex, Tok); > + unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation()); > + if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { > +skipComments(Lex, Tok); > +if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) > + return SourceMgr.getFileOffset(Tok.getLocation()); > + } > + return AfterComments; > +} > + > +// FIXME: we also need to insert a '\n' at the end of the code if we have an > +// insertion with offset Code.size(), and there is no '\n' at the end of the > +// code. > // FIXME: do not insert headers into conditional #include blocks, e.g. > #includes > // surrounded by compile condition "#if...". > // FIXME: do not insert existing headers. > @@ -1469,20 +1512,6 @@ fixCppIncludeInsertions(StringRef Code, > StringRef FileName = Replaces.begin()->getFilePath(); > IncludeCategoryManager Categories(Style, FileName); > > - std::unique_ptr Env = > - Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); > - const SourceManager &SourceMgr = Env->getSourceManager(); > - Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), > SourceMgr, > -getFormattingLangOpts(Style)); > - Token Tok; > - // All new headers should be inserted after this offset. > - int MinInsertOffset = Code.size(); > - while (!Lex.LexFromRawLexer(Tok)) { > -if (Tok.isNot(tok::comment)) { > - MinInsertOffset = SourceMgr.getFileOffset(Tok.getLocation()); > - break; > -} > - } > // Record the offset of the end of the last include in each category. > std::map CategoryEndOffsets; > // All possible priorities. > @@ -1491,26 +1520,25 @@ fixCppIncludeInsertions(StringRef Code, > for (const auto &Category : Style.IncludeCategories) > Priorities.insert(Category.Priority); > int FirstIncludeOffset = -1; > - bool HeaderGuardFound = false; > + // All new headers should be inserted after this offset. > + unsigned MinInsertOffset = > + getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style); > StringRef TrimmedCode = Code.drop_front(MinInsertOffset); > SmallVector Lines; > TrimmedCode.split(Lines, '\n'); > - int Offset = MinInsertOffset; > + unsigned Offset = MinInsertOffset; > + unsigned NextLineOffset; > for (auto Line : Lines) { > +NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1); > if (IncludeRegex.match(Line, &Matches)) { > StringRef IncludeName = Matches[2]; >
r271883 - [clang-format] make header guard identification stricter (with Lexer).
Author: ioeric Date: Mon Jun 6 06:00:13 2016 New Revision: 271883 URL: http://llvm.org/viewvc/llvm-project?rev=271883&view=rev Log: [clang-format] make header guard identification stricter (with Lexer). Summary: make header guard identification stricter with Lexer. Reviewers: djasper Subscribers: klimek, cfe-commits Differential Revision: http://reviews.llvm.org/D20959 Modified: cfe/trunk/lib/Format/Format.cpp cfe/trunk/unittests/Format/CleanupTest.cpp Modified: cfe/trunk/lib/Format/Format.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=271883&r1=271882&r2=271883&view=diff == --- cfe/trunk/lib/Format/Format.cpp (original) +++ cfe/trunk/lib/Format/Format.cpp Mon Jun 6 06:00:13 2016 @@ -1436,6 +1436,49 @@ inline bool isHeaderInsertion(const tool llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText()); } +void skipComments(Lexer &Lex, Token &Tok) { + while (Tok.is(tok::comment)) +if (Lex.LexFromRawLexer(Tok)) + return; +} + +// Check if a sequence of tokens is like "# ". If it is, +// \p Tok will be the token after this directive; otherwise, it can be any token +// after the given \p Tok (including \p Tok). +bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) { + bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && + Tok.is(tok::raw_identifier) && + Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) && + Tok.is(tok::raw_identifier); + if (Matched) +Lex.LexFromRawLexer(Tok); + return Matched; +} + +unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, + StringRef Code, + FormatStyle Style) { + std::unique_ptr Env = + Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); + const SourceManager &SourceMgr = Env->getSourceManager(); + Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr, +getFormattingLangOpts(Style)); + Token Tok; + // Get the first token. + Lex.LexFromRawLexer(Tok); + skipComments(Lex, Tok); + unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation()); + if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { +skipComments(Lex, Tok); +if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) + return SourceMgr.getFileOffset(Tok.getLocation()); + } + return AfterComments; +} + +// FIXME: we also need to insert a '\n' at the end of the code if we have an +// insertion with offset Code.size(), and there is no '\n' at the end of the +// code. // FIXME: do not insert headers into conditional #include blocks, e.g. #includes // surrounded by compile condition "#if...". // FIXME: do not insert existing headers. @@ -1469,20 +1512,6 @@ fixCppIncludeInsertions(StringRef Code, StringRef FileName = Replaces.begin()->getFilePath(); IncludeCategoryManager Categories(Style, FileName); - std::unique_ptr Env = - Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); - const SourceManager &SourceMgr = Env->getSourceManager(); - Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr, -getFormattingLangOpts(Style)); - Token Tok; - // All new headers should be inserted after this offset. - int MinInsertOffset = Code.size(); - while (!Lex.LexFromRawLexer(Tok)) { -if (Tok.isNot(tok::comment)) { - MinInsertOffset = SourceMgr.getFileOffset(Tok.getLocation()); - break; -} - } // Record the offset of the end of the last include in each category. std::map CategoryEndOffsets; // All possible priorities. @@ -1491,26 +1520,25 @@ fixCppIncludeInsertions(StringRef Code, for (const auto &Category : Style.IncludeCategories) Priorities.insert(Category.Priority); int FirstIncludeOffset = -1; - bool HeaderGuardFound = false; + // All new headers should be inserted after this offset. + unsigned MinInsertOffset = + getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style); StringRef TrimmedCode = Code.drop_front(MinInsertOffset); SmallVector Lines; TrimmedCode.split(Lines, '\n'); - int Offset = MinInsertOffset; + unsigned Offset = MinInsertOffset; + unsigned NextLineOffset; for (auto Line : Lines) { +NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1); if (IncludeRegex.match(Line, &Matches)) { StringRef IncludeName = Matches[2]; int Category = Categories.getIncludePriority( IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0); - CategoryEndOffsets[Category] = Offset + Line.size() + 1; + CategoryEndOffsets[Category] = NextLineOffset; if (FirstIncludeOffset < 0) FirstIncludeOffset = Offset; } -Offset += Line.size() + 1; -// FIXME: make header guard matching