[PATCH] D143070: [clang-format] Enable FormatTokenSource to insert tokens.
klimek updated this revision to Diff 497415. klimek marked 3 inline comments as done. klimek added a comment. Address reviewer comments. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D143070/new/ https://reviews.llvm.org/D143070 Files: clang/lib/Format/FormatTokenSource.h clang/lib/Format/UnwrappedLineParser.cpp clang/lib/Format/UnwrappedLineParser.h clang/unittests/Format/FormatTokenSourceTest.cpp clang/unittests/Format/TestLexer.h Index: clang/unittests/Format/TestLexer.h === --- clang/unittests/Format/TestLexer.h +++ clang/unittests/Format/TestLexer.h @@ -71,7 +71,8 @@ TokenList annotate(llvm::StringRef Code) { FormatTokenLexer Lex = getNewLexer(Code); -auto Tokens = Lex.lex(); +auto Toks = Lex.lex(); +SmallVector Tokens(Toks.begin(), Toks.end()); UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this); Parser.parse(); TokenAnnotator Annotator(Style, Lex.getKeywords()); Index: clang/unittests/Format/FormatTokenSourceTest.cpp === --- clang/unittests/Format/FormatTokenSourceTest.cpp +++ clang/unittests/Format/FormatTokenSourceTest.cpp @@ -30,10 +30,15 @@ FormatToken *Tok = FormatTok; \ EXPECT_EQ((Tok)->Tok.getKind(), Kind) << *(Tok); \ } while (false); +#define EXPECT_TOKEN_ID(FormatTok, Name) \ + do { \ +FormatToken *Tok = FormatTok; \ +EXPECT_EQ((Tok)->Tok.getKind(), tok::identifier) << *(Tok);\ +EXPECT_EQ((Tok)->TokenText, Name) << *(Tok); \ + } while (false); TEST_F(IndexedTokenSourceTest, EmptyInput) { - TokenList Tokens = lex(""); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("")); EXPECT_FALSE(Source.isEOF()); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); EXPECT_TRUE(Source.isEOF()); @@ -46,8 +51,7 @@ } TEST_F(IndexedTokenSourceTest, NavigateTokenStream) { - TokenList Tokens = lex("int a;"); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("int a;")); EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::kw_int); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::kw_int); EXPECT_EQ(Source.getPreviousToken(), nullptr); @@ -60,11 +64,12 @@ EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::eof); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); + EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi); } TEST_F(IndexedTokenSourceTest, ResetPosition) { - TokenList Tokens = lex("int a;"); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("int a;")); Source.getNextToken(); unsigned Position = Source.getPosition(); Source.getNextToken(); @@ -73,6 +78,50 @@ EXPECT_TOKEN_KIND(Source.setPosition(Position), tok::kw_int); } +TEST_F(IndexedTokenSourceTest, InsertTokens) { + IndexedTokenSource Source(lex("A1 A2")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B2"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A2"); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensAtEOF) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B2"); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensRecursive) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + // A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1"); + // B1 A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1"); + // C1 B1 A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1"); + // D1 C1 B1 A1 + EXPECT_TOKEN_ID(Source.getNextToken(), "C1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensRecursiveAtEndOfSequence) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); +} + } // namespace } // names
[PATCH] D143070: [clang-format] Enable FormatTokenSource to insert tokens.
klimek added inline comments. Comment at: clang/lib/Format/FormatTokenSource.h:74 public: - IndexedTokenSource(ArrayRef Tokens) + IndexedTokenSource(SmallVectorImpl &Tokens) : Tokens(Tokens), Position(-1) {} sammccall wrote: > As I understand it, this parameter is used: > - to provide the initial set of input tokens the source will iterate over > - as a common address space for input + synthesized tokens, to allow the > jump mechanism to work > - to make the caller responsible for ownership/lifetime of the synthesized > tokens too > > This simplifies the implementation, my only problem with this is it seems > unusual and confusing. > A comment explaining the roles of this `Tokens` param would help a bit. > > Alternatively you could consider slightly different data structures just for > the purpose of making interfaces more obvious: e.g. pass in a > `BumpPtrAllocator&`, allocate scratch tokens there, and use pointers instead > of indices (jumps become a ptr->ptr map etc) Noticing none of this makes sense. We should really just copy the tokens, given that we modify the vector. Comment at: clang/lib/Format/FormatTokenSource.h:94 FormatToken *getPreviousToken() override { return Position > 0 ? Tokens[Position - 1] : nullptr; } sammccall wrote: > this no longer seems valid, immediately after calling insertTokens(), > Position is at the start of a jump range, and Tokens[Position - 1] will be > the EOF at the end of the main stream or previous jump range. > > if this is never going to happen, you can detect it (I don't think > Tokens[Position - 1] can be EOF in any other way) Added comment in the interface and assert. Comment at: clang/lib/Format/FormatTokenSource.h:115 unsigned getPosition() override { LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); sammccall wrote: > maybe add a comment that positions don't have meaningful order and this is > only useful to restore the position? > > (All the callsites look good, it just feels like a trap) Added a comment. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D143070/new/ https://reviews.llvm.org/D143070 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D143070: [clang-format] Enable FormatTokenSource to insert tokens.
klimek updated this revision to Diff 497416. klimek added a comment. Undo changes to ownership of initial set of FormatTokens. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D143070/new/ https://reviews.llvm.org/D143070 Files: clang/lib/Format/FormatTokenSource.h clang/lib/Format/UnwrappedLineParser.h clang/unittests/Format/FormatTokenSourceTest.cpp Index: clang/unittests/Format/FormatTokenSourceTest.cpp === --- clang/unittests/Format/FormatTokenSourceTest.cpp +++ clang/unittests/Format/FormatTokenSourceTest.cpp @@ -30,10 +30,15 @@ FormatToken *Tok = FormatTok; \ EXPECT_EQ((Tok)->Tok.getKind(), Kind) << *(Tok); \ } while (false); +#define EXPECT_TOKEN_ID(FormatTok, Name) \ + do { \ +FormatToken *Tok = FormatTok; \ +EXPECT_EQ((Tok)->Tok.getKind(), tok::identifier) << *(Tok);\ +EXPECT_EQ((Tok)->TokenText, Name) << *(Tok); \ + } while (false); TEST_F(IndexedTokenSourceTest, EmptyInput) { - TokenList Tokens = lex(""); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("")); EXPECT_FALSE(Source.isEOF()); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); EXPECT_TRUE(Source.isEOF()); @@ -46,8 +51,7 @@ } TEST_F(IndexedTokenSourceTest, NavigateTokenStream) { - TokenList Tokens = lex("int a;"); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("int a;")); EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::kw_int); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::kw_int); EXPECT_EQ(Source.getPreviousToken(), nullptr); @@ -60,11 +64,12 @@ EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::eof); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); + EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi); } TEST_F(IndexedTokenSourceTest, ResetPosition) { - TokenList Tokens = lex("int a;"); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("int a;")); Source.getNextToken(); unsigned Position = Source.getPosition(); Source.getNextToken(); @@ -73,6 +78,50 @@ EXPECT_TOKEN_KIND(Source.setPosition(Position), tok::kw_int); } +TEST_F(IndexedTokenSourceTest, InsertTokens) { + IndexedTokenSource Source(lex("A1 A2")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B2"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A2"); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensAtEOF) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B2"); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensRecursive) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + // A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1"); + // B1 A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1"); + // C1 B1 A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1"); + // D1 C1 B1 A1 + EXPECT_TOKEN_ID(Source.getNextToken(), "C1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensRecursiveAtEndOfSequence) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); +} + } // namespace } // namespace format } // namespace clang Index: clang/lib/Format/UnwrappedLineParser.h === --- clang/lib/Format/UnwrappedLineParser.h +++ clang/lib/Format/UnwrappedLineParser.h @@ -89,7 +89,8 @@ public: UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, - unsigned FirstStartColumn, ArrayRef Tokens, + unsigned FirstStartColumn, + ArrayRef Tokens, UnwrappedLineConsumer &Callback); void parse(); @@ -283,6 +284,8 @@ // FIXME: This is a temporary
[PATCH] D143070: [clang-format] Enable FormatTokenSource to insert tokens.
sammccall accepted this revision. sammccall added inline comments. This revision is now accepted and ready to land. Comment at: clang/lib/Format/FormatTokenSource.h:74 public: - IndexedTokenSource(ArrayRef Tokens) + IndexedTokenSource(SmallVectorImpl &Tokens) : Tokens(Tokens), Position(-1) {} klimek wrote: > sammccall wrote: > > As I understand it, this parameter is used: > > - to provide the initial set of input tokens the source will iterate over > > - as a common address space for input + synthesized tokens, to allow the > > jump mechanism to work > > - to make the caller responsible for ownership/lifetime of the synthesized > > tokens too > > > > This simplifies the implementation, my only problem with this is it seems > > unusual and confusing. > > A comment explaining the roles of this `Tokens` param would help a bit. > > > > Alternatively you could consider slightly different data structures just > > for the purpose of making interfaces more obvious: e.g. pass in a > > `BumpPtrAllocator&`, allocate scratch tokens there, and use pointers > > instead of indices (jumps become a ptr->ptr map etc) > Noticing none of this makes sense. We should really just copy the tokens, > given that we modify the vector. Oops, somehow I missed that this was an array of *pointers* and assumed that copying it was no good. This is much better. Comment at: clang/lib/Format/UnwrappedLineParser.h:287 // owned outside of and handed into the UnwrappedLineParser. + // FIXME: The above fixme doesn't work if we need to create tokens while + // parsing. I'm not really sure how to read the combination of these two FIXMEs... does it mean "we wanted to do this differently one day, but now we never can"? Maybe either delete both, or if you think it's still potentially actionable, something like FIXME: it would be better to have tokens created and owned outside because XYZ, but it's hard because macro expansion mutates the stream (I don't really understand what the prev comment is about: the tokens *are* handed into the UnwrappedLineParser constructor. So I may be off base here) Comment at: clang/unittests/Format/FormatTokenSourceTest.cpp:36 +FormatToken *Tok = FormatTok; \ +EXPECT_EQ((Tok)->Tok.getKind(), tok::identifier) << *(Tok); \ +EXPECT_EQ((Tok)->TokenText, Name) << *(Tok); \ nit: parens on (Tok) are redundant given Tok is a local rather than the macro arg Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D143070/new/ https://reviews.llvm.org/D143070 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D143070: [clang-format] Enable FormatTokenSource to insert tokens.
klimek updated this revision to Diff 497637. klimek marked an inline comment as done. klimek added a comment. Address review comments. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D143070/new/ https://reviews.llvm.org/D143070 Files: clang/lib/Format/FormatTokenSource.h clang/lib/Format/UnwrappedLineParser.h clang/unittests/Format/FormatTokenSourceTest.cpp Index: clang/unittests/Format/FormatTokenSourceTest.cpp === --- clang/unittests/Format/FormatTokenSourceTest.cpp +++ clang/unittests/Format/FormatTokenSourceTest.cpp @@ -28,12 +28,17 @@ #define EXPECT_TOKEN_KIND(FormatTok, Kind) \ do { \ FormatToken *Tok = FormatTok; \ -EXPECT_EQ((Tok)->Tok.getKind(), Kind) << *(Tok); \ +EXPECT_EQ(Tok->Tok.getKind(), Kind) << *Tok; \ + } while (false); +#define EXPECT_TOKEN_ID(FormatTok, Name) \ + do { \ +FormatToken *Tok = FormatTok; \ +EXPECT_EQ(Tok->Tok.getKind(), tok::identifier) << *Tok;\ +EXPECT_EQ(Tok->TokenText, Name) << *Tok; \ } while (false); TEST_F(IndexedTokenSourceTest, EmptyInput) { - TokenList Tokens = lex(""); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("")); EXPECT_FALSE(Source.isEOF()); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); EXPECT_TRUE(Source.isEOF()); @@ -46,8 +51,7 @@ } TEST_F(IndexedTokenSourceTest, NavigateTokenStream) { - TokenList Tokens = lex("int a;"); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("int a;")); EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::kw_int); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::kw_int); EXPECT_EQ(Source.getPreviousToken(), nullptr); @@ -60,11 +64,12 @@ EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::eof); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); + EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi); } TEST_F(IndexedTokenSourceTest, ResetPosition) { - TokenList Tokens = lex("int a;"); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("int a;")); Source.getNextToken(); unsigned Position = Source.getPosition(); Source.getNextToken(); @@ -73,6 +78,50 @@ EXPECT_TOKEN_KIND(Source.setPosition(Position), tok::kw_int); } +TEST_F(IndexedTokenSourceTest, InsertTokens) { + IndexedTokenSource Source(lex("A1 A2")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B2"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A2"); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensAtEOF) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B2"); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensRecursive) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + // A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1"); + // B1 A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1"); + // C1 B1 A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1"); + // D1 C1 B1 A1 + EXPECT_TOKEN_ID(Source.getNextToken(), "C1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensRecursiveAtEndOfSequence) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); +} + } // namespace } // namespace format } // namespace clang Index: clang/lib/Format/UnwrappedLineParser.h === --- clang/lib/Format/UnwrappedLineParser.h +++ clang/lib/Format/UnwrappedLineParser.h @@ -280,9 +280,6 @@ FormatTokenSource *Tokens; UnwrappedLineConsumer &Callback; - // FIXME: This is a temporary measure until we have reworked the ownership -
[PATCH] D143070: [clang-format] Enable FormatTokenSource to insert tokens.
klimek added inline comments. Comment at: clang/lib/Format/UnwrappedLineParser.h:287 // owned outside of and handed into the UnwrappedLineParser. + // FIXME: The above fixme doesn't work if we need to create tokens while + // parsing. sammccall wrote: > I'm not really sure how to read the combination of these two FIXMEs... does > it mean "we wanted to do this differently one day, but now we never can"? > > Maybe either delete both, or if you think it's still potentially actionable, > something like FIXME: it would be better to have tokens created and owned > outside because XYZ, but it's hard because macro expansion mutates the stream > > (I don't really understand what the prev comment is about: the tokens *are* > handed into the UnwrappedLineParser constructor. So I may be off base here) Yeah, I think I don't fully understand what I wanted to fix, so deleted both. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D143070/new/ https://reviews.llvm.org/D143070 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D143070: [clang-format] Enable FormatTokenSource to insert tokens.
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG1995d4424505: [clang-format] Enable FormatTokenSource to insert tokens. (authored by klimek). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D143070/new/ https://reviews.llvm.org/D143070 Files: clang/lib/Format/FormatTokenSource.h clang/lib/Format/UnwrappedLineParser.h clang/unittests/Format/FormatTokenSourceTest.cpp Index: clang/unittests/Format/FormatTokenSourceTest.cpp === --- clang/unittests/Format/FormatTokenSourceTest.cpp +++ clang/unittests/Format/FormatTokenSourceTest.cpp @@ -28,12 +28,17 @@ #define EXPECT_TOKEN_KIND(FormatTok, Kind) \ do { \ FormatToken *Tok = FormatTok; \ -EXPECT_EQ((Tok)->Tok.getKind(), Kind) << *(Tok); \ +EXPECT_EQ(Tok->Tok.getKind(), Kind) << *Tok; \ + } while (false); +#define EXPECT_TOKEN_ID(FormatTok, Name) \ + do { \ +FormatToken *Tok = FormatTok; \ +EXPECT_EQ(Tok->Tok.getKind(), tok::identifier) << *Tok;\ +EXPECT_EQ(Tok->TokenText, Name) << *Tok; \ } while (false); TEST_F(IndexedTokenSourceTest, EmptyInput) { - TokenList Tokens = lex(""); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("")); EXPECT_FALSE(Source.isEOF()); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); EXPECT_TRUE(Source.isEOF()); @@ -46,8 +51,7 @@ } TEST_F(IndexedTokenSourceTest, NavigateTokenStream) { - TokenList Tokens = lex("int a;"); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("int a;")); EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::kw_int); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::kw_int); EXPECT_EQ(Source.getPreviousToken(), nullptr); @@ -60,11 +64,12 @@ EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::eof); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); + EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi); } TEST_F(IndexedTokenSourceTest, ResetPosition) { - TokenList Tokens = lex("int a;"); - IndexedTokenSource Source(Tokens); + IndexedTokenSource Source(lex("int a;")); Source.getNextToken(); unsigned Position = Source.getPosition(); Source.getNextToken(); @@ -73,6 +78,50 @@ EXPECT_TOKEN_KIND(Source.setPosition(Position), tok::kw_int); } +TEST_F(IndexedTokenSourceTest, InsertTokens) { + IndexedTokenSource Source(lex("A1 A2")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B2"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A2"); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensAtEOF) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B2"); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensRecursive) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + // A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1"); + // B1 A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1"); + // C1 B1 A1 + EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1"); + // D1 C1 B1 A1 + EXPECT_TOKEN_ID(Source.getNextToken(), "C1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensRecursiveAtEndOfSequence) { + IndexedTokenSource Source(lex("A1")); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); +} + } // namespace } // namespace format } // namespace clang Index: clang/lib/Format/UnwrappedLineParser.h === --- clang/lib/Format/UnwrappedLineParser.h +++ clang/lib/Format/UnwrappedLineParser.h @@ -280,9 +280,6 @@ FormatTokenSource *Tokens; Unwrapp
[PATCH] D143070: [clang-format] Enable FormatTokenSource to insert tokens.
klimek created this revision. klimek added a reviewer: sammccall. Herald added a project: All. klimek requested review of this revision. Herald added a project: clang. In preparation for configured macro replacements in formatting, add the ability to insert tokens to FormatTokenSource, and implement token insertion in IndexedTokenSource. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D143070 Files: clang/lib/Format/FormatTokenSource.h clang/lib/Format/UnwrappedLineParser.cpp clang/lib/Format/UnwrappedLineParser.h clang/unittests/Format/FormatTokenSourceTest.cpp clang/unittests/Format/TestLexer.h Index: clang/unittests/Format/TestLexer.h === --- clang/unittests/Format/TestLexer.h +++ clang/unittests/Format/TestLexer.h @@ -71,7 +71,8 @@ TokenList annotate(llvm::StringRef Code) { FormatTokenLexer Lex = getNewLexer(Code); -auto Tokens = Lex.lex(); +auto Toks = Lex.lex(); +SmallVector Tokens(Toks.begin(), Toks.end()); UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this); Parser.parse(); TokenAnnotator Annotator(Style, Lex.getKeywords()); Index: clang/unittests/Format/FormatTokenSourceTest.cpp === --- clang/unittests/Format/FormatTokenSourceTest.cpp +++ clang/unittests/Format/FormatTokenSourceTest.cpp @@ -30,6 +30,12 @@ FormatToken *Tok = FormatTok; \ EXPECT_EQ((Tok)->Tok.getKind(), Kind) << *(Tok); \ } while (false); +#define EXPECT_TOKEN_ID(FormatTok, Name) \ + do { \ +FormatToken *Tok = FormatTok; \ +EXPECT_EQ((Tok)->Tok.getKind(), tok::identifier) << *(Tok);\ +EXPECT_EQ((Tok)->TokenText, Name) << *(Tok); \ + } while (false); TEST_F(IndexedTokenSourceTest, EmptyInput) { TokenList Tokens = lex(""); @@ -60,6 +66,8 @@ EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::eof); EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); + EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi); } TEST_F(IndexedTokenSourceTest, ResetPosition) { @@ -73,6 +81,62 @@ EXPECT_TOKEN_KIND(Source.setPosition(Position), tok::kw_int); } +TEST_F(IndexedTokenSourceTest, InsertTokens) { + TokenList TokensA = lex("A1 A2"); + TokenList TokensB = lex("B1 B2"); + IndexedTokenSource Source(TokensA); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(TokensB), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B2"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A2"); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensAtEOF) { + TokenList TokensA = lex("A1"); + TokenList TokensB = lex("B1 B2"); + IndexedTokenSource Source(TokensA); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); + EXPECT_TOKEN_ID(Source.insertTokens(TokensB), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B2"); + EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensRecursive) { + TokenList TokensA = lex("A1"); + TokenList TokensB = lex("B1"); + TokenList TokensC = lex("C1"); + TokenList TokensD = lex("D1"); + IndexedTokenSource Source(TokensA); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + // A1 + EXPECT_TOKEN_ID(Source.insertTokens(TokensB), "B1"); + // B1 A1 + EXPECT_TOKEN_ID(Source.insertTokens(TokensC), "C1"); + // C1 B1 A1 + EXPECT_TOKEN_ID(Source.insertTokens(TokensD), "D1"); + // D1 C1 B1 A1 + EXPECT_TOKEN_ID(Source.getNextToken(), "C1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); +} + +TEST_F(IndexedTokenSourceTest, InsertTokensRecursiveAtEndOfSequence) { + TokenList TokensA = lex("A1"); + TokenList TokensB = lex("B1"); + TokenList TokensC = lex("C1"); + TokenList TokensD = lex("D1"); + IndexedTokenSource Source(TokensA); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(TokensB), "B1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(TokensC), "C1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); + EXPECT_TOKEN_ID(Source.insertTokens(TokensD), "D1"); + EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); +} + } // namespace } // namespace format } // namespace clang Index: clang/lib/Format/UnwrappedLineParser.h === --- clang/lib/Format/UnwrappedLineParser.h +++ clang/lib/Format/UnwrappedLineParser.h @@ -89,7 +89,8 @@ public:
[PATCH] D143070: [clang-format] Enable FormatTokenSource to insert tokens.
sammccall added a comment. Only serious concern is `getPreviousToken()`. Comment at: clang/lib/Format/FormatTokenSource.h:74 public: - IndexedTokenSource(ArrayRef Tokens) + IndexedTokenSource(SmallVectorImpl &Tokens) : Tokens(Tokens), Position(-1) {} As I understand it, this parameter is used: - to provide the initial set of input tokens the source will iterate over - as a common address space for input + synthesized tokens, to allow the jump mechanism to work - to make the caller responsible for ownership/lifetime of the synthesized tokens too This simplifies the implementation, my only problem with this is it seems unusual and confusing. A comment explaining the roles of this `Tokens` param would help a bit. Alternatively you could consider slightly different data structures just for the purpose of making interfaces more obvious: e.g. pass in a `BumpPtrAllocator&`, allocate scratch tokens there, and use pointers instead of indices (jumps become a ptr->ptr map etc) Comment at: clang/lib/Format/FormatTokenSource.h:94 FormatToken *getPreviousToken() override { return Position > 0 ? Tokens[Position - 1] : nullptr; } this no longer seems valid, immediately after calling insertTokens(), Position is at the start of a jump range, and Tokens[Position - 1] will be the EOF at the end of the main stream or previous jump range. if this is never going to happen, you can detect it (I don't think Tokens[Position - 1] can be EOF in any other way) Comment at: clang/lib/Format/FormatTokenSource.h:115 unsigned getPosition() override { LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); maybe add a comment that positions don't have meaningful order and this is only useful to restore the position? (All the callsites look good, it just feels like a trap) Comment at: clang/lib/Format/FormatTokenSource.h:130 +assert((*New.rbegin())->Tok.is(tok::eof)); +LLVM_DEBUG(llvm::dbgs() << "Inserting:\n"); +int Next = Tokens.size(); nit: move into the LLVM_DEBUG block below? or are you worried about a crash? Comment at: clang/lib/Format/FormatTokenSource.h:151 private: + int next(int Current) { +int Next = Current + 1; nit: const I wasn't sure if this method advanced or not. "successor" might be clearer in this respect Comment at: clang/lib/Format/FormatTokenSource.h:173 + // stream continues at position b instead. + std::map Jumps; }; DenseMap? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D143070/new/ https://reviews.llvm.org/D143070 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits