[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.
This revision was automatically updated to reflect the committed changes. sammccall marked an inline comment as done. Closed by commit rGaf89e4792d23: [pseudo] Add crude heuristics to choose taken preprocessor branches. (authored by sammccall). Changed prior to commit: https://reviews.llvm.org/D121165?vs=419142&id=420878#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D121165/new/ https://reviews.llvm.org/D121165 Files: clang-tools-extra/pseudo/include/clang-pseudo/DirectiveMap.h clang-tools-extra/pseudo/include/clang-pseudo/Token.h clang-tools-extra/pseudo/lib/DirectiveMap.cpp clang-tools-extra/pseudo/test/lex.c clang-tools-extra/pseudo/tool/ClangPseudo.cpp clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp Index: clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp === --- clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp +++ clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp @@ -145,6 +145,162 @@ EXPECT_EQ(0u, X.End.Tokens.size()); } +TEST(DirectiveMap, ChooseBranches) { + LangOptions Opts; + const std::string Cases[] = { + R"cpp( +// Branches with no alternatives are taken +#if COND // TAKEN +int x; +#endif + )cpp", + + R"cpp( +// Empty branches are better than nothing +#if COND // TAKEN +#endif + )cpp", + + R"cpp( +// Trivially false branches are not taken, even with no alternatives. +#if 0 +int x; +#endif + )cpp", + + R"cpp( +// Longer branches are preferred over shorter branches +#if COND // TAKEN +int x = 1; +#else +int x; +#endif + +#if COND +int x; +#else // TAKEN +int x = 1; +#endif + )cpp", + + R"cpp( +// Trivially true branches are taken if previous branches are trivial. +#if 1 // TAKEN +#else + int x = 1; +#endif + +#if 0 + int x = 1; +#elif 0 + int x = 2; +#elif 1 // TAKEN + int x; +#endif + +#if 0 + int x = 1; +#elif FOO // TAKEN + int x = 2; +#elif 1 + int x; +#endif + )cpp", + + R"cpp( +// #else is a trivially true branch +#if 0 + int x = 1; +#elif 0 + int x = 2; +#else // TAKEN + int x; +#endif + )cpp", + + R"cpp( +// Directives break ties, but nondirective text is more important. +#if FOO + #define A 1 2 3 +#else // TAKEN + #define B 4 5 6 + #define C 7 8 9 +#endif + +#if FOO // TAKEN + ; + #define A 1 2 3 +#else + #define B 4 5 6 + #define C 7 8 9 +#endif + )cpp", + + R"cpp( +// Avoid #error directives. +#if FOO + int x = 42; + #error This branch is no good +#else // TAKEN +#endif + +#if FOO + // All paths here lead to errors. + int x = 42; + #if 1 // TAKEN +#if COND // TAKEN + #error This branch is no good +#else + #error This one is no good either +#endif + #endif +#else // TAKEN +#endif + )cpp", + + R"cpp( +// Populate taken branches recursively. +#if FOO // TAKEN + int x = 42; + #if BAR +; + #else // TAKEN +int y = 43; + #endif +#else + int x; + #if BAR // TAKEN +int y; + #else +; + #endif +#endif + )cpp", + }; + for (const auto &Code : Cases) { +TokenStream S = cook(lex(Code, Opts), Opts); + +std::function Verify = +[&](const DirectiveMap &M) { + for (const auto &C : M.Chunks) { +if (C.kind() != DirectiveMap::Chunk::K_Conditional) + continue; +const DirectiveMap::Conditional &Cond(C); +for (unsigned I = 0; I < Cond.Branches.size(); ++I) { + auto Directive = S.tokens(Cond.Branches[I].first.Tokens); + EXPECT_EQ(I == Cond.Taken, Directive.back().text() == "// TAKEN") + << "At line " << Directive.front().Line << " of: " << Code; + Verify(Cond.Branches[I].second); +} + } +}; + +DirectiveMap Map = DirectiveMap::parse(S); +chooseConditionalBranches(Map, S); +Verify(Map); + } +} + } // namespace } // namespace pseudo } // namespace clang Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp === --- clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ clang-tools-extra/pseudo/tool/Cla
[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.
sammccall marked an inline comment as done. sammccall added inline comments. Comment at: clang-tools-extra/pseudo/lib/DirectiveMap.cpp:329 +const auto &Tokens = Code.tokens(Dir.Tokens); +if (Tokens.empty() || Tokens.front().Kind != tok::hash) + return llvm::None; hokein wrote: > I thought the first token of the `Directive` should always be `tok::hash`, > isn't it? Yeah, this was defensive against parsing recovery, but we're sufficiently close to the parser that we don't need to be. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D121165/new/ https://reviews.llvm.org/D121165 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.
hokein accepted this revision. hokein added a comment. This revision is now accepted and ready to land. Thanks, looks good! Comment at: clang-tools-extra/pseudo/include/clang-pseudo/DirectiveMap.h:95 // FIXME: add heuristically selection of conditional branches. // FIXME: allow deriving a preprocessed stream it is done in this patch, can be removed now. Comment at: clang-tools-extra/pseudo/lib/DirectiveMap.cpp:329 +const auto &Tokens = Code.tokens(Dir.Tokens); +if (Tokens.empty() || Tokens.front().Kind != tok::hash) + return llvm::None; I thought the first token of the `Directive` should always be `tok::hash`, isn't it? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D121165/new/ https://reviews.llvm.org/D121165 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.
sammccall updated this revision to Diff 419142. sammccall marked 2 inline comments as done. sammccall added a comment. Herald added a project: clang-tools-extra. Rebase and address comments Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D121165/new/ https://reviews.llvm.org/D121165 Files: clang-tools-extra/pseudo/include/clang-pseudo/DirectiveMap.h clang-tools-extra/pseudo/include/clang-pseudo/Token.h clang-tools-extra/pseudo/lib/DirectiveMap.cpp clang-tools-extra/pseudo/test/lex.c clang-tools-extra/pseudo/tool/ClangPseudo.cpp clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp Index: clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp === --- clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp +++ clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp @@ -145,6 +145,162 @@ EXPECT_EQ(0u, X.End.Tokens.size()); } +TEST(DirectiveMap, ChooseBranches) { + LangOptions Opts; + const std::string Cases[] = { + R"cpp( +// Branches with no alternatives are taken +#if COND // TAKEN +int x; +#endif + )cpp", + + R"cpp( +// Empty branches are better than nothing +#if COND // TAKEN +#endif + )cpp", + + R"cpp( +// Trivially false branches are not taken, even with no alternatives. +#if 0 +int x; +#endif + )cpp", + + R"cpp( +// Longer branches are preferred over shorter branches +#if COND // TAKEN +int x = 1; +#else +int x; +#endif + +#if COND +int x; +#else // TAKEN +int x = 1; +#endif + )cpp", + + R"cpp( +// Trivially true branches are taken if previous branches are trivial. +#if 1 // TAKEN +#else + int x = 1; +#endif + +#if 0 + int x = 1; +#elif 0 + int x = 2; +#elif 1 // TAKEN + int x; +#endif + +#if 0 + int x = 1; +#elif FOO // TAKEN + int x = 2; +#elif 1 + int x; +#endif + )cpp", + + R"cpp( +// #else is a trivially true branch +#if 0 + int x = 1; +#elif 0 + int x = 2; +#else // TAKEN + int x; +#endif + )cpp", + + R"cpp( +// Directives break ties, but nondirective text is more important. +#if FOO + #define A 1 2 3 +#else // TAKEN + #define B 4 5 6 + #define C 7 8 9 +#endif + +#if FOO // TAKEN + ; + #define A 1 2 3 +#else + #define B 4 5 6 + #define C 7 8 9 +#endif + )cpp", + + R"cpp( +// Avoid #error directives. +#if FOO + int x = 42; + #error This branch is no good +#else // TAKEN +#endif + +#if FOO + // All paths here lead to errors. + int x = 42; + #if 1 // TAKEN +#if COND // TAKEN + #error This branch is no good +#else + #error This one is no good either +#endif + #endif +#else // TAKEN +#endif + )cpp", + + R"cpp( +// Populate taken branches recursively. +#if FOO // TAKEN + int x = 42; + #if BAR +; + #else // TAKEN +int y = 43; + #endif +#else + int x; + #if BAR // TAKEN +int y; + #else +; + #endif +#endif + )cpp", + }; + for (const auto &Code : Cases) { +TokenStream S = cook(lex(Code, Opts), Opts); + +std::function Verify = +[&](const DirectiveMap &M) { + for (const auto &C : M.Chunks) { +if (C.kind() != DirectiveMap::Chunk::K_Conditional) + continue; +const DirectiveMap::Conditional &Cond(C); +for (unsigned I = 0; I < Cond.Branches.size(); ++I) { + auto Directive = S.tokens(Cond.Branches[I].first.Tokens); + EXPECT_EQ(I == Cond.Taken, Directive.back().text() == "// TAKEN") + << "At line " << Directive.front().Line << " of: " << Code; + Verify(Cond.Branches[I].second); +} + } +}; + +DirectiveMap Map = DirectiveMap::parse(S); +chooseConditionalBranches(Map, S); +Verify(Map); + } +} + } // namespace } // namespace pseudo } // namespace clang Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp === --- clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -75,6 +75,7 @@ clang::LangOptions LangOpts; // FIXME: use real options. auto Stream = clang::pseudo::lex(Text, La
[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.
sammccall marked 6 inline comments as done. sammccall added inline comments. Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:210 +namespace { +class BranchChooser { +public: hokein wrote: > We evaluate each conditional directive *independently*, I wonder whether it > is important to use any low-hanging tricks to ensure consistent choices are > made among different conditional directives. (my gut feeling is that it is > nice to have, but we should not worry too much about it at the moment). > > For example, > > ``` > #ifdef __cplusplus > extern "C" { > #endif > > #ifdef __cplusplus > } > #endif > ``` > > If we enable the first #if, and the second one should be enabled as well. > (this example is a trivial and common pattern, it has been handled by the > existing code). Added a comment describing this case, I don't think we should tackle this now. Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:305 + // false if the branch is never taken, and None otherwise. + llvm::Optional isTriviallyTaken(const DirectiveMap::Directive &Dir) { +switch (Dir.Kind) { hokein wrote: > nit: instead of using `trivially`, I think `confidently` fits better what the > method does. You're right this is a bad name, but also because returning false doesn't mean it's not trivially/confidently taken, but rather that it's trivially/confidently *not* taken. I think removing the adverb actually makes this clearer. Also this case is confusing ``` #if foo #else // is this "always taken"? #bar ``` Renamed to `isTakenWhenReached`, wdyt? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D121165/new/ https://reviews.llvm.org/D121165 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.
hokein added inline comments. Comment at: clang/include/clang/Tooling/Syntax/Pseudo/DirectiveMap.h:123 +/// The choices are stored in Conditional::Taken nodes. +void chooseConditionalBranches(DirectiveMap &, const TokenStream &Code); + off-topic: a random idea on the name of `DirectiveMap` came up to my mind when reading the code, how about `DirectiveTree`, the comment of it says the structure is a tree, and the `BranchChooser` is actually a tree-visiting pattern. Comment at: clang/include/clang/Tooling/Syntax/Pseudo/Token.h:84 + /// Returns the next token in the stream, skipping over comments. + const Token &next_nc() const { +const Token *T = this; nit: the name doesn't match the LLVM code-style. Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:210 +namespace { +class BranchChooser { +public: We evaluate each conditional directive *independently*, I wonder whether it is important to use any low-hanging tricks to ensure consistent choices are made among different conditional directives. (my gut feeling is that it is nice to have, but we should not worry too much about it at the moment). For example, ``` #ifdef __cplusplus extern "C" { #endif #ifdef __cplusplus } #endif ``` If we enable the first #if, and the second one should be enabled as well. (this example is a trivial and common pattern, it has been handled by the existing code). Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:218 + // Describes the code seen by making particular branch choices. + struct Score { +unsigned Tokens = 0; // excluding comments and directives nit: add a comment saying `higher is better`. Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:225 + // Seeing errors is bad, other things are good. + return std::tie(Other.Errors, Tokens, Directives) > + std::tie(Errors, Other.Tokens, Other.Directives); Maybe `(-Errors, Tokens, Directives) > (-Other.Errors, ...)` Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:295 + // Is this the best branch so far? (Including if it's #if 1). + if (TookTrivial || !C.Taken || BranchScore > Best) { +Best = BranchScore; nit: `!C.Taken` => `!C.Token.hasValue()`, which is clearer. Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:305 + // false if the branch is never taken, and None otherwise. + llvm::Optional isTriviallyTaken(const DirectiveMap::Directive &Dir) { +switch (Dir.Kind) { nit: instead of using `trivially`, I think `confidently` fits better what the method does. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D121165/new/ https://reviews.llvm.org/D121165 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.
sammccall created this revision. sammccall added a reviewer: hokein. Herald added a project: All. sammccall requested review of this revision. Herald added subscribers: cfe-commits, alextsao1999. Herald added a project: clang. In files where different preprocessing paths are possible, our goal is to choose a preprocessed token sequence which we can parse that pins down as much of the grammatical structure as possible. This forms the "primary parse", and the not-taken branches get parsed later, and are constrained to be compatible with the primary parse. Concretely: int x = #ifdef // TAKEN 2 + 2 + 2 // determined during primary parse to be an expression #else 2 // constrained to be an expression during a secondary parse #endif ; Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D121165 Files: clang/include/clang/Tooling/Syntax/Pseudo/DirectiveMap.h clang/include/clang/Tooling/Syntax/Pseudo/Token.h clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp clang/test/Syntax/lex.c clang/tools/clang-pseudo/ClangPseudo.cpp clang/unittests/Tooling/Syntax/Pseudo/DirectiveMapTest.cpp Index: clang/unittests/Tooling/Syntax/Pseudo/DirectiveMapTest.cpp === --- clang/unittests/Tooling/Syntax/Pseudo/DirectiveMapTest.cpp +++ clang/unittests/Tooling/Syntax/Pseudo/DirectiveMapTest.cpp @@ -146,6 +146,162 @@ EXPECT_EQ(0u, X.End.Tokens.size()); } +TEST(DirectiveMap, ChooseBranches) { + LangOptions Opts; + const std::string Cases[] = { + R"cpp( +// Branches with no alternatives are taken +#if COND // TAKEN +int x; +#endif + )cpp", + + R"cpp( +// Empty branches are better than nothing +#if COND // TAKEN +#endif + )cpp", + + R"cpp( +// Trivially false branches are not taken, even with no alternatives. +#if 0 +int x; +#endif + )cpp", + + R"cpp( +// Longer branches are preferred over shorter branches +#if COND // TAKEN +int x = 1; +#else +int x; +#endif + +#if COND +int x; +#else // TAKEN +int x = 1; +#endif + )cpp", + + R"cpp( +// Trivially true branches are taken if previous branches are trivial. +#if 1 // TAKEN +#else + int x = 1; +#endif + +#if 0 + int x = 1; +#elif 0 + int x = 2; +#elif 1 // TAKEN + int x; +#endif + +#if 0 + int x = 1; +#elif FOO // TAKEN + int x = 2; +#elif 1 + int x; +#endif + )cpp", + + R"cpp( +// #else is a trivially true branch +#if 0 + int x = 1; +#elif 0 + int x = 2; +#else // TAKEN + int x; +#endif + )cpp", + + R"cpp( +// Directives break ties, but nondirective text is more important. +#if FOO + #define A 1 2 3 +#else // TAKEN + #define B 4 5 6 + #define C 7 8 9 +#endif + +#if FOO // TAKEN + ; + #define A 1 2 3 +#else + #define B 4 5 6 + #define C 7 8 9 +#endif + )cpp", + + R"cpp( +// Avoid #error directives. +#if FOO + int x = 42; + #error This branch is no good +#else // TAKEN +#endif + +#if FOO + // All paths here lead to errors. + int x = 42; + #if 1 // TAKEN +#if COND // TAKEN + #error This branch is no good +#else + #error This one is no good either +#endif + #endif +#else // TAKEN +#endif + )cpp", + + R"cpp( +// Populate taken branches recursively. +#if FOO // TAKEN + int x = 42; + #if BAR +; + #else // TAKEN +int y = 43; + #endif +#else + int x; + #if BAR // TAKEN +int y; + #else +; + #endif +#endif + )cpp", + }; + for (const auto &Code : Cases) { +TokenStream S = cook(lex(Code, Opts), Opts); + +std::function Verify = +[&](const DirectiveMap &M) { + for (const auto &C : M.Chunks) { +if (C.kind() != DirectiveMap::Chunk::K_Conditional) + continue; +const DirectiveMap::Conditional &Cond(C); +for (unsigned I = 0; I < Cond.Branches.size(); ++I) { + auto Directive = S.tokens(Cond.Branches[I].first.Tokens); + EXPECT_EQ(I == Cond.Taken, Directive.back().text() == "// TAKEN") + << "At line " << Directive.front().Line << " of: " << Code; + Verify(Cond.Branches[I].second); +} + } +}; + +DirectiveMap Map