[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.

2022-04-06 Thread Sam McCall via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
sammccall marked an inline comment as done.
Closed by commit rGaf89e4792d23: [pseudo] Add crude heuristics to choose taken 
preprocessor branches. (authored by sammccall).

Changed prior to commit:
  https://reviews.llvm.org/D121165?vs=419142&id=420878#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D121165/new/

https://reviews.llvm.org/D121165

Files:
  clang-tools-extra/pseudo/include/clang-pseudo/DirectiveMap.h
  clang-tools-extra/pseudo/include/clang-pseudo/Token.h
  clang-tools-extra/pseudo/lib/DirectiveMap.cpp
  clang-tools-extra/pseudo/test/lex.c
  clang-tools-extra/pseudo/tool/ClangPseudo.cpp
  clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp

Index: clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp
===
--- clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp
+++ clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp
@@ -145,6 +145,162 @@
   EXPECT_EQ(0u, X.End.Tokens.size());
 }
 
+TEST(DirectiveMap, ChooseBranches) {
+  LangOptions Opts;
+  const std::string Cases[] = {
+  R"cpp(
+// Branches with no alternatives are taken
+#if COND // TAKEN
+int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// Empty branches are better than nothing
+#if COND // TAKEN
+#endif
+  )cpp",
+
+  R"cpp(
+// Trivially false branches are not taken, even with no alternatives.
+#if 0
+int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// Longer branches are preferred over shorter branches
+#if COND // TAKEN
+int x = 1;
+#else
+int x;
+#endif
+
+#if COND
+int x;
+#else // TAKEN
+int x = 1;
+#endif
+  )cpp",
+
+  R"cpp(
+// Trivially true branches are taken if previous branches are trivial.
+#if 1 // TAKEN
+#else
+  int x = 1;
+#endif
+
+#if 0
+  int x = 1;
+#elif 0
+  int x = 2;
+#elif 1 // TAKEN
+  int x;
+#endif
+
+#if 0
+  int x = 1;
+#elif FOO // TAKEN
+  int x = 2;
+#elif 1
+  int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// #else is a trivially true branch
+#if 0
+  int x = 1;
+#elif 0
+  int x = 2;
+#else // TAKEN
+  int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// Directives break ties, but nondirective text is more important.
+#if FOO
+  #define A 1 2 3
+#else // TAKEN
+  #define B 4 5 6
+  #define C 7 8 9
+#endif
+
+#if FOO // TAKEN
+  ;
+  #define A 1 2 3
+#else
+  #define B 4 5 6
+  #define C 7 8 9
+#endif
+  )cpp",
+
+  R"cpp(
+// Avoid #error directives.
+#if FOO
+  int x = 42;
+  #error This branch is no good
+#else // TAKEN
+#endif
+
+#if FOO
+  // All paths here lead to errors.
+  int x = 42;
+  #if 1 // TAKEN
+#if COND // TAKEN
+  #error This branch is no good
+#else
+  #error This one is no good either
+#endif
+  #endif
+#else // TAKEN
+#endif
+  )cpp",
+
+  R"cpp(
+// Populate taken branches recursively.
+#if FOO // TAKEN
+  int x = 42;
+  #if BAR
+;
+  #else // TAKEN
+int y = 43;
+  #endif
+#else
+  int x;
+  #if BAR // TAKEN
+int y;
+  #else
+;
+  #endif
+#endif
+  )cpp",
+  };
+  for (const auto &Code : Cases) {
+TokenStream S = cook(lex(Code, Opts), Opts);
+
+std::function Verify =
+[&](const DirectiveMap &M) {
+  for (const auto &C : M.Chunks) {
+if (C.kind() != DirectiveMap::Chunk::K_Conditional)
+  continue;
+const DirectiveMap::Conditional &Cond(C);
+for (unsigned I = 0; I < Cond.Branches.size(); ++I) {
+  auto Directive = S.tokens(Cond.Branches[I].first.Tokens);
+  EXPECT_EQ(I == Cond.Taken, Directive.back().text() == "// TAKEN")
+  << "At line " << Directive.front().Line << " of: " << Code;
+  Verify(Cond.Branches[I].second);
+}
+  }
+};
+
+DirectiveMap Map = DirectiveMap::parse(S);
+chooseConditionalBranches(Map, S);
+Verify(Map);
+  }
+}
+
 } // namespace
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp
===
--- clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ clang-tools-extra/pseudo/tool/Cla

[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.

2022-04-06 Thread Sam McCall via Phabricator via cfe-commits
sammccall marked an inline comment as done.
sammccall added inline comments.



Comment at: clang-tools-extra/pseudo/lib/DirectiveMap.cpp:329
+const auto &Tokens = Code.tokens(Dir.Tokens);
+if (Tokens.empty() || Tokens.front().Kind != tok::hash)
+  return llvm::None;

hokein wrote:
> I thought the first token of the `Directive` should always be `tok::hash`, 
> isn't it?
Yeah, this was defensive against parsing recovery, but we're sufficiently close 
to the parser that we don't need to be.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D121165/new/

https://reviews.llvm.org/D121165

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.

2022-04-01 Thread Haojian Wu via Phabricator via cfe-commits
hokein accepted this revision.
hokein added a comment.
This revision is now accepted and ready to land.

Thanks, looks good!




Comment at: clang-tools-extra/pseudo/include/clang-pseudo/DirectiveMap.h:95
 
   // FIXME: add heuristically selection of conditional branches.
   // FIXME: allow deriving a preprocessed stream

it is done in this patch, can be removed now.



Comment at: clang-tools-extra/pseudo/lib/DirectiveMap.cpp:329
+const auto &Tokens = Code.tokens(Dir.Tokens);
+if (Tokens.empty() || Tokens.front().Kind != tok::hash)
+  return llvm::None;

I thought the first token of the `Directive` should always be `tok::hash`, 
isn't it?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D121165/new/

https://reviews.llvm.org/D121165

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.

2022-03-30 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 419142.
sammccall marked 2 inline comments as done.
sammccall added a comment.
Herald added a project: clang-tools-extra.

Rebase and address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D121165/new/

https://reviews.llvm.org/D121165

Files:
  clang-tools-extra/pseudo/include/clang-pseudo/DirectiveMap.h
  clang-tools-extra/pseudo/include/clang-pseudo/Token.h
  clang-tools-extra/pseudo/lib/DirectiveMap.cpp
  clang-tools-extra/pseudo/test/lex.c
  clang-tools-extra/pseudo/tool/ClangPseudo.cpp
  clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp

Index: clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp
===
--- clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp
+++ clang-tools-extra/pseudo/unittests/DirectiveMapTest.cpp
@@ -145,6 +145,162 @@
   EXPECT_EQ(0u, X.End.Tokens.size());
 }
 
+TEST(DirectiveMap, ChooseBranches) {
+  LangOptions Opts;
+  const std::string Cases[] = {
+  R"cpp(
+// Branches with no alternatives are taken
+#if COND // TAKEN
+int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// Empty branches are better than nothing
+#if COND // TAKEN
+#endif
+  )cpp",
+
+  R"cpp(
+// Trivially false branches are not taken, even with no alternatives.
+#if 0
+int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// Longer branches are preferred over shorter branches
+#if COND // TAKEN
+int x = 1;
+#else
+int x;
+#endif
+
+#if COND
+int x;
+#else // TAKEN
+int x = 1;
+#endif
+  )cpp",
+
+  R"cpp(
+// Trivially true branches are taken if previous branches are trivial.
+#if 1 // TAKEN
+#else
+  int x = 1;
+#endif
+
+#if 0
+  int x = 1;
+#elif 0
+  int x = 2;
+#elif 1 // TAKEN
+  int x;
+#endif
+
+#if 0
+  int x = 1;
+#elif FOO // TAKEN
+  int x = 2;
+#elif 1
+  int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// #else is a trivially true branch
+#if 0
+  int x = 1;
+#elif 0
+  int x = 2;
+#else // TAKEN
+  int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// Directives break ties, but nondirective text is more important.
+#if FOO
+  #define A 1 2 3
+#else // TAKEN
+  #define B 4 5 6
+  #define C 7 8 9
+#endif
+
+#if FOO // TAKEN
+  ;
+  #define A 1 2 3
+#else
+  #define B 4 5 6
+  #define C 7 8 9
+#endif
+  )cpp",
+
+  R"cpp(
+// Avoid #error directives.
+#if FOO
+  int x = 42;
+  #error This branch is no good
+#else // TAKEN
+#endif
+
+#if FOO
+  // All paths here lead to errors.
+  int x = 42;
+  #if 1 // TAKEN
+#if COND // TAKEN
+  #error This branch is no good
+#else
+  #error This one is no good either
+#endif
+  #endif
+#else // TAKEN
+#endif
+  )cpp",
+
+  R"cpp(
+// Populate taken branches recursively.
+#if FOO // TAKEN
+  int x = 42;
+  #if BAR
+;
+  #else // TAKEN
+int y = 43;
+  #endif
+#else
+  int x;
+  #if BAR // TAKEN
+int y;
+  #else
+;
+  #endif
+#endif
+  )cpp",
+  };
+  for (const auto &Code : Cases) {
+TokenStream S = cook(lex(Code, Opts), Opts);
+
+std::function Verify =
+[&](const DirectiveMap &M) {
+  for (const auto &C : M.Chunks) {
+if (C.kind() != DirectiveMap::Chunk::K_Conditional)
+  continue;
+const DirectiveMap::Conditional &Cond(C);
+for (unsigned I = 0; I < Cond.Branches.size(); ++I) {
+  auto Directive = S.tokens(Cond.Branches[I].first.Tokens);
+  EXPECT_EQ(I == Cond.Taken, Directive.back().text() == "// TAKEN")
+  << "At line " << Directive.front().Line << " of: " << Code;
+  Verify(Cond.Branches[I].second);
+}
+  }
+};
+
+DirectiveMap Map = DirectiveMap::parse(S);
+chooseConditionalBranches(Map, S);
+Verify(Map);
+  }
+}
+
 } // namespace
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp
===
--- clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -75,6 +75,7 @@
 clang::LangOptions LangOpts; // FIXME: use real options.
 auto Stream = clang::pseudo::lex(Text, La

[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.

2022-03-30 Thread Sam McCall via Phabricator via cfe-commits
sammccall marked 6 inline comments as done.
sammccall added inline comments.



Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:210
+namespace {
+class BranchChooser {
+public:

hokein wrote:
> We evaluate each conditional directive *independently*, I wonder whether it 
> is important to use any low-hanging tricks to ensure consistent choices are 
> made among different conditional directives. (my gut feeling is that it is 
> nice to have, but we should not worry too much about it at the moment).
> 
> For example,
> 
> ```
> #ifdef __cplusplus
> extern "C" {
> #endif
> 
> #ifdef __cplusplus
> }
> #endif
> ```
> 
> If we enable the first #if, and the second one should be enabled as well. 
> (this example is a trivial and common pattern, it has been handled by the 
> existing code).
Added a comment describing this case, I don't think we should tackle this now.



Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:305
+  // false if the branch is never taken, and None otherwise.
+  llvm::Optional isTriviallyTaken(const DirectiveMap::Directive &Dir) {
+switch (Dir.Kind) {

hokein wrote:
> nit: instead of using `trivially`, I think `confidently` fits better what the 
> method does.
You're right this is a bad name, but also because returning false doesn't mean 
it's not trivially/confidently taken, but rather that it's 
trivially/confidently *not* taken.
I think removing the adverb actually makes this clearer.

Also this case is confusing
```
#if foo
#else // is this "always taken"?
#bar
```

Renamed to `isTakenWhenReached`, wdyt?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D121165/new/

https://reviews.llvm.org/D121165

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.

2022-03-10 Thread Haojian Wu via Phabricator via cfe-commits
hokein added inline comments.



Comment at: clang/include/clang/Tooling/Syntax/Pseudo/DirectiveMap.h:123
+/// The choices are stored in Conditional::Taken nodes.
+void chooseConditionalBranches(DirectiveMap &, const TokenStream &Code);
+

off-topic: a random idea on the name of `DirectiveMap` came up to my mind when 
reading the code, how about `DirectiveTree`, the comment of it says the 
structure is a tree, and the `BranchChooser` is actually a tree-visiting 
pattern.



Comment at: clang/include/clang/Tooling/Syntax/Pseudo/Token.h:84
+  /// Returns the next token in the stream, skipping over comments.
+  const Token &next_nc() const {
+const Token *T = this;

nit: the name doesn't match the LLVM code-style.



Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:210
+namespace {
+class BranchChooser {
+public:

We evaluate each conditional directive *independently*, I wonder whether it is 
important to use any low-hanging tricks to ensure consistent choices are made 
among different conditional directives. (my gut feeling is that it is nice to 
have, but we should not worry too much about it at the moment).

For example,

```
#ifdef __cplusplus
extern "C" {
#endif

#ifdef __cplusplus
}
#endif
```

If we enable the first #if, and the second one should be enabled as well. (this 
example is a trivial and common pattern, it has been handled by the existing 
code).



Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:218
+  // Describes the code seen by making particular branch choices.
+  struct Score {
+unsigned Tokens = 0; // excluding comments and directives

nit: add a comment saying `higher is better`.



Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:225
+  // Seeing errors is bad, other things are good.
+  return std::tie(Other.Errors, Tokens, Directives) >
+ std::tie(Errors, Other.Tokens, Other.Directives);

Maybe `(-Errors, Tokens, Directives) > (-Other.Errors, ...)`



Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:295
+  // Is this the best branch so far? (Including if it's #if 1).
+  if (TookTrivial || !C.Taken || BranchScore > Best) {
+Best = BranchScore;

nit: `!C.Taken` => `!C.Token.hasValue()`, which is clearer.



Comment at: clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp:305
+  // false if the branch is never taken, and None otherwise.
+  llvm::Optional isTriviallyTaken(const DirectiveMap::Directive &Dir) {
+switch (Dir.Kind) {

nit: instead of using `trivially`, I think `confidently` fits better what the 
method does.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D121165/new/

https://reviews.llvm.org/D121165

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D121165: [pseudo] Add crude heuristics to choose taken preprocessor branches.

2022-03-07 Thread Sam McCall via Phabricator via cfe-commits
sammccall created this revision.
sammccall added a reviewer: hokein.
Herald added a project: All.
sammccall requested review of this revision.
Herald added subscribers: cfe-commits, alextsao1999.
Herald added a project: clang.

In files where different preprocessing paths are possible, our goal is to
choose a preprocessed token sequence which we can parse that pins down as much
of the grammatical structure as possible.
This forms the "primary parse", and the not-taken branches get parsed later,
and are constrained to be compatible with the primary parse.

Concretely:

  int x =
#ifdef // TAKEN
  2 + 2 + 2 // determined during primary parse to be an expression
#else
  2 // constrained to be an expression during a secondary parse
#endif
;


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D121165

Files:
  clang/include/clang/Tooling/Syntax/Pseudo/DirectiveMap.h
  clang/include/clang/Tooling/Syntax/Pseudo/Token.h
  clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp
  clang/test/Syntax/lex.c
  clang/tools/clang-pseudo/ClangPseudo.cpp
  clang/unittests/Tooling/Syntax/Pseudo/DirectiveMapTest.cpp

Index: clang/unittests/Tooling/Syntax/Pseudo/DirectiveMapTest.cpp
===
--- clang/unittests/Tooling/Syntax/Pseudo/DirectiveMapTest.cpp
+++ clang/unittests/Tooling/Syntax/Pseudo/DirectiveMapTest.cpp
@@ -146,6 +146,162 @@
   EXPECT_EQ(0u, X.End.Tokens.size());
 }
 
+TEST(DirectiveMap, ChooseBranches) {
+  LangOptions Opts;
+  const std::string Cases[] = {
+  R"cpp(
+// Branches with no alternatives are taken
+#if COND // TAKEN
+int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// Empty branches are better than nothing
+#if COND // TAKEN
+#endif
+  )cpp",
+
+  R"cpp(
+// Trivially false branches are not taken, even with no alternatives.
+#if 0
+int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// Longer branches are preferred over shorter branches
+#if COND // TAKEN
+int x = 1;
+#else
+int x;
+#endif
+
+#if COND
+int x;
+#else // TAKEN
+int x = 1;
+#endif
+  )cpp",
+
+  R"cpp(
+// Trivially true branches are taken if previous branches are trivial.
+#if 1 // TAKEN
+#else
+  int x = 1;
+#endif
+
+#if 0
+  int x = 1;
+#elif 0
+  int x = 2;
+#elif 1 // TAKEN
+  int x;
+#endif
+
+#if 0
+  int x = 1;
+#elif FOO // TAKEN
+  int x = 2;
+#elif 1
+  int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// #else is a trivially true branch
+#if 0
+  int x = 1;
+#elif 0
+  int x = 2;
+#else // TAKEN
+  int x;
+#endif
+  )cpp",
+
+  R"cpp(
+// Directives break ties, but nondirective text is more important.
+#if FOO
+  #define A 1 2 3
+#else // TAKEN
+  #define B 4 5 6
+  #define C 7 8 9
+#endif
+
+#if FOO // TAKEN
+  ;
+  #define A 1 2 3
+#else
+  #define B 4 5 6
+  #define C 7 8 9
+#endif
+  )cpp",
+
+  R"cpp(
+// Avoid #error directives.
+#if FOO
+  int x = 42;
+  #error This branch is no good
+#else // TAKEN
+#endif
+
+#if FOO
+  // All paths here lead to errors.
+  int x = 42;
+  #if 1 // TAKEN
+#if COND // TAKEN
+  #error This branch is no good
+#else
+  #error This one is no good either
+#endif
+  #endif
+#else // TAKEN
+#endif
+  )cpp",
+
+  R"cpp(
+// Populate taken branches recursively.
+#if FOO // TAKEN
+  int x = 42;
+  #if BAR
+;
+  #else // TAKEN
+int y = 43;
+  #endif
+#else
+  int x;
+  #if BAR // TAKEN
+int y;
+  #else
+;
+  #endif
+#endif
+  )cpp",
+  };
+  for (const auto &Code : Cases) {
+TokenStream S = cook(lex(Code, Opts), Opts);
+
+std::function Verify =
+[&](const DirectiveMap &M) {
+  for (const auto &C : M.Chunks) {
+if (C.kind() != DirectiveMap::Chunk::K_Conditional)
+  continue;
+const DirectiveMap::Conditional &Cond(C);
+for (unsigned I = 0; I < Cond.Branches.size(); ++I) {
+  auto Directive = S.tokens(Cond.Branches[I].first.Tokens);
+  EXPECT_EQ(I == Cond.Taken, Directive.back().text() == "// TAKEN")
+  << "At line " << Directive.front().Line << " of: " << Code;
+  Verify(Cond.Branches[I].second);
+}
+  }
+};
+
+DirectiveMap Map