[PATCH] D75687: [clangd] Only minimally escape text when rendering to markdown.

2020-03-17 Thread Sam McCall via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG704cd4d5d075: [clangd] Only minimally escape text when 
rendering to markdown. (authored by sammccall).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75687/new/

https://reviews.llvm.org/D75687

Files:
  clang-tools-extra/clangd/FormattedString.cpp
  clang-tools-extra/clangd/FormattedString.h
  clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
  clang-tools-extra/clangd/unittests/HoverTests.cpp

Index: clang-tools-extra/clangd/unittests/HoverTests.cpp
===
--- clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -1905,7 +1905,7 @@
   llvm::StringRef ExpectedMarkdown = R"md(### variable `foo`  
 
 ---
-Value \= `val`  
+Value = `val`  
 
 ---
 ```cpp
Index: clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
===
--- clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
+++ clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
@@ -17,25 +17,96 @@
 namespace markup {
 namespace {
 
-TEST(Render, Escaping) {
-  // Check some ASCII punctuation
-  Paragraph P;
-  P.appendText("*!`");
-  EXPECT_EQ(P.asMarkdown(), "\\*\\!\\`");
+std::string escape(llvm::StringRef Text) {
+  return Paragraph().appendText(Text.str()).asMarkdown();
+}
+
+MATCHER_P(escaped, C, "") {
+  return testing::ExplainMatchResult(::testing::HasSubstr(std::string{'\\', C}),
+ arg, result_listener);
+}
 
+MATCHER(escapedNone, "") {
+  return testing::ExplainMatchResult(::testing::Not(::testing::HasSubstr("\\")),
+ arg, result_listener);
+}
+
+TEST(Render, Escaping) {
   // Check all ASCII punctuation.
-  P = Paragraph();
   std::string Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
-  // Same text, with each character escaped.
-  std::string EscapedPunctuation;
-  EscapedPunctuation.reserve(2 * Punctuation.size());
-  for (char C : Punctuation)
-EscapedPunctuation += std::string("\\") + C;
-  P.appendText(Punctuation);
-  EXPECT_EQ(P.asMarkdown(), EscapedPunctuation);
+  std::string EscapedPunc = R"txt(!"#$%&'()\*+,-./:;<=>?@[\\]^\_\`{|}~)txt";
+  EXPECT_EQ(escape(Punctuation), EscapedPunc);
+
+  // Inline code
+  EXPECT_EQ(escape("`foo`"), R"(\`foo\`)");
+  EXPECT_EQ(escape("`foo"), R"(\`foo)");
+  EXPECT_EQ(escape("foo`"), R"(foo\`)");
+  EXPECT_EQ(escape("``foo``"), R"(\`\`foo\`\`)");
+  // Code blocks
+  EXPECT_EQ(escape("```"), R"(\`\`\`)"); // This could also be inline code!
+  EXPECT_EQ(escape("~~~"), R"(\~~~)");
+
+  // Rulers and headings
+  EXPECT_THAT(escape("## Heading"), escaped('#'));
+  EXPECT_THAT(escape("Foo # bar"), escapedNone());
+  EXPECT_EQ(escape("---"), R"(\---)");
+  EXPECT_EQ(escape("-"), R"(\-)");
+  EXPECT_EQ(escape("==="), R"(\===)");
+  EXPECT_EQ(escape("="), R"(\=)");
+  EXPECT_EQ(escape("***"), R"(\*\*\*)"); // \** could start emphasis!
+
+  // HTML tags.
+  EXPECT_THAT(escape(""), escaped('<'));
+  EXPECT_THAT(escape("std::vector"), escaped('<'));
+  EXPECT_THAT(escape("std::map"), escapedNone());
+  // Autolinks
+  EXPECT_THAT(escape("Email "), escapedNone());
+  EXPECT_THAT(escape("Website "), escapedNone());
+
+  // Bullet lists.
+  EXPECT_THAT(escape("- foo"), escaped('-'));
+  EXPECT_THAT(escape("* foo"), escaped('*'));
+  EXPECT_THAT(escape("+ foo"), escaped('+'));
+  EXPECT_THAT(escape("+"), escaped('+'));
+  EXPECT_THAT(escape("a + foo"), escapedNone());
+  EXPECT_THAT(escape("a+ foo"), escapedNone());
+  EXPECT_THAT(escape("1. foo"), escaped('.'));
+  EXPECT_THAT(escape("a. foo"), escapedNone());
+
+  // Emphasis.
+  EXPECT_EQ(escape("*foo*"), R"(\*foo\*)");
+  EXPECT_EQ(escape("**foo**"), R"(\*\*foo\*\*)");
+  EXPECT_THAT(escape("*foo"), escaped('*'));
+  EXPECT_THAT(escape("foo *"), escapedNone());
+  EXPECT_THAT(escape("foo * bar"), escapedNone());
+  EXPECT_THAT(escape("foo_bar"), escapedNone());
+  EXPECT_THAT(escape("foo _bar"), escaped('_'));
+  EXPECT_THAT(escape("foo_ bar"), escaped('_'));
+  EXPECT_THAT(escape("foo _ bar"), escapedNone());
+
+  // HTML entities.
+  EXPECT_THAT(escape("fish &chips;"), escaped('&'));
+  EXPECT_THAT(escape("fish & chips;"), escapedNone());
+  EXPECT_THAT(escape("fish &chips"), escapedNone());
+  EXPECT_THAT(escape("foo * bar"), escaped('&'));
+  EXPECT_THAT(escape("foo ¯ bar"), escaped('&'));
+  EXPECT_THAT(escape("foo &?; bar"), escapedNone());
+
+  // Links.
+  EXPECT_THAT(escape("[foo](bar)"), escaped(']'));
+  EXPECT_THAT(escape("[foo]: bar"), escaped(']'));
+  // No need to escape these, as the target never exists.
+  EXPECT_THAT(escape("[foo][]"), escapedNone());
+  EXPECT_THAT(escape("[foo][bar]"), escapedNone());
+  EXPECT_THAT(escape("[foo]"), escapedNone());
 
   // In code blocks we don't need to escape ASC

[PATCH] D75687: [clangd] Only minimally escape text when rendering to markdown.

2020-03-17 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet added a comment.

thanks, still LG


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75687/new/

https://reviews.llvm.org/D75687



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D75687: [clangd] Only minimally escape text when rendering to markdown.

2020-03-17 Thread Sam McCall via Phabricator via cfe-commits
sammccall marked an inline comment as done.
sammccall added a comment.

Filed https://github.com/google/llvm-premerge-checks/issues/147 for the 
spurious unit test failure.




Comment at: clang-tools-extra/clangd/FormattedString.cpp:150
+// Not a delimiter if surrounded by space.
+return !SpaceSurrounds();
+  case '-': // Setex heading, horizontal ruler, or bullet.

kadircet wrote:
> kadircet wrote:
> > `_` seems to behave different than `*` :(
> > 
> > it seems to rather depend on the spaces around the text being emphasized, 
> > i.e
> > 
> > ```
> > foo _ bar _ foo -> no emphasis
> > foo _ bar_ foo -> no emphasis
> > foo _bar_ foo -> emphasis on bar
> > foo_bar_ foo -> no emphasis
> > ```
> > 
> > so this should rather be `Before.endswith(" ") && isAlpha(After)` for the 
> > beginning of emphasis and the opposite for the ending.
> > Not sure if there's an easy way to decide on it in isolation.
> regarding this one, i suppose we'll just be escaping in some unnecessary 
> cases(like the 2nd and the 4th), but still better than the current state so 
> nvm.
Oops, I forgot to reply to this one. Good catch that `*` and `_` are different.
The rules are indeed really complicated (particularly for _ next to 
punctuation) and we don't know whether we are at the start or end.

However my reading of the spec says alnum_alnum never needs to be escaped, and 
that's incredibly common, so I've added that special case. (Same is true for 
alnum___alnum, but I don't think that's common enough to bother with).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75687/new/

https://reviews.llvm.org/D75687



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D75687: [clangd] Only minimally escape text when rendering to markdown.

2020-03-17 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 250730.
sammccall added a comment.

Fix '_' rules.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75687/new/

https://reviews.llvm.org/D75687

Files:
  clang-tools-extra/clangd/FormattedString.cpp
  clang-tools-extra/clangd/FormattedString.h
  clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
  clang-tools-extra/clangd/unittests/HoverTests.cpp

Index: clang-tools-extra/clangd/unittests/HoverTests.cpp
===
--- clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -1905,7 +1905,7 @@
   llvm::StringRef ExpectedMarkdown = R"md(### variable `foo`  
 
 ---
-Value \= `val`  
+Value = `val`  
 
 ---
 ```cpp
Index: clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
===
--- clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
+++ clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
@@ -17,25 +17,96 @@
 namespace markup {
 namespace {
 
-TEST(Render, Escaping) {
-  // Check some ASCII punctuation
-  Paragraph P;
-  P.appendText("*!`");
-  EXPECT_EQ(P.asMarkdown(), "\\*\\!\\`");
+std::string escape(llvm::StringRef Text) {
+  return Paragraph().appendText(Text.str()).asMarkdown();
+}
+
+MATCHER_P(escaped, C, "") {
+  return testing::ExplainMatchResult(::testing::HasSubstr(std::string{'\\', C}),
+ arg, result_listener);
+}
 
+MATCHER(escapedNone, "") {
+  return testing::ExplainMatchResult(::testing::Not(::testing::HasSubstr("\\")),
+ arg, result_listener);
+}
+
+TEST(Render, Escaping) {
   // Check all ASCII punctuation.
-  P = Paragraph();
   std::string Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
-  // Same text, with each character escaped.
-  std::string EscapedPunctuation;
-  EscapedPunctuation.reserve(2 * Punctuation.size());
-  for (char C : Punctuation)
-EscapedPunctuation += std::string("\\") + C;
-  P.appendText(Punctuation);
-  EXPECT_EQ(P.asMarkdown(), EscapedPunctuation);
+  std::string EscapedPunc = R"txt(!"#$%&'()\*+,-./:;<=>?@[\\]^\_\`{|}~)txt";
+  EXPECT_EQ(escape(Punctuation), EscapedPunc);
+
+  // Inline code
+  EXPECT_EQ(escape("`foo`"), R"(\`foo\`)");
+  EXPECT_EQ(escape("`foo"), R"(\`foo)");
+  EXPECT_EQ(escape("foo`"), R"(foo\`)");
+  EXPECT_EQ(escape("``foo``"), R"(\`\`foo\`\`)");
+  // Code blocks
+  EXPECT_EQ(escape("```"), R"(\`\`\`)"); // This could also be inline code!
+  EXPECT_EQ(escape("~~~"), R"(\~~~)");
+
+  // Rulers and headings
+  EXPECT_THAT(escape("## Heading"), escaped('#'));
+  EXPECT_THAT(escape("Foo # bar"), escapedNone());
+  EXPECT_EQ(escape("---"), R"(\---)");
+  EXPECT_EQ(escape("-"), R"(\-)");
+  EXPECT_EQ(escape("==="), R"(\===)");
+  EXPECT_EQ(escape("="), R"(\=)");
+  EXPECT_EQ(escape("***"), R"(\*\*\*)"); // \** could start emphasis!
+
+  // HTML tags.
+  EXPECT_THAT(escape(""), escaped('<'));
+  EXPECT_THAT(escape("std::vector"), escaped('<'));
+  EXPECT_THAT(escape("std::map"), escapedNone());
+  // Autolinks
+  EXPECT_THAT(escape("Email "), escapedNone());
+  EXPECT_THAT(escape("Website "), escapedNone());
+
+  // Bullet lists.
+  EXPECT_THAT(escape("- foo"), escaped('-'));
+  EXPECT_THAT(escape("* foo"), escaped('*'));
+  EXPECT_THAT(escape("+ foo"), escaped('+'));
+  EXPECT_THAT(escape("+"), escaped('+'));
+  EXPECT_THAT(escape("a + foo"), escapedNone());
+  EXPECT_THAT(escape("a+ foo"), escapedNone());
+  EXPECT_THAT(escape("1. foo"), escaped('.'));
+  EXPECT_THAT(escape("a. foo"), escapedNone());
+
+  // Emphasis.
+  EXPECT_EQ(escape("*foo*"), R"(\*foo\*)");
+  EXPECT_EQ(escape("**foo**"), R"(\*\*foo\*\*)");
+  EXPECT_THAT(escape("*foo"), escaped('*'));
+  EXPECT_THAT(escape("foo *"), escapedNone());
+  EXPECT_THAT(escape("foo * bar"), escapedNone());
+  EXPECT_THAT(escape("foo_bar"), escapedNone());
+  EXPECT_THAT(escape("foo _bar"), escaped('_'));
+  EXPECT_THAT(escape("foo_ bar"), escaped('_'));
+  EXPECT_THAT(escape("foo _ bar"), escapedNone());
+
+  // HTML entities.
+  EXPECT_THAT(escape("fish &chips;"), escaped('&'));
+  EXPECT_THAT(escape("fish & chips;"), escapedNone());
+  EXPECT_THAT(escape("fish &chips"), escapedNone());
+  EXPECT_THAT(escape("foo * bar"), escaped('&'));
+  EXPECT_THAT(escape("foo ¯ bar"), escaped('&'));
+  EXPECT_THAT(escape("foo &?; bar"), escapedNone());
+
+  // Links.
+  EXPECT_THAT(escape("[foo](bar)"), escaped(']'));
+  EXPECT_THAT(escape("[foo]: bar"), escaped(']'));
+  // No need to escape these, as the target never exists.
+  EXPECT_THAT(escape("[foo][]"), escapedNone());
+  EXPECT_THAT(escape("[foo][bar]"), escapedNone());
+  EXPECT_THAT(escape("[foo]"), escapedNone());
 
   // In code blocks we don't need to escape ASCII punctuation.
-  P = Paragraph();
+  Paragraph P = Paragraph();
   P.appendCode("* foo !+ bar * baz");

[PATCH] D75687: [clangd] Only minimally escape text when rendering to markdown.

2020-03-17 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet accepted this revision.
kadircet added a comment.
This revision is now accepted and ready to land.

thanks, lgtm!




Comment at: clang-tools-extra/clangd/FormattedString.cpp:150
+// Not a delimiter if surrounded by space.
+return !SpaceSurrounds();
+  case '-': // Setex heading, horizontal ruler, or bullet.

kadircet wrote:
> `_` seems to behave different than `*` :(
> 
> it seems to rather depend on the spaces around the text being emphasized, i.e
> 
> ```
> foo _ bar _ foo -> no emphasis
> foo _ bar_ foo -> no emphasis
> foo _bar_ foo -> emphasis on bar
> foo_bar_ foo -> no emphasis
> ```
> 
> so this should rather be `Before.endswith(" ") && isAlpha(After)` for the 
> beginning of emphasis and the opposite for the ending.
> Not sure if there's an easy way to decide on it in isolation.
regarding this one, i suppose we'll just be escaping in some unnecessary 
cases(like the 2nd and the 4th), but still better than the current state so nvm.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75687/new/

https://reviews.llvm.org/D75687



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D75687: [clangd] Only minimally escape text when rendering to markdown.

2020-03-16 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a comment.

Sorry for the slow turnaround, bit off more than I could chew on a few fronts 
:-(




Comment at: clang-tools-extra/clangd/FormattedString.cpp:69
+return false;
+  if (Contents.front() == '!' || Contents.front() == '?' ||
+  Contents.front() == '/')

kadircet wrote:
> what is the `?` for ?
` nit: After.ltrim('#')
I'm trying to avoid repeating the known values of C as there's various 
fallthrough/copied cases and it seems more fragile.



Comment at: clang-tools-extra/clangd/FormattedString.cpp:164
+  case '<': // HTML tag (or autolink, which we choose not to escape)
+return looksLikeTag(After);
+  case '>': // Quote marker. Needs escaping at start of line.

kadircet wrote:
> is this really worth all the trouble ?
Simplified it a bit. I think it's worth not escaping `<` when not matched with 
`>`. Sadly the rule can't be quite that simple because multiple chunks can form 
a tag.



Comment at: clang-tools-extra/clangd/FormattedString.cpp:166
+  case '>': // Quote marker. Needs escaping at start of line.
+return StartsLine && Before.empty();
+  case '&': { // HTML entity reference

kadircet wrote:
> I believe it is also allowed to have whitespaces(less than 4?) before the `>` 
> to be interpreted as a quote marker.
Yes, but a chunk never begins with whitespace (see assert at top of function)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75687/new/

https://reviews.llvm.org/D75687



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D75687: [clangd] Only minimally escape text when rendering to markdown.

2020-03-16 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 250624.
sammccall marked 10 inline comments as done.
sammccall added a comment.

Address review comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75687/new/

https://reviews.llvm.org/D75687

Files:
  clang-tools-extra/clangd/FormattedString.cpp
  clang-tools-extra/clangd/FormattedString.h
  clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
  clang-tools-extra/clangd/unittests/HoverTests.cpp

Index: clang-tools-extra/clangd/unittests/HoverTests.cpp
===
--- clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -1905,7 +1905,7 @@
   llvm::StringRef ExpectedMarkdown = R"md(### variable `foo`  
 
 ---
-Value \= `val`  
+Value = `val`  
 
 ---
 ```cpp
Index: clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
===
--- clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
+++ clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
@@ -17,25 +17,94 @@
 namespace markup {
 namespace {
 
-TEST(Render, Escaping) {
-  // Check some ASCII punctuation
-  Paragraph P;
-  P.appendText("*!`");
-  EXPECT_EQ(P.asMarkdown(), "\\*\\!\\`");
+std::string escape(llvm::StringRef Text) {
+  return Paragraph().appendText(Text.str()).asMarkdown();
+}
+
+MATCHER_P(escaped, C, "") {
+  return testing::ExplainMatchResult(::testing::HasSubstr(std::string{'\\', C}),
+ arg, result_listener);
+}
 
+MATCHER(escapedNone, "") {
+  return testing::ExplainMatchResult(::testing::Not(::testing::HasSubstr("\\")),
+ arg, result_listener);
+}
+
+TEST(Render, Escaping) {
   // Check all ASCII punctuation.
-  P = Paragraph();
   std::string Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
-  // Same text, with each character escaped.
-  std::string EscapedPunctuation;
-  EscapedPunctuation.reserve(2 * Punctuation.size());
-  for (char C : Punctuation)
-EscapedPunctuation += std::string("\\") + C;
-  P.appendText(Punctuation);
-  EXPECT_EQ(P.asMarkdown(), EscapedPunctuation);
+  std::string EscapedPunc = R"txt(!"#$%&'()\*+,-./:;<=>?@[\\]^\_\`{|}~)txt";
+  EXPECT_EQ(escape(Punctuation), EscapedPunc);
+
+  // Inline code
+  EXPECT_EQ(escape("`foo`"), R"(\`foo\`)");
+  EXPECT_EQ(escape("`foo"), R"(\`foo)");
+  EXPECT_EQ(escape("foo`"), R"(foo\`)");
+  EXPECT_EQ(escape("``foo``"), R"(\`\`foo\`\`)");
+  // Code blocks
+  EXPECT_EQ(escape("```"), R"(\`\`\`)"); // This could also be inline code!
+  EXPECT_EQ(escape("~~~"), R"(\~~~)");
+
+  // Rulers and headings
+  EXPECT_THAT(escape("## Heading"), escaped('#'));
+  EXPECT_THAT(escape("Foo # bar"), escapedNone());
+  EXPECT_EQ(escape("---"), R"(\---)");
+  EXPECT_EQ(escape("-"), R"(\-)");
+  EXPECT_EQ(escape("==="), R"(\===)");
+  EXPECT_EQ(escape("="), R"(\=)");
+  EXPECT_EQ(escape("***"), R"(\*\*\*)"); // \** could start emphasis!
+
+  // HTML tags.
+  EXPECT_THAT(escape(""), escaped('<'));
+  EXPECT_THAT(escape("std::vector"), escaped('<'));
+  EXPECT_THAT(escape("std::map"), escapedNone());
+  // Autolinks
+  EXPECT_THAT(escape("Email "), escapedNone());
+  EXPECT_THAT(escape("Website "), escapedNone());
+
+  // Bullet lists.
+  EXPECT_THAT(escape("- foo"), escaped('-'));
+  EXPECT_THAT(escape("* foo"), escaped('*'));
+  EXPECT_THAT(escape("+ foo"), escaped('+'));
+  EXPECT_THAT(escape("+"), escaped('+'));
+  EXPECT_THAT(escape("a + foo"), escapedNone());
+  EXPECT_THAT(escape("a+ foo"), escapedNone());
+  EXPECT_THAT(escape("1. foo"), escaped('.'));
+  EXPECT_THAT(escape("a. foo"), escapedNone());
+
+  // Emphasis.
+  EXPECT_EQ(escape("*foo*"), R"(\*foo\*)");
+  EXPECT_EQ(escape("**foo**"), R"(\*\*foo\*\*)");
+  EXPECT_THAT(escape("*foo"), escaped('*'));
+  EXPECT_THAT(escape("foo *"), escapedNone());
+  EXPECT_THAT(escape("foo * bar"), escapedNone());
+  EXPECT_THAT(escape("foo_bar"), escaped('_'));
+  EXPECT_THAT(escape("foo _ bar"), escapedNone());
+
+  // HTML entities.
+  EXPECT_THAT(escape("fish &chips;"), escaped('&'));
+  EXPECT_THAT(escape("fish & chips;"), escapedNone());
+  EXPECT_THAT(escape("fish &chips"), escapedNone());
+  EXPECT_THAT(escape("foo * bar"), escaped('&'));
+  EXPECT_THAT(escape("foo ¯ bar"), escaped('&'));
+  EXPECT_THAT(escape("foo &?; bar"), escapedNone());
+
+  // Links.
+  EXPECT_THAT(escape("[foo](bar)"), escaped(']'));
+  EXPECT_THAT(escape("[foo]: bar"), escaped(']'));
+  // No need to escape these, as the target never exists.
+  EXPECT_THAT(escape("[foo][]"), escapedNone());
+  EXPECT_THAT(escape("[foo][bar]"), escapedNone());
+  EXPECT_THAT(escape("[foo]"), escapedNone());
 
   // In code blocks we don't need to escape ASCII punctuation.
-  P = Paragraph();
+  Paragraph P = Paragraph();
   P.appendCode("* foo !+ bar * baz");
   EXPECT_EQ(P.asMarkdown(), "`* foo !+ bar * b

[PATCH] D75687: [clangd] Only minimally escape text when rendering to markdown.

2020-03-06 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet added a comment.

Just writing down my investigation results for context:
Looks like we'll never escape `$%'(,/:;?@[^{|}` anymore.

Markdown already doesn't provide backslash escaping for `$%',/:;?@^|` which 
leaves us with parentheses `([{}`:

- `[` looks fine as it is only used for hyperlinks and we escape the `]` if 
need be
- '(' again seems to be only special for providing a text for hyperlinks, so 
that should also be OK.
- `{}` apparently doesn't do anything in normal markdown but some extensions 
make use of it for adding `id`s to headers, i.e `# header1 {#mycrazyheader}`, 
so again should be OK.




Comment at: clang-tools-extra/clangd/FormattedString.cpp:69
+return false;
+  if (Contents.front() == '!' || Contents.front() == '?' ||
+  Contents.front() == '/')

what is the `?` for ?



Comment at: clang-tools-extra/clangd/FormattedString.cpp:73
+  // Check the start of the tag name.
+  if (Contents.empty() || !llvm::isAlpha(Contents.front()))
+return false;

we early exited already in case of empty `contents`



Comment at: clang-tools-extra/clangd/FormattedString.cpp:130
+  case '#': { // ATX heading.
+if (!(StartsLine && Before.empty()))
+  return false;

nit: if(!StartsLine || !Before.empty()))



Comment at: clang-tools-extra/clangd/FormattedString.cpp:132
+  return false;
+llvm::StringRef Rest = After.ltrim(C);
+return Rest.empty() || Rest.startswith(" ");

nit: After.ltrim('#')



Comment at: clang-tools-extra/clangd/FormattedString.cpp:133
+llvm::StringRef Rest = After.ltrim(C);
+return Rest.empty() || Rest.startswith(" ");
+  }

markdown is weird :(



Comment at: clang-tools-extra/clangd/FormattedString.cpp:139
+//   ]: is a link reference
+// The following are only links if the a link reference exists:
+//   ] by itself is a shortcut link

s/the a/the/



Comment at: clang-tools-extra/clangd/FormattedString.cpp:150
+// Not a delimiter if surrounded by space.
+return !SpaceSurrounds();
+  case '-': // Setex heading, horizontal ruler, or bullet.

`_` seems to behave different than `*` :(

it seems to rather depend on the spaces around the text being emphasized, i.e

```
foo _ bar _ foo -> no emphasis
foo _ bar_ foo -> no emphasis
foo _bar_ foo -> emphasis on bar
foo_bar_ foo -> no emphasis
```

so this should rather be `Before.endswith(" ") && isAlpha(After)` for the 
beginning of emphasis and the opposite for the ending.
Not sure if there's an easy way to decide on it in isolation.



Comment at: clang-tools-extra/clangd/FormattedString.cpp:162
+  return true;
+return !SpaceSurrounds();
+  case '<': // HTML tag (or autolink, which we choose not to escape)

nit: `return IsBullet() || RulerLength() || !SpaceSurrounds()`;



Comment at: clang-tools-extra/clangd/FormattedString.cpp:164
+  case '<': // HTML tag (or autolink, which we choose not to escape)
+return looksLikeTag(After);
+  case '>': // Quote marker. Needs escaping at start of line.

is this really worth all the trouble ?



Comment at: clang-tools-extra/clangd/FormattedString.cpp:166
+  case '>': // Quote marker. Needs escaping at start of line.
+return StartsLine && Before.empty();
+  case '&': { // HTML entity reference

I believe it is also allowed to have whitespaces(less than 4?) before the `>` 
to be interpreted as a quote marker.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75687/new/

https://reviews.llvm.org/D75687



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D75687: [clangd] Only minimally escape text when rendering to markdown.

2020-03-05 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 248493.
sammccall added a comment.

couple more tests


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75687/new/

https://reviews.llvm.org/D75687

Files:
  clang-tools-extra/clangd/FormattedString.cpp
  clang-tools-extra/clangd/FormattedString.h
  clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
  clang-tools-extra/clangd/unittests/HoverTests.cpp

Index: clang-tools-extra/clangd/unittests/HoverTests.cpp
===
--- clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -1905,7 +1905,7 @@
   llvm::StringRef ExpectedMarkdown = R"md(### variable `foo`  
 
 ---
-Value \= `val`  
+Value = `val`  
 
 ---
 ```cpp
Index: clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
===
--- clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
+++ clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
@@ -17,25 +17,94 @@
 namespace markup {
 namespace {
 
-TEST(Render, Escaping) {
-  // Check some ASCII punctuation
-  Paragraph P;
-  P.appendText("*!`");
-  EXPECT_EQ(P.asMarkdown(), "\\*\\!\\`");
+std::string escape(llvm::StringRef Text) {
+  return Paragraph().appendText(Text.str()).asMarkdown();
+}
+
+MATCHER_P(escaped, C, "") {
+  return testing::ExplainMatchResult(::testing::HasSubstr(std::string{'\\', C}),
+ arg, result_listener);
+}
 
+MATCHER(escapedNone, "") {
+  return testing::ExplainMatchResult(::testing::Not(::testing::HasSubstr("\\")),
+ arg, result_listener);
+}
+
+TEST(Render, Escaping) {
   // Check all ASCII punctuation.
-  P = Paragraph();
   std::string Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
-  // Same text, with each character escaped.
-  std::string EscapedPunctuation;
-  EscapedPunctuation.reserve(2 * Punctuation.size());
-  for (char C : Punctuation)
-EscapedPunctuation += std::string("\\") + C;
-  P.appendText(Punctuation);
-  EXPECT_EQ(P.asMarkdown(), EscapedPunctuation);
+  std::string EscapedPunc = R"txt(!"#$%&'()\*+,-./:;<=>?@[\\]^\_\`{|}~)txt";
+  EXPECT_EQ(escape(Punctuation), EscapedPunc);
+
+  // Inline code
+  EXPECT_EQ(escape("`foo`"), R"(\`foo\`)");
+  EXPECT_EQ(escape("`foo"), R"(\`foo)");
+  EXPECT_EQ(escape("foo`"), R"(foo\`)");
+  EXPECT_EQ(escape("``foo``"), R"(\`\`foo\`\`)");
+  // Code blocks
+  EXPECT_EQ(escape("```"), R"(\`\`\`)"); // This could also be inline code!
+  EXPECT_EQ(escape("~~~"), R"(\~~~)");
+
+  // Rulers and headings
+  EXPECT_THAT(escape("## Heading"), escaped('#'));
+  EXPECT_THAT(escape("Foo # bar"), escapedNone());
+  EXPECT_EQ(escape("---"), R"(\---)");
+  EXPECT_EQ(escape("-"), R"(\-)");
+  EXPECT_EQ(escape("==="), R"(\===)");
+  EXPECT_EQ(escape("="), R"(\=)");
+  EXPECT_EQ(escape("***"), R"(\*\*\*)"); // \** could start emphasis!
+
+  // HTML tags.
+  EXPECT_THAT(escape(""), escaped('<'));
+  EXPECT_THAT(escape("std::vector"), escaped('<'));
+  EXPECT_THAT(escape("std::map"), escapedNone());
+  // Autolinks
+  EXPECT_THAT(escape("Email "), escapedNone());
+  EXPECT_THAT(escape("Website "), escapedNone());
+
+  // Bullet lists.
+  EXPECT_THAT(escape("- foo"), escaped('-'));
+  EXPECT_THAT(escape("* foo"), escaped('*'));
+  EXPECT_THAT(escape("+ foo"), escaped('+'));
+  EXPECT_THAT(escape("+"), escaped('+'));
+  EXPECT_THAT(escape("a + foo"), escapedNone());
+  EXPECT_THAT(escape("a+ foo"), escapedNone());
+  EXPECT_THAT(escape("1. foo"), escaped('.'));
+  EXPECT_THAT(escape("a. foo"), escapedNone());
+
+  // Emphasis.
+  EXPECT_EQ(escape("*foo*"), R"(\*foo\*)");
+  EXPECT_EQ(escape("**foo**"), R"(\*\*foo\*\*)");
+  EXPECT_THAT(escape("*foo"), escaped('*'));
+  EXPECT_THAT(escape("foo *"), escapedNone());
+  EXPECT_THAT(escape("foo * bar"), escapedNone());
+  EXPECT_THAT(escape("foo_bar"), escaped('_'));
+  EXPECT_THAT(escape("foo _ bar"), escapedNone());
+
+  // HTML entities.
+  EXPECT_THAT(escape("fish &chips;"), escaped('&'));
+  EXPECT_THAT(escape("fish & chips;"), escapedNone());
+  EXPECT_THAT(escape("fish &chips"), escapedNone());
+  EXPECT_THAT(escape("foo * bar"), escaped('&'));
+  EXPECT_THAT(escape("foo ¯ bar"), escaped('&'));
+  EXPECT_THAT(escape("foo &?; bar"), escapedNone());
+
+  // Links.
+  EXPECT_THAT(escape("[foo](bar)"), escaped(']'));
+  EXPECT_THAT(escape("[foo]: bar"), escaped(']'));
+  // No need to escape these, as the target never exists.
+  EXPECT_THAT(escape("[foo][]"), escapedNone());
+  EXPECT_THAT(escape("[foo][bar]"), escapedNone());
+  EXPECT_THAT(escape("[foo]"), escapedNone());
 
   // In code blocks we don't need to escape ASCII punctuation.
-  P = Paragraph();
+  Paragraph P = Paragraph();
   P.appendCode("* foo !+ bar * baz");
   EXPECT_EQ(P.asMarkdown(), "`* foo !+ bar * baz`");
 
Index: clang-tools-extra/clangd/FormattedS

[PATCH] D75687: [clangd] Only minimally escape text when rendering to markdown.

2020-03-05 Thread Sam McCall via Phabricator via cfe-commits
sammccall created this revision.
sammccall added a reviewer: kadircet.
Herald added subscribers: cfe-commits, usaxena95, arphaman, jkorous, MaskRay, 
ilya-biryukov.
Herald added a project: clang.

Conservatively escaping everything is bad in coc.nvim which shows the markdown
to the user, and we have reports of it causing problems for other parsers.

Fixes https://github.com/clangd/clangd/issues/301


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D75687

Files:
  clang-tools-extra/clangd/FormattedString.cpp
  clang-tools-extra/clangd/FormattedString.h
  clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
  clang-tools-extra/clangd/unittests/HoverTests.cpp

Index: clang-tools-extra/clangd/unittests/HoverTests.cpp
===
--- clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -1905,7 +1905,7 @@
   llvm::StringRef ExpectedMarkdown = R"md(### variable `foo`  
 
 ---
-Value \= `val`  
+Value = `val`  
 
 ---
 ```cpp
Index: clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
===
--- clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
+++ clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
@@ -17,25 +17,91 @@
 namespace markup {
 namespace {
 
-TEST(Render, Escaping) {
-  // Check some ASCII punctuation
-  Paragraph P;
-  P.appendText("*!`");
-  EXPECT_EQ(P.asMarkdown(), "\\*\\!\\`");
+std::string escape(llvm::StringRef Text) {
+  return Paragraph().appendText(Text.str()).asMarkdown();
+}
+
+MATCHER_P(escaped, C, "") {
+  return testing::ExplainMatchResult(::testing::HasSubstr(std::string{'\\', C}),
+ arg, result_listener);
+}
 
+MATCHER(escapedNone, "") {
+  return testing::ExplainMatchResult(::testing::Not(::testing::HasSubstr("\\")),
+ arg, result_listener);
+}
+
+TEST(Render, Escaping) {
   // Check all ASCII punctuation.
-  P = Paragraph();
   std::string Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
-  // Same text, with each character escaped.
-  std::string EscapedPunctuation;
-  EscapedPunctuation.reserve(2 * Punctuation.size());
-  for (char C : Punctuation)
-EscapedPunctuation += std::string("\\") + C;
-  P.appendText(Punctuation);
-  EXPECT_EQ(P.asMarkdown(), EscapedPunctuation);
+  std::string EscapedPunc = R"txt(!"#$%&'()\*+,-./:;<=>?@[\\]^\_\`{|}~)txt";
+  EXPECT_EQ(escape(Punctuation), EscapedPunc);
+
+  // Inline code
+  EXPECT_EQ(escape("`foo`"), R"(\`foo\`)");
+  EXPECT_EQ(escape("`foo"), R"(\`foo)");
+  EXPECT_EQ(escape("foo`"), R"(foo\`)");
+  EXPECT_EQ(escape("``foo``"), R"(\`\`foo\`\`)");
+  // Code blocks
+  EXPECT_EQ(escape("```"), R"(\`\`\`)"); // This could also be inline code!
+  EXPECT_EQ(escape("~~~"), R"(\~~~)");
+
+  // Rulers and headings
+  EXPECT_THAT(escape("## Heading"), escaped('#'));
+  EXPECT_THAT(escape("Foo # bar"), escapedNone());
+  EXPECT_EQ(escape("---"), R"(\---)");
+  EXPECT_EQ(escape("-"), R"(\-)");
+  EXPECT_EQ(escape("==="), R"(\===)");
+  EXPECT_EQ(escape("="), R"(\=)");
+  EXPECT_EQ(escape("***"), R"(\*\*\*)"); // \** could start emphasis!
+
+  // HTML tags.
+  EXPECT_THAT(escape(""), escapedNone());
+  EXPECT_THAT(escape("Website "), escapedNone());
+
+  // Bullet lists.
+  EXPECT_THAT(escape("- foo"), escaped('-'));
+  EXPECT_THAT(escape("* foo"), escaped('*'));
+  EXPECT_THAT(escape("+ foo"), escaped('+'));
+  EXPECT_THAT(escape("+"), escaped('+'));
+  EXPECT_THAT(escape("a + foo"), escapedNone());
+  EXPECT_THAT(escape("a+ foo"), escapedNone());
+  EXPECT_THAT(escape("1. foo"), escaped('.'));
+  EXPECT_THAT(escape("a. foo"), escapedNone());
+
+  // Emphasis.
+  EXPECT_EQ(escape("*foo*"), R"(\*foo\*)");
+  EXPECT_EQ(escape("**foo**"), R"(\*\*foo\*\*)");
+  EXPECT_THAT(escape("*foo"), escaped('*'));
+  EXPECT_THAT(escape("foo *"), escapedNone());
+  EXPECT_THAT(escape("foo * bar"), escapedNone());
+  EXPECT_THAT(escape("foo_bar"), escaped('_'));
+  EXPECT_THAT(escape("foo _ bar"), escapedNone());
+
+  // HTML entities.
+  EXPECT_THAT(escape("fish &chips;"), escaped('&'));
+  EXPECT_THAT(escape("fish & chips;"), escapedNone());
+  EXPECT_THAT(escape("fish &chips"), escapedNone());
+  EXPECT_THAT(escape("foo * bar"), escaped('&'));
+  EXPECT_THAT(escape("foo ¯ bar"), escaped('&'));
+  EXPECT_THAT(escape("foo &?; bar"), escapedNone());
+
+  // Links.
+  EXPECT_THAT(escape("[foo](bar)"), escaped(']'));
+  EXPECT_THAT(escape("[foo]: bar"), escaped(']'));
+  // No need to escape these, as the target never exists.
+  EXPECT_THAT(escape("[foo][]"), escapedNone());
+  EXPECT_THAT(escape("[foo][bar]"), escapedNone());
+  EXPECT_THAT(escape("[foo]"), escapedNone());
 
   // In code blocks we don't need to escape ASCII punctuation.
-  P = Paragraph();
+  Paragraph P = Paragraph();
   P.appendCode("* foo !+ bar * baz");
   EXPECT_EQ(P.asMarkdow