From 94ed3e615b89d7f94e3f05aa957523a884dd4ec1 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <noritnk@kcn.ne.jp>
Date: Sun, 13 Jan 2019 07:53:32 +0900
Subject: [PATCH] grep: speed up multiple word matching

grep uses its KWset matcher for multiple word matching, but that is
very slow when most of the strings that match a pattern are not words.
So, if the first match for a pattern is not a word, use the grep matcher
to match against that line.

Note that when START_PTR is set, the grep matcher uses the regex
matcher, which is very slow at matching words.  Therefore, we use the
grep matcher only when START_PTR is NULL.

* src/kwsearch.c (Fexecute): If an initial match is incomplete because
it is not on a word boundary, use the grep matcher to find a matching
line.
---
 src/kwsearch.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/kwsearch.c b/src/kwsearch.c
index f121816..7644350 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -250,6 +250,23 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
                 else
                   goto success;
               }
+            if (!start_ptr && !localeinfo.multibyte)
+              {
+                if (! kwsearch->re)
+                  {
+                    fgrep_to_grep_pattern (&kwsearch->pattern, &kwsearch->size);
+                    kwsearch->re = GEAcompile (kwsearch->pattern,
+                                               kwsearch->size,
+                                               RE_SYNTAX_GREP);
+                  }
+                end = memchr (beg + len, eol, (buf + size) - (beg + len));
+                end = end ? end + 1 : buf + size;
+                if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL)
+                    != (size_t) -1)
+                  goto success_match_words;
+                beg = end - 1;
+                break;
+              }
             if (!len)
               break;
             offset = kwsexec (kwset, beg, --len, &kwsmatch, true);
@@ -270,6 +287,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
  success:
   end = memchr (beg + len, eol, (buf + size) - (beg + len));
   end = end ? end + 1 : buf + size;
+ success_match_words:
   beg = memrchr (buf, eol, beg - buf);
   beg = beg ? beg + 1 : buf;
   len = end - beg;
-- 
2.24.0.390.g083378cc35

