From e506937dff5ebb976986a27d8cb84a68d41ce3a5 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <noritnk@kcn.ne.jp>
Date: Fri, 14 Mar 2014 21:13:57 +0900
Subject: [PATCH] grep: optimization by using the Galil rule for Boyer-Moore
 algorithm in KWSet

The Boyer-Moore algorithm runs in O(m n) time in the worst case,
which may be much slower than the DFA.

The Galil rule reduces this worst case from O(m n) to O(n), without
adding overhead or slowing down other cases, by avoiding comparing any
position in the text more than once.  This patch implements it.

To measure the performance, I prepared the following input, which is a
worst case for the Boyer-Moore algorithm:

    yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 > ../k

I ran the test with the patch applied (best of 5 trials):

    env LC_ALL=C time -p src/grep kjjjjjjjjjjjjjjjjjjj k
        real 0.70       user 0.32       sys 0.38

Back out that commit (temporarily), recompile, and rerun the experiment:

    env LC_ALL=C time -p src/grep kjjjjjjjjjjjjjjjjjjj k
        real 3.97       user 3.56       sys 0.40

* src/kwset.c (struct kwset): Replace member `mind2' with `shift'.
(kwsprep): Calculate the shift value for a mismatch at each position.
(bmexec): Use the new shift table.
---
 src/kwset.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 23 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index 410e046..906db01 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -83,7 +83,7 @@ struct kwset
   unsigned char delta[NCHAR];	/* Delta table for rapid search. */
   struct trie *next[NCHAR];	/* Table of children of the root. */
   char *target;			/* Target string if there's only one. */
-  int mind2;			/* Used in Boyer-Moore search for one string. */
+  int *shift;			/* Used in Boyer-Moore search for one string. */
   char const *trans;		/* Character translation table. */
 };
 
@@ -385,7 +385,7 @@ const char *
 kwsprep (kwset_t kws)
 {
   struct kwset *kwset;
-  int i;
+  int i, j;
   struct trie *curr;
   char const *trans;
   unsigned char delta[NCHAR];
@@ -401,8 +401,6 @@ kwsprep (kwset_t kws)
      of the hairy commentz-walter algorithm. */
   if (kwset->words == 1 && kwset->trans == NULL)
     {
-      char c;
-
       /* Looking for just one string.  Extract it from the trie. */
       kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
       if (!kwset->target)
@@ -417,11 +415,23 @@ kwsprep (kwset_t kws)
         delta[U(kwset->target[i])] = kwset->mind - (i + 1);
       /* Find the minimal delta2 shift that we might make after
          a backwards match has failed. */
-      c = kwset->target[kwset->mind - 1];
-      for (i = kwset->mind - 2; i >= 0; --i)
-        if (kwset->target[i] == c)
-          break;
-      kwset->mind2 = kwset->mind - (i + 1);
+      kwset->shift = (int *) obstack_alloc(&kwset->obstack,
+                                           sizeof (*kwset->shift) * (kwset->mind - 1));
+      for (i = 1; i < kwset->mind; ++i)
+       {
+         for (j = i + 1; j <= kwset->mind; ++j)
+           if (memcmp (kwset->target + kwset->mind - j, kwset->target + kwset->mind - i, i) == 0)
+             break;
+         if (j <= kwset->mind)
+           kwset->shift[i - 1] = j - i;
+         else
+           {
+             for (j = i - 1; j >= 1; --j)
+               if (memcmp (kwset->target, kwset->target + kwset->mind - j, j) == 0)
+                 break;
+             kwset->shift[i - 1] = kwset->mind - j;
+           }
+       }
     }
   else
     {
@@ -503,7 +513,7 @@ bmexec (kwset_t kws, char const *text, size_t size)
   struct kwset const *kwset;
   unsigned char const *d1;
   char const *ep, *sp, *tp;
-  int d, gc, i, len, md2;
+  int d, gc, i, j, len;
 
   kwset = (struct kwset const *) kws;
   len = kwset->mind;
@@ -521,7 +531,6 @@ bmexec (kwset_t kws, char const *text, size_t size)
   d1 = kwset->delta;
   sp = kwset->target + len;
   gc = U(sp[-2]);
-  md2 = kwset->mind2;
   tp = text + len;
 
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
@@ -550,14 +559,33 @@ bmexec (kwset_t kws, char const *text, size_t size)
           }
         break;
       found:
-        if (U(tp[-2]) == gc)
+        j = 3;
+        while (1)
           {
-            for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
-              ;
-            if (i > len)
-              return tp - len - text;
+            if (U(tp[-2]) == gc)
+              {
+                for (i = 3; i <= d && tp[-i] == sp[-i]; ++i)
+                  ;
+                if (i > d)
+                  {
+                    for (i = j; i <= len && tp[-i] == sp[-i]; ++i)
+                      ;
+                    if (i > len)
+                      return tp - len - text;
+                  }
+                d = kwset->shift[i - 2]; tp += d;
+                if (tp > ep || tp[-1] != sp[-1])
+                  break;
+                j = d + i;
+              }
+            else
+              {
+                d = kwset->shift[0]; tp += d;
+                if (tp > ep || tp[-1] != sp[-1])
+                  break;
+                j = d + 2;
+              }
           }
-        tp += md2;
       }
 
   /* Now we have only a few characters left to search.  We
@@ -569,14 +597,33 @@ bmexec (kwset_t kws, char const *text, size_t size)
       d = d1[U((tp += d)[-1])];
       if (d != 0)
         continue;
-      if (U(tp[-2]) == gc)
+      j = 3;
+      while (1)
         {
-          for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
-            ;
-          if (i > len)
-            return tp - len - text;
+          if (U(tp[-2]) == gc)
+            {
+              for (i = 3; i <= d && tp[-i] == sp[-i]; ++i)
+                ;
+              if (i > d)
+                {
+                  for (i = j; i <= len && tp[-i] == sp[-i]; ++i)
+                    ;
+                  if (i > len)
+                    return tp - len - text;
+                }
+              d = kwset->shift[i - 2]; tp += d;
+              if (tp > ep || tp[-1] != sp[-1])
+                break;
+              j = d + i;
+            }
+          else
+            {
+              d = kwset->shift[0]; tp += d;
+              if (tp > ep || tp[-1] != sp[-1])
+                break;
+              j = d + 2;
+            }
         }
-      d = md2;
     }
 
   return -1;
-- 
1.9.0

