I believe that memchr() isn't slower on HP-UX and Solaris but more
faster on x86 processors.

Now I wrote the additional patch.

It replaces `tp += d' to the other code in bmexec() and cwexec() for x86
processors.

I see that the code is more complex and slower theoretically, but it's
faster actually when `d' is small.  (We can test it by `grep -i jk k'
after patch#17230 and this patch.)

Therefore, I think that memchr() is faster than `tp += d' because the
increment instruction is more faster than the add instruction on x86
processors.  KWSet may be slower than DFA on them in the worst case,
even if doesn't use delta2.  Try to run and compare below and compare
without this patches.  The former uses only KWSet and the later uses only
DFA.  Later is faster on Linux x86, because DFA uses increment instrument
for shift of text pointer.  It may generate a wrong usage among users.
I think that it should be avoided.

  $ yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 >k
  $ env LANG=C time -p src/grep jk k
  $ env LANG=C time -p src/grep -i jk k

> sometimes the former is more important than the latter, and this may be
> one of those times.

I see that grep attaches a high value to performance.  It uses not only
regex but also DFA.  Further more, DFA has the specific codes for utf-8
locale, which is used most frequently.  I think that grep may also have
specific codes for x86 processors, which are also used most frequently.

Norihiro
From 70fd81f7cf7311589b143e0adaa2aad41e7ab0da Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Thu, 10 Apr 2014 20:52:54 +0900
Subject: [PATCH 3/3] grep: speed-up by replacing `incr' to `add' in x86 and
 x86-64

The increment instrument is more faster than the add instuction in x86
and x86-64 architecture.  So prefer the add instrument to the increment
instrument for them.
* src/kwset.c (SHIFT): New macro.  It uses the increment instrument
instead of the add instrument for x86 and x86-64.
(bmexec, cwexec): Use it.
---
 src/kwset.c | 92 +++++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 62 insertions(+), 30 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index c95bc56..998aab7 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -55,6 +55,35 @@
 
 #define U(c) (to_uchar (c))
 
+#if defined(__i386__) || defined(__x86_64__)
+#define SHIFT(p, d)    \
+  switch (d)           \
+    {                  \
+    default:           \
+      p += d;          \
+      break;           \
+    case 8:            \
+      ++p;             \
+    case 7:            \
+      ++p;             \
+    case 6:            \
+      ++p;             \
+    case 5:            \
+      ++p;             \
+    case 4:            \
+      ++p;             \
+    case 3:            \
+      ++p;             \
+    case 2:            \
+      ++p;             \
+    case 1:            \
+      ++p;             \
+    }
+#else
+#define SHIFT(p, d)    \
+  p += d;
+#endif
+
 /* Balanced tree of edges and labels leaving a given trie node. */
 struct tree
 {
@@ -517,18 +546,18 @@ bmexec (kwset_t kwset, char const *text, size_t size)
       {
         while (tp <= ep)
           {
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
             if (d == 0)
               goto found;
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
             if (d == 0)
               goto found;
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
             if (d == 0)
               goto found;
             /* memchar() of glibc is faster than seeking by delta1 on
@@ -549,8 +578,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
             else
 #endif
               {
-                d = d1[U(tp[-1])], tp += d;
-                d = d1[U(tp[-1])], tp += d;
+                d = d1[U(tp[-1])]; SHIFT(tp, d);
+                d = d1[U(tp[-1])]; SHIFT(tp, d);
               }
           }
         break;
@@ -573,24 +602,24 @@ bmexec (kwset_t kwset, char const *text, size_t size)
                         if (i > len)
                           return tp - len - text;
                       }
-                    d = kwset->shift[i - 2]; tp += d;
+                    d = kwset->shift[i - 2]; SHIFT(tp, d);
                     if (tp > ep)
                       break;
                     if (trans[U(tp[-1])] != gc1)
                       {
-                        d = d1[U(tp[-1])], tp += d;
+                        d = d1[U(tp[-1])]; SHIFT(tp, d);
                         break;
                       }
                     skip = i - 1;
                   }
                 else
                   {
-                    d = kwset->shift[0]; tp += d;
+                    d = kwset->shift[0]; SHIFT(tp, d);
                     if (tp > ep)
                       break;
                     if (trans[U(tp[-1])] != gc1)
                       {
-                        d = d1[U(tp[-1])], tp += d;
+                        d = d1[U(tp[-1])]; SHIFT(tp, d);
                         break;
                       }
                     skip = 1;
@@ -614,24 +643,24 @@ bmexec (kwset_t kwset, char const *text, size_t size)
                         if (i > len)
                           return tp - len - text;
                       }
-                    d = kwset->shift[i - 2]; tp += d;
+                    d = kwset->shift[i - 2]; SHIFT(tp, d);
                     if (tp > ep)
                       break;
                     if (tp[-1] != gc1)
                       {
-                        d = d1[U(tp[-1])], tp += d;
+                        d = d1[U(tp[-1])]; SHIFT(tp, d);
                         break;
                       }
                     skip = i - 1;
                   }
                 else
                   {
-                    d = kwset->shift[0]; tp += d;
+                    d = kwset->shift[0]; SHIFT(tp, d);
                     if (tp > ep)
                       break;
                     if (tp[-1] != gc1)
                       {
-                        d = d1[U(tp[-1])], tp += d;
+                        d = d1[U(tp[-1])]; SHIFT(tp, d);
                         break;
                       }
                     skip = 1;
@@ -646,7 +675,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   d = d1[U(tp[-1])];
   while (d <= ep - tp)
     {
-      d = d1[U((tp += d)[-1])];
+      SHIFT(tp, d);
+      d = d1[U(tp[-1])];
       if (d != 0)
         continue;
       d = len;
@@ -667,7 +697,7 @@ bmexec (kwset_t kwset, char const *text, size_t size)
                       if (i > len)
                         return tp - len - text;
                     }
-                  d = kwset->shift[i - 2]; tp += d;
+                  d = kwset->shift[i - 2]; SHIFT(tp, d);
                   if (tp > ep)
                     break;
                   if (trans[U(tp[-1])] != gc1)
@@ -679,7 +709,7 @@ bmexec (kwset_t kwset, char const *text, size_t size)
                 }
               else
                 {
-                  d = kwset->shift[0]; tp += d;
+                  d = kwset->shift[0]; SHIFT(tp, d);
                   if (tp > ep)
                     break;
                   if (trans[U(tp[-1])] != gc1)
@@ -708,7 +738,7 @@ bmexec (kwset_t kwset, char const *text, size_t size)
                       if (i > len)
                         return tp - len - text;
                     }
-                  d = kwset->shift[i - 2]; tp += d;
+                  d = kwset->shift[i - 2]; SHIFT(tp, d);
                   if (tp > ep)
                     break;
                   if (tp[-1] != gc1)
@@ -720,7 +750,7 @@ bmexec (kwset_t kwset, char const *text, size_t size)
                 }
               else
                 {
-                  d = kwset->shift[0]; tp += d;
+                  d = kwset->shift[0]; SHIFT(tp, d);
                   if (tp > ep)
                     break;
                   if (tp[-1] != gc1)
@@ -781,17 +811,18 @@ cwexec (kwset_t kwset, char const *text, size_t len, 
struct kwsmatch *kwsmatch)
     {
       if (qlim && end <= qlim)
         {
-          end += d - 1;
           while ((d = delta[c = *end]) && end < qlim)
             {
-              end += d;
-              end += delta[U(*end)];
-              end += delta[U(*end)];
+              SHIFT(end, d);
+              d = delta[U(end[-1])]; SHIFT(end, d);
+              d = delta[U(end[-1])]; SHIFT(end, d);
             }
-          ++end;
         }
       else
-        d = delta[c = (end += d)[-1]];
+        {
+          SHIFT(end, d);
+          d = delta[c = end[-1]];
+        }
       if (d)
         continue;
       beg = end - 1;
@@ -840,7 +871,8 @@ cwexec (kwset_t kwset, char const *text, size_t len, struct 
kwsmatch *kwsmatch)
   d = 1;
   while (lim - end >= d)
     {
-      if ((d = delta[c = (end += d)[-1]]) != 0)
+      SHIFT(end, d);
+      if ((d = delta[c = end[-1]]) != 0)
         continue;
       beg = end - 1;
       if (!(trie = next[c]))
-- 
1.9.2

Reply via email to