Package: grep
Tags: patch

When MB_CUR_MAX == 1 and matching is case-insensitive, kwset is not very
helpful, because the parser converts single-byte alphabetic characters
into CSET tokens, which dfamust does not extract.

This patch makes dfamust inspect the contents of CSET tokens, which makes
it possible to extract a longer fixed string from the tokens.

Norihiro
From 7a67844524c0657fc395966536805d9736c0a88e Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Tue, 18 Mar 2014 21:01:47 +0900
Subject: [PATCH] grep: open CSET and transform into the upper case when
 MB_CUR_MAX == 1 in dfamust

When MB_CUR_MAX == 1 and matching is case-insensitive, kwset is not very
helpful, because the parser converts single-byte alphabetic characters
into CSET tokens, which dfamust does not extract.

This patch makes dfamust inspect the contents of CSET tokens, which makes
it possible to extract a longer fixed string from the tokens.

* src/dfa.c (dfamust): Inspect CSET tokens and convert characters to
upper case when MB_CUR_MAX == 1 and matching is case-insensitive.
---
 src/dfa.c | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 5e60cd5..5258a21 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -4101,7 +4101,36 @@ dfamust (struct dfa *d)
               /* not on *my* shift */
               goto done;
             }
-          else if (t >= CSET || !MBS_SUPPORT || t == ANYCHAR || t == MBCSET)
+          else if (t >= CSET)
+            {
+              charclass ccl;
+              int j;
+              copyset (d->charclasses[t - CSET], ccl);
+              for (j = 0; j < NOTCHAR; ++j)
+                if (tstbit (j, ccl))
+                  break;
+              if (j < NOTCHAR)
+                {
+                  int c = (case_fold && MB_CUR_MAX == 1) ? toupper (j) : j;
+                  for (; j < NOTCHAR; j++)
+                    if (tstbit (j, ccl)
+                        && (!(case_fold && MB_CUR_MAX == 1) || c != toupper (j)))
+                      break;
+                  if (j < NOTCHAR)
+                    resetmust (mp);
+                  else
+                    {
+                      mp->is[0] = mp->left[0] = mp->right[0] = c;
+                      mp->is[1] = mp->left[1] = mp->right[1] = '\0';
+                      mp->in = enlist (mp->in, mp->is, (size_t) 1);
+                      if (mp->in == NULL)
+                        goto done;
+                    }
+                }
+              else
+                resetmust (mp);
+            }
+          else if (!MBS_SUPPORT || t == ANYCHAR || t == MBCSET)
             {
               /* easy enough */
               resetmust (mp);
@@ -4110,7 +4139,8 @@ dfamust (struct dfa *d)
             {
               /* plain character */
               resetmust (mp);
-              mp->is[0] = mp->left[0] = mp->right[0] = t;
+              mp->is[0] = mp->left[0] = mp->right[0] =
+                (case_fold && MB_CUR_MAX == 1) ? toupper (t) : t;
               mp->is[1] = mp->left[1] = mp->right[1] = '\0';
               mp->in = enlist (mp->in, mp->is, (size_t) 1);
               if (mp->in == NULL)
-- 
1.9.0

Reply via email to