I changed the type of `has_backref' into `bool'.
Norihiro
From 11bf4318c360c29a3000afee8ee9f41ec431130e Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Tue, 1 Apr 2014 23:48:16 +0900
Subject: [PATCH] grep: prefer regex to DFA for ANYCHAR in multi-byte locales
* src/dfa.c (dfaexec): prefer regex to DFA for ANYCHAR in multi-byte locales.
---
src/dfa.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index b6fbd58..80725ba 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -302,7 +302,8 @@ typedef struct
size_t hash; /* Hash of the positions of this state. */
position_set elems; /* Positions this state could match. */
unsigned char context; /* Context from previous state. */
- char backref; /* True if this state matches a \<digit>. */
+ bool has_backref; /* True if this state matches a \<digit>. */
+ bool has_mbcset; /* True if this state matches a MBCSET. */
unsigned short constraint; /* Constraint for this state to accept. */
token first_end; /* Token value of the first END in elems. */
position_set mbps; /* Positions which can match multibyte
@@ -2161,7 +2162,7 @@ state_index (struct dfa *d, position_set const *s, int context)
alloc_position_set (&d->states[i].elems, s->nelem);
copy (s, &d->states[i].elems);
d->states[i].context = context;
- d->states[i].backref = 0;
+ d->states[i].has_backref = false;
d->states[i].constraint = 0;
d->states[i].first_end = 0;
if (MBS_SUPPORT)
@@ -2181,7 +2182,7 @@ state_index (struct dfa *d, position_set const *s, int context)
else if (d->tokens[s->elems[j].index] == BACKREF)
{
d->states[i].constraint = NO_CONSTRAINT;
- d->states[i].backref = 1;
+ d->states[i].has_backref = true;
}
++d->sindex;
@@ -2649,6 +2650,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
if (d->states[s].mbps.nelem == 0)
alloc_position_set (&d->states[s].mbps, 1);
insert (pos, &(d->states[s].mbps));
+ d->states[s].has_mbcset |= (d->tokens[pos.index] == MBCSET);
continue;
}
else
@@ -3454,7 +3456,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
better performance (up to 25% better on [a-z], for
example) and enables support for collating symbols and
equivalence classes. */
- if (backref)
+ if (d->states[s].has_mbcset && backref)
{
*backref = 1;
free (mblen_buf);
@@ -3490,7 +3492,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if (d->success[s] & sbit[*p])
{
if (backref)
- *backref = (d->states[s].backref != 0);
+ *backref = d->states[s].has_backref;
if (d->mb_cur_max > 1)
{
free (mblen_buf);
--
1.9.1