* src/dfa.c: Move the global variables holding parser state (`tok' and
`depth') into a new struct (`struct parser_state') and add an instance
of it, `parsestate', to struct dfa. Replace all references to the
globals with references to that new member.
---
src/dfa.c | 92 ++++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 52 insertions(+), 40 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index d100578..858bc55 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -351,6 +351,18 @@ struct lexer_state
MB_CUR_MAX > 1. */
};
+/* State of the recursive descent parser for regular expressions. */
+
+struct parser_state
+{
+ token tok; /* Lookahead token, refreshed from lex. */
+ size_t depth; /* Current depth of a hypothetical stack
+ holding deferred productions; addtok_mb
+ updates it and keeps its maximum in
+ dfa->depth, which sizes the real stack
+ later on in dfaanalyze. */
+};
+
/* A compiled regular expression. */
struct dfa
{
@@ -362,6 +374,9 @@ struct dfa
/* Scanner state */
struct lexer_state lexstate;
+ /* Parser state */
+ struct parser_state parsestate;
+
/* Fields filled by the parser. */
token *tokens; /* Postfix parse array. */
size_t tindex; /* Index for adding new tokens. */
@@ -1584,15 +1599,6 @@ lex (struct dfa *dfa)
return END; /* keeps pedantic compilers happy. */
}
-/* Recursive descent parser for regular expressions. */
-
-static token tok; /* Lookahead token. */
-static size_t depth; /* Current depth of a hypothetical stack
- holding deferred productions. This is
- used to determine the depth that will be
- required of the real stack later on in
- dfaanalyze. */
-
static void
addtok_mb (struct dfa *dfa, token t, int mbprop)
{
@@ -1617,7 +1623,7 @@ addtok_mb (struct dfa *dfa, token t, int mbprop)
case CAT:
case OR:
- --depth;
+ --dfa->parsestate.depth;
break;
case BACKREF:
@@ -1627,11 +1633,11 @@ addtok_mb (struct dfa *dfa, token t, int mbprop)
++dfa->nleaves;
/* fallthrough */
case EMPTY:
- ++depth;
+ ++dfa->parsestate.depth;
break;
}
- if (depth > dfa->depth)
- dfa->depth = depth;
+ if (dfa->parsestate.depth > dfa->depth)
+ dfa->depth = dfa->parsestate.depth;
}
static void addtok_wc (struct dfa *dfa, wint_t wc);
@@ -1801,7 +1807,7 @@ add_utf8_anychar (struct dfa *dfa)
static void
atom (struct dfa *dfa)
{
- if (tok == WCHAR)
+ if (dfa->parsestate.tok == WCHAR)
{
if (dfa->lexstate.wctok == WEOF)
addtok (dfa, BACKREF);
@@ -1822,9 +1828,9 @@ atom (struct dfa *dfa)
}
}
- tok = lex (dfa);
+ dfa->parsestate.tok = lex (dfa);
}
- else if (tok == ANYCHAR && using_utf8 ())
+ else if (dfa->parsestate.tok == ANYCHAR && using_utf8 ())
{
/* For UTF-8 expand the period to a series of CSETs that define a valid
UTF-8 character. This avoids using the slow multibyte path. I'm
@@ -1834,23 +1840,26 @@ atom (struct dfa *dfa)
UTF-8: it is the most used, and the structure of the encoding
makes the correctness more obvious. */
add_utf8_anychar (dfa);
- tok = lex (dfa);
+ dfa->parsestate.tok = lex (dfa);
}
- else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
- || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
- || tok == ANYCHAR || tok == MBCSET
- || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
+ else if ((dfa->parsestate.tok >= 0 && dfa->parsestate.tok < NOTCHAR)
+ || dfa->parsestate.tok >= CSET || dfa->parsestate.tok == BACKREF
+ || dfa->parsestate.tok == BEGLINE || dfa->parsestate.tok == ENDLINE
+ || dfa->parsestate.tok == BEGWORD || dfa->parsestate.tok == ANYCHAR
+ || dfa->parsestate.tok == MBCSET || dfa->parsestate.tok == ENDWORD
+ || dfa->parsestate.tok == LIMWORD
+ || dfa->parsestate.tok == NOTLIMWORD)
{
- addtok (dfa, tok);
- tok = lex (dfa);
+ addtok (dfa, dfa->parsestate.tok);
+ dfa->parsestate.tok = lex (dfa);
}
- else if (tok == LPAREN)
+ else if (dfa->parsestate.tok == LPAREN)
{
- tok = lex (dfa);
+ dfa->parsestate.tok = lex (dfa);
regexp (dfa);
- if (tok != RPAREN)
+ if (dfa->parsestate.tok != RPAREN)
dfaerror (_("unbalanced ("));
- tok = lex (dfa);
+ dfa->parsestate.tok = lex (dfa);
}
else
addtok (dfa, EMPTY);
@@ -1898,8 +1907,10 @@ closure (struct dfa *dfa)
size_t tindex, ntokens;
atom (dfa);
- while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
- if (tok == REPMN && (dfa->lexstate.minrep || dfa->lexstate.maxrep))
+ while (dfa->parsestate.tok == QMARK || dfa->parsestate.tok == STAR
+ || dfa->parsestate.tok == PLUS || dfa->parsestate.tok == REPMN)
+ if (dfa->parsestate.tok == REPMN
+ && (dfa->lexstate.minrep || dfa->lexstate.maxrep))
{
ntokens = nsubtoks (dfa, dfa->tindex);
tindex = dfa->tindex - ntokens;
@@ -1918,18 +1929,18 @@ closure (struct dfa *dfa)
addtok (dfa, QMARK);
addtok (dfa, CAT);
}
- tok = lex (dfa);
+ dfa->parsestate.tok = lex (dfa);
}
- else if (tok == REPMN)
+ else if (dfa->parsestate.tok == REPMN)
{
dfa->tindex -= nsubtoks (dfa, dfa->tindex);
- tok = lex (dfa);
+ dfa->parsestate.tok = lex (dfa);
closure (dfa);
}
else
{
- addtok (dfa, tok);
- tok = lex (dfa);
+ addtok (dfa, dfa->parsestate.tok);
+ dfa->parsestate.tok = lex (dfa);
}
}
@@ -1937,7 +1948,8 @@ static void
branch (struct dfa* dfa)
{
closure (dfa);
- while (tok != RPAREN && tok != OR && tok >= 0)
+ while (dfa->parsestate.tok != RPAREN && dfa->parsestate.tok != OR
+ && dfa->parsestate.tok >= 0)
{
closure (dfa);
addtok (dfa, CAT);
@@ -1948,9 +1960,9 @@ static void
regexp (struct dfa *dfa)
{
branch (dfa);
- while (tok == OR)
+ while (dfa->parsestate.tok == OR)
{
- tok = lex (dfa);
+ dfa->parsestate.tok = lex (dfa);
branch (dfa);
addtok (dfa, OR);
}
@@ -1976,12 +1988,12 @@ dfaparse (char const *s, size_t len, struct dfa *d)
if (!syntax_bits_set)
dfaerror (_("no syntax specified"));
- tok = lex (d);
- depth = d->depth;
+ d->parsestate.tok = lex (d);
+ d->parsestate.depth = d->depth;
regexp (d);
- if (tok != END)
+ if (d->parsestate.tok != END)
dfaerror (_("unbalanced )"));
addtok (d, END - d->nregexps);
--
2.8.1