From 4894de4996e3b8281d6fc3c68d0caf234cbd1790 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka
Date: Thu, 20 Feb 2014 21:58:49 +0900
Subject: [PATCH] Use mbrtowc_cache in DFA engine.

---
 src/dfa.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 8906ed3..c073cf3 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -419,6 +419,13 @@ struct dfa
                                    the dfa. */
 };
 
+struct mbrtowc_cache {
+  size_t length;
+  wchar_t wchar;
+};
+
+static struct mbrtowc_cache mbrtowc_cache[NOTCHAR];
+
 /* Some macros for user access to dfa internals. */
 
 /* ACCEPTING returns true if s could possibly be an accepting state of r. */
@@ -822,12 +829,22 @@ static unsigned char const *buf_end; /* reference to end in dfaexec. */
     else \
       { \
         wchar_t _wc; \
-        cur_mb_len = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
+        bool use_cache = false; \
+        cur_mb_len = mbrtowc_cache[to_uchar (*lexptr)].length; \
+        if (cur_mb_len != (size_t) -2) \
+          { \
+            _wc = mbrtowc_cache[to_uchar (*lexptr)].wchar; \
+            use_cache = true; \
+          } \
+        else \
+          cur_mb_len = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
         if (cur_mb_len <= 0) \
           { \
             cur_mb_len = 1; \
             --lexleft; \
             (wc) = (c) = to_uchar (*lexptr++); \
+            if (!use_cache) \
+              memset (&mbs, 0, sizeof mbs); \
           } \
         else \
           { \
@@ -3271,8 +3288,19 @@ prepare_wc_buf (const char *begin, const char *end)
     {
       if (remain_bytes == 0)
         {
-          remain_bytes
-            = mbrtowc (inputwcs + i, begin + i, end - begin - i + 1, &mbs);
+          bool use_cache = false;
+          remain_bytes = mbrtowc_cache[to_uchar (begin[i])].length;
+          if (remain_bytes != (size_t) -2)
+            {
+              inputwcs[i] = mbrtowc_cache[to_uchar (begin[i])].wchar;
+              use_cache = true;
+            }
+          else
+            {
+              remain_bytes
+                = mbrtowc (inputwcs + i, begin + i, end - begin - i + 1, &mbs);
+            }
+
           if (remain_bytes < 1
               || remain_bytes == (size_t) -1
               || remain_bytes == (size_t) -2
@@ -3281,6 +3309,8 @@ prepare_wc_buf (const char *begin, const char *end)
               remain_bytes = 0;
               inputwcs[i] = (wchar_t) begin[i];
               mblen_buf[i] = 0;
+              if (!use_cache)
+                memset (&mbs, 0, sizeof mbs);
               if (begin[i] == eol)
                 break;
             }
@@ -3545,11 +3575,26 @@ dfaoptimize (struct dfa *d)
   d->mb_cur_max = 1;
 }
 
+void
+build_mbrtowc_cache (void)
+{
+  int i;
+
+  for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
+    {
+      char c = i;
+      unsigned char uc = i;
+      mbstate_t s = { 0 };
+      mbrtowc_cache[uc].length = mbrtowc (&mbrtowc_cache[uc].wchar, &c, 1, &s);
+    }
+}
+
 /* Parse and analyze a single string of the given length. */
 void
 dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
 {
   dfainit (d);
+  build_mbrtowc_cache ();
   dfaparse (s, len, d);
   dfamust (d);
   dfaoptimize (d);
-- 
1.8.5.2