Thanks for the review and the fixes. I found a couple more things.
First, it's not portable to cast wint_t * to wchar_t *, since the
pointed-to types might be different sizes or representations. Second, we
can put the cache directly in the struct dfa, saving the overhead of
doing a separate malloc.
The attached further patch should address these problems. I pushed
this, along with the earlier two patches in this sequence, and am
marking this as done.
>From c51be43c00c8ae05a1a19d503865ef5f97ea6612 Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Fri, 28 Mar 2014 09:32:29 -0700
Subject: [PATCH 3/3] dfa: avoid an indirection and port wint_t usage
* src/dfa.c (struct dfa): Put mbrtowc_cache directly into struct dfa
rather than having a pointer; this saves a malloc and an indirection.
All uses changed.
(dfambcache): Port to hosts where wint_t * can't be cast to wchar_t *.
---
src/dfa.c | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 1ca7f38..4ed2189 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -381,7 +381,7 @@ struct dfa
character (if any) for that byte. WEOF means the byte is the
leading byte of a multibyte character. Invalid and null bytes are
mapped to themselves. */
- wint_t *mbrtowc_cache;
+ wint_t mbrtowc_cache[NOTCHAR];
#endif
/* Array of the bracket expression in the DFA. */
@@ -490,38 +490,42 @@ dfambcache (struct dfa *d)
{
#if MBS_SUPPORT
int i;
- MALLOC (d->mbrtowc_cache, NOTCHAR);
for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
{
char c = i;
unsigned char uc = i;
mbstate_t s = { 0 };
- switch (mbrtowc ((wchar_t *) &d->mbrtowc_cache[uc], &c, 1, &s))
+ wchar_t wc;
+ wint_t wi;
+ switch (mbrtowc (&wc, &c, 1, &s))
{
- case (size_t) -2: d->mbrtowc_cache[uc] = WEOF; break;
- case (size_t) -1: d->mbrtowc_cache[uc] = uc; break;
+ default: wi = wc; break;
+ case (size_t) -2: wi = WEOF; break;
+ case (size_t) -1: wi = uc; break;
}
+ d->mbrtowc_cache[uc] = wi;
}
#endif
}
#if MBS_SUPPORT
-/* Store into *PWC the result of converting the leading bytes of the
- multibyte buffer S of length N bytes, updating the conversion state
- in *MBS. On conversion error, convert just a single byte as-is.
- Return the number of bytes converted.
+/* Given the dfa D, store into *PWC the result of converting the
+ leading bytes of the multibyte buffer S of length N bytes, updating
+ the conversion state in *MBS. On conversion error, convert just a
+ single byte as-is. Return the number of bytes converted.
This differs from mbrtowc (PWC, S, N, MBS) as follows:
+ * Extra arg D, containing an mbrtowc_cache for speed.
* N must be at least 1.
* S[N - 1] must be a sentinel byte.
* Shift encodings are not supported.
* The return value is always in the range 1..N.
* *MBS is always valid afterwards.
- * *PWC is always set to something.
- * This uses mbrtowc_cache for speed in the typical case. */
+ * *PWC is always set to something. */
static size_t
-mbs_to_wchar (struct dfa *d, wchar_t *pwc, char const *s, size_t n, mbstate_t *mbs)
+mbs_to_wchar (struct dfa *d, wchar_t *pwc, char const *s, size_t n,
+ mbstate_t *mbs)
{
unsigned char uc = s[0];
wint_t wc = d->mbrtowc_cache[uc];
@@ -3653,9 +3657,6 @@ dfafree (struct dfa *d)
if (d->mb_cur_max > 1)
free_mbdata (d);
-#if MBS_SUPPORT
- free (d->mbrtowc_cache);
-#endif
for (i = 0; i < d->sindex; ++i)
{
--
1.9.0