Module Name: src
Committed By: christos
Date: Fri Feb 15 00:28:10 UTC 2013
Modified Files:
src/lib/libc/gen: vis.c
Log Message:
More fixes from: J.R. Oldroyd
- The input loop control that I changed yesterday to:
while (mbslength >= 0) {
is wrong. There are circumstances where this causes an extra
\000 to be added at the end of some tests. This error was showing
in my own tests here, but I did not notice it yesterday.
(I really need to add my tests to the test suite, catching
every error by eye is hard.) To fix, I've now changed the
code to increment mbslength only if mbslength == 1 to start
with. (Note that this check for "== 1" is why the arg to
strvisx() in vis(1) must be 1, not mbilen.)
- The cast sequence when manually inserting bytes after a
multibyte conversion error:
*src = (wint_t)(u_char)*mbsrc;
is wrong. This is causing problems in the case when an
8859-1 input string is processed in the UTF-8 locale.
It needs to be:
*src = (wint_t)*mbsrc;
Without the (u_char) all the locale mismatch combinations
then work.
- The code:
if (mblength < len)
len = mblength;
needs to be there. It resets len for the single character
input case after we've actually processed two input
characters (c and nextc) because we incremented mbslength
at the start of the loop. Without this code, single
character conversions end up with a \000 or other byte
appended.
To generate a diff of this commit:
cvs rdiff -u -r1.52 -r1.53 src/lib/libc/gen/vis.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/lib/libc/gen/vis.c
diff -u src/lib/libc/gen/vis.c:1.52 src/lib/libc/gen/vis.c:1.53
--- src/lib/libc/gen/vis.c:1.52 Thu Feb 14 08:57:53 2013
+++ src/lib/libc/gen/vis.c Thu Feb 14 19:28:10 2013
@@ -1,4 +1,4 @@
-/* $NetBSD: vis.c,v 1.52 2013/02/14 13:57:53 christos Exp $ */
+/* $NetBSD: vis.c,v 1.53 2013/02/15 00:28:10 christos Exp $ */
/*-
* Copyright (c) 1989, 1993
@@ -57,7 +57,7 @@
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: vis.c,v 1.52 2013/02/14 13:57:53 christos Exp $");
+__RCSID("$NetBSD: vis.c,v 1.53 2013/02/15 00:28:10 christos Exp $");
#endif /* LIBC_SCCS and not lint */
#ifdef __FBSDID
__FBSDID("$FreeBSD$");
@@ -298,6 +298,20 @@ istrsnvisx(char *mbdst, size_t *dlen, co
_DIAGASSERT(mbsrc != NULL);
_DIAGASSERT(mbextra != NULL);
+ /*
+ * Input (mbsrc) is a char string considered to be multibyte
+ * characters. The input loop will read this string pulling
+ * one character, possibly multiple bytes, from mbsrc and
+ * converting each to wchar_t in src.
+ *
+ * The vis conversion will be done using the wide char
+ * wchar_t string.
+ *
+ * This will then be converted back to a multibyte string to
+ * return to the caller.
+ */
+
+ /* Allocate space for the wide char strings */
psrc = pdst = extra = nextra = NULL;
if (!mblength)
mblength = strlen(mbsrc);
@@ -312,22 +326,53 @@ istrsnvisx(char *mbdst, size_t *dlen, co
dst = pdst;
src = psrc;
+ /*
+ * Input loop.
+ * Handle up to mblength characters (not bytes). We do not
+ * stop at NULs because we may be processing a block of data
+ * that includes NULs. We process one more than the character
+ * count so that we also get the next character of input which
+ * is needed under some circumstances as a look-ahead character.
+ */
mbslength = (ssize_t)mblength;
- while (mbslength >= 0) {
+ /*
+ * When inputting a single character, must also read in the
+ * next character for nextc, the look-ahead character.
+ */
+ if (mbslength == 1)
+ mbslength++;
+ while (mbslength > 0) {
+ /* Convert one multibyte character to wchar_t. */
clen = mbtowc(src, mbsrc, MB_LEN_MAX);
if (clen < 0) {
- *src = (wint_t)(u_char)*mbsrc;
+ /* Conversion error, process as a byte instead. */
+ *src = (wint_t)*mbsrc;
clen = 1;
}
if (clen == 0)
+ /*
+ * NUL in input gives 0 return value. Process
+ * as single NUL byte.
+ */
clen = 1;
+ /* Advance the wide-character output pointer past the stored character. */
src++;
+ /* Advance input pointer by number of bytes read. */
mbsrc += clen;
+ /* Decrement input count */
mbslength -= clen;
}
len = src - psrc;
src = psrc;
+ /*
+ * In the single character input case, we will have actually
+ * processed two characters, c and nextc. Reset len back to
+ * just a single character.
+ */
+ if (mblength < len)
+ len = mblength;
+ /* Convert extra argument to list of characters for this mode. */
mbstowcs(extra, mbextra, strlen(mbextra));
MAKEEXTRALIST(flag, nextra, extra);
if (!nextra) {
@@ -340,8 +385,14 @@ istrsnvisx(char *mbdst, size_t *dlen, co
goto out;
}
+ /* Look up which processing function to call. */
f = getvisfun(flag);
+ /*
+ * Main processing loop.
+ * Call do_Xvis processing function one character at a time
+ * with next character available for look-ahead.
+ */
for (start = dst; len > 0; len--) {
c = *src++;
dst = (*f)(dst, c, flag, len >= 1 ? *src : L'\0', nextra);
@@ -351,8 +402,10 @@ istrsnvisx(char *mbdst, size_t *dlen, co
}
}
+ /* Terminate the output string. */
*dst = L'\0';
+ /* Convert wchar_t string back to multibyte output string. */
len = dlen ? *dlen : ((wcslen(start) + 1) * MB_LEN_MAX);
olen = wcstombs(mbdst, start, len * sizeof(*mbdst));