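U0 and C0 mode switches are also triggered by counted-length constructs of
the type unpack("C/U", 0), i.e. whenever the count read from the data turns
out to be zero. It's almost certain that the user of such a construct didn't
want a mode change just because the data sequence happened to be empty. The
following patch fixes this: a zero length only switches mode when it was
written explicitly in the template (U0 / C0), not when it came from a count.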
Before:
perl -wle 'print for unpack("aC/UU", "b\0\341\277\274")'
b
8188
perl -wle 'print for unpack("aU0C/CU", "b\0\341\277\274")'
b
225
After:
./perl -Ilib -wle 'print for unpack("aC/UU", "b\0\341\277\274")'
b
225
./perl -Ilib -wle 'print for unpack("aU0C/CU", "b\0\341\277\274")'
b
8188
Patch relative to 5.8.6
--- pp_pack.c.old Sat Jan 29 13:26:27 2005
+++ pp_pack.c Sat Jan 29 13:44:57 2005
@@ -565,6 +565,7 @@
const int bits_in_uv = 8 * sizeof(cuv);
char* strrelbeg = s;
bool beyond = FALSE;
+ bool explicit_length;
bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
IV aiv;
@@ -594,6 +595,7 @@
break;
}
+ explicit_length = TRUE;
redo_switch:
beyond = s >= strend;
switch(datumtype) {
@@ -818,7 +820,8 @@
case 'C':
unpack_C: /* unpack U will jump here if not UTF-8 */
if (len == 0) {
- symptr->flags &= ~FLAG_UNPACK_DO_UTF8;
+ if (explicit_length)
+ symptr->flags &= ~FLAG_UNPACK_DO_UTF8;
break;
}
if (len > strend - s)
@@ -845,7 +848,8 @@
break;
case 'U':
if (len == 0) {
- symptr->flags |= FLAG_UNPACK_DO_UTF8;
+ if (explicit_length)
+ symptr->flags |= FLAG_UNPACK_DO_UTF8;
break;
}
if ((symptr->flags & FLAG_UNPACK_DO_UTF8) == 0)
@@ -1694,6 +1698,7 @@
Perl_croak(aTHX_ "Code missing after '/' in unpack" );
}
datumtype = symptr->code;
+ explicit_length = FALSE;
goto redo_switch;
}
}