U0 and C0 mode switches are also triggered by counted lengths of the form
unpack("C/U", ...) when the count read from the data is 0. It's almost certain that the user of such constructs
didn't want a mode change just because his data sequence happened to be
empty. The following patch fixes this.

Before:
perl -wle 'print for unpack("aC/UU", "b\0\341\277\274")'
b
8188
perl -wle 'print for unpack("aU0C/CU", "b\0\341\277\274")'
b
225

After:
./perl -Ilib -wle 'print for unpack("aC/UU", "b\0\341\277\274")'
b
225
./perl -Ilib -wle 'print for unpack("aU0C/CU", "b\0\341\277\274")'
b
8188

Patch relative to 5.8.6

--- pp_pack.c.old       Sat Jan 29 13:26:27 2005
+++ pp_pack.c   Sat Jan 29 13:44:57 2005
@@ -565,6 +565,7 @@
     const int bits_in_uv = 8 * sizeof(cuv);
     char* strrelbeg = s;
     bool beyond = FALSE;
+    bool explicit_length;
     bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
 
     IV aiv;
@@ -594,6 +595,7 @@
            break;
         }
 
+        explicit_length = TRUE;
       redo_switch:
         beyond = s >= strend;
        switch(datumtype) {
@@ -818,7 +820,8 @@
        case 'C':
        unpack_C: /* unpack U will jump here if not UTF-8 */
             if (len == 0) {
-                symptr->flags &= ~FLAG_UNPACK_DO_UTF8;
+                if (explicit_length) 
+                    symptr->flags &= ~FLAG_UNPACK_DO_UTF8;
                break;
            }
            if (len > strend - s)
@@ -845,7 +848,8 @@
            break;
        case 'U':
            if (len == 0) {
-                symptr->flags |= FLAG_UNPACK_DO_UTF8;
+                if (explicit_length) 
+                    symptr->flags |= FLAG_UNPACK_DO_UTF8;
                break;
            }
            if ((symptr->flags & FLAG_UNPACK_DO_UTF8) == 0)
@@ -1694,6 +1698,7 @@
                Perl_croak(aTHX_ "Code missing after '/' in unpack" );
             }
             datumtype = symptr->code;
+            explicit_length = FALSE;
            goto redo_switch;
         }
     }

Reply via email to