# New Ticket Created by  Nicholas Clark 
# Please include the string:  [perl #30220]
# in the subject line of all future correspondence about this issue. 
# <URL: http://rt.perl.org:80/rt3/Ticket/Display.html?id=30220 >


---
osname= darwin
osvers= 7.3.0
arch=   darwin-64int-2level
cc=     ccache gcc 
---
Flags:
    category=core
    severity=medium
    ack=no
---
There's an off-by-one error in string_unescape_cstring when the string
has a delimiter. The code appears to be reading 1 character beyond the
end of the allocated string, but usually this won't show because (I infer)
the byte 1 beyond is zero (ie ASCII NUL), which triggers this:

        if (!r || r == (Parrot_UInt4)delimiter)
            break;


I think that the logic bug is as follows.
Given a 13 character C string "found sub1\\n\"" and delimiter of '"'

    size_t clength = strlen(cstring);

clength starts as 13

    if (delimiter && clength)
        --clength;

clength is now 12

    result = string_make(interpreter, cstring, clength, "iso-8859-1",
            PObj_constant_FLAG);

A 12 character, 12 byte parrot string is made:

(gdb) print *( struct parrot_string_t *)context
$11 = {
  obj = {
    u = {
      _b = {
        _bufstart = 0xb30b0ff4, 
        _buflen = 12
      }, 
      _ptrs = {
        _struct_val = 0xb30b0ff4, 
        _pmc_val = 0xc
      }, 
      _int_val = -1291120652, 
      _num_val = -8.2231060297357384e-63, 
      _string_val = 0xb30b0ff4
    }, 
    flags = 131328, 
    _pobj_version = 0
  }, 
  bufused = 12, 
  strstart = 0xb30b0ff4, 
  strlen = 12, 
  representation = enum_stringrep_one, 
  hashval = 0
}

ie "found sub1\\n"  (note that last '"' is gone)

and now the loop runs, with the termination condition being "find the
delimiter or find NUL":

        if (!r || r == (Parrot_UInt4)delimiter)
            break;

BUG. There isn't a delimiter or NUL in those 12 bytes.

So the loop runs off the end of the buffer, and undefined bonus crap is
printed out. However, if I use a debugging malloc that allocates everything
on individual pages, the read of the byte after the 12th will fault. Hence:

(gdb) where
#0  0x00034114 in char8_at (offs=12, context=0xb003f850) at src/string.c:2918
#1  0x000344e0 in string_unescape_cstring (interpreter=0xb000ec6c, cstring=0xb2faaff1 
"found sub1\\n\"", delimiter=34 '"') at src/string.c:3007

(note offs=12, ie the 13th byte of the string).

I can't recreate this on Linux - there's no corruption by
default, and running under valgrind fails, apparently due to valgrind bugs.

The string in question is part of test 45 of t/pmc/sub.t


It looks like the loop was originally processing a \0 terminated string.
The included patch changes it to terminate at the (counted) end of the input
buffer. I'm not sure whether the check on meeting the delimiter midway is now
unnecessary. It's not clear whether that code is only there to deal with
terminating when the end delimiter is encountered, given that the end
delimiter is now stripped by the earlier "--clength;" statement.

All this would be much easier to figure out if the code had any comments.

Index: src/string.c
===================================================================
RCS file: /cvs/public/parrot/src/string.c,v
retrieving revision 1.204
diff -d -u -r1.204 string.c
--- src/string.c        31 May 2004 20:38:56 -0000      1.204
+++ src/string.c        11 Jun 2004 10:15:44 -0000
@@ -3003,9 +3003,14 @@
     char_at     = set_char_getter(result);
     set_char_at = set_char_setter(result);
 
-    for (offs = d = 0; ; ++offs) {
+    for (offs = d = 0; offs < clength; ++offs) {
         r = (char_at)(offs, result);
-        if (!r || r == (Parrot_UInt4)delimiter)
+        /* There cannot be any NULs within this string.  */
+        assert(r != '\0');
+        /* XXX Is this logic correct? Is it acceptable for the passed in
+           C string to contain any unescape delimiters within its body?
+        */
+        if (r == (Parrot_UInt4)delimiter)
             break;
         if (r == '\\') {
             ++offs;


Nicholas Clark



---
Summary of my parrot 0.1.0 configuration:
  configdate='Fri Jun 11 09:15:19 2004'
  Platform:
    osname=darwin, archname=darwin-2level
    jitcapable=1, jitarchname=ppc-darwin,
    jitosname=DARWIN, jitcpuarch=ppc
    execcapable=1
    perl=/Users/nick/Reference/5.8.4/bin/perl5.8.4-32
  Compiler:
    cc='ccache gcc', ccflags='-pipe -fno-common -no-cpp-precomp  -pipe -fno-common 
-Wno-long-double ',
  Linker and Libraries:
    ld='env MACOSX_DEPLOYMENT_TARGET=10.3 cc', ldflags=' -flat_namespace ',
    cc_ldflags='',
    libs='-lm'
  Dynamic Linking:
    so='.dylib', ld_shared=' -bundle -undefined dynamic_lookup',
    ld_shared_flags=''
  Types:
    iv=long, intvalsize=4, intsize=4, opcode_t=long, opcode_t_size=4,
    ptrsize=4, ptr_alignment=4 byteorder=4321, 
    nv=double, numvalsize=8, doublesize=8

---
Environment:
    DYLD_LIBRARY_PATH    HOME    LANG    LANGUAGE    LD_LIBRARY_PATH    LOGDIR    PATH 
   PERL5LIB    SHELL

Reply via email to