iliaa Sat Dec 6 17:07:02 2003 EDT Modified files: (Branch: PHP_4_3) /php-src/ext/standard file.c Log: Fixed memory corruption occurring in fgetcsv() on certain files.
Index: php-src/ext/standard/file.c diff -u php-src/ext/standard/file.c:1.279.2.41 php-src/ext/standard/file.c:1.279.2.42 --- php-src/ext/standard/file.c:1.279.2.41 Fri Nov 28 13:33:18 2003 +++ php-src/ext/standard/file.c Sat Dec 6 17:07:00 2003 @@ -21,7 +21,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: file.c,v 1.279.2.41 2003/11/28 18:33:18 iliaa Exp $ */ +/* $Id: file.c,v 1.279.2.42 2003/12/06 22:07:00 iliaa Exp $ */ /* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */ @@ -2155,19 +2155,42 @@ } /* }}} */ +static char *_php_fgetcsv_find_enclosure(char *start, int len, char enclosure) +{ + char *s=start, *p, *e=start+len; + + while (e > s && (p = memchr(s, enclosure, (e - s)))) { + if (p > s && *(p - 1) == '\\') { /* check escape characters */ + int enc_cnt=0; + char *pp = p - 1; + + while (pp >= s && *pp == '\\') { + enc_cnt++; + pp--; + } + if ((enc_cnt % 2)) { + s = p + 1; + continue; + } + } + return p; + } + + return NULL; +} + /* {{{ proto array fgetcsv(resource fp, int length [, string delimiter [, string enclosure]]) Get line from file pointer and parse for CSV fields */ PHP_FUNCTION(fgetcsv) { - char *temp, *tptr, *bptr, *lineEnd; char delimiter = ','; /* allow this to be set as parameter */ char enclosure = '"'; /* allow this to be set as parameter */ /* first section exactly as php_fgetss */ zval **fd, **bytes, **p_delim, **p_enclosure; - int len, temp_len, buf_len; - char *buf; + int len, buf_len; + char *buf, *p, *s, *e, *re; php_stream *stream; switch(ZEND_NUM_ARGS()) { @@ -2208,13 +2231,11 @@ if (ZEND_NUM_ARGS() >= 4) { convert_to_string_ex(p_enclosure); - /* Make sure that there is at least one character in string */ - if (Z_STRLEN_PP(p_enclosure) < 1) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "enclosure must be a character"); - RETURN_FALSE; + if (Z_STRLEN_PP(p_enclosure) < 1) { /* no enclosure */ + enclosure = 0; + } else { /* use first character from string */ + enclosure = 
Z_STRVAL_PP(p_enclosure)[0]; } - /* use first character from string */ - enclosure = Z_STRVAL_PP(p_enclosure)[0]; } php_stream_from_zval(stream, fd); @@ -2227,159 +2248,107 @@ } buf = emalloc(len + 1); - /* needed because recv/read/gzread doesnt set null char at end */ - memset(buf, 0, len + 1); if (php_stream_get_line(stream, buf, len, &buf_len) == NULL) { efree(buf); RETURN_FALSE; } + s = buf; + re = e = buf + buf_len; - /* Now into new section that parses buf for delimiter/enclosure fields */ + /* strip leading spaces */ + while (isspace(*s) && *s != delimiter) { + s++; + } + /* strip trailing spaces */ + while (isspace(*(--e)) && *e != delimiter); + e++; - /* Strip trailing space from buf, saving end of line in case required for enclosure field */ + array_init(return_value); - lineEnd = emalloc(len + 1); - bptr = buf; - tptr = buf + buf_len -1; - while ( isspace((int)*(unsigned char *)tptr) && (*tptr!=delimiter) && (tptr > bptr) ) tptr--; - tptr++; - strcpy(lineEnd, tptr); - - /* add single space - makes it easier to parse trailing null field */ - *tptr++ = ' '; - *tptr = 0; - - /* reserve workspace for building each individual field */ - - temp_len = len; - temp = emalloc(temp_len + 1); /* unlikely but possible! 
*/ - tptr = temp; + if (!enclosure || !(p = _php_fgetcsv_find_enclosure(s, (e - s), enclosure))) { +no_enclosure: + while ((p = memchr(s, delimiter, (e - s)))) { + add_next_index_stringl(return_value, s, (p - s), 1); + s = p + 1; + } + } else { + char *p2=NULL, *buf2; + int buf2_len; +enclosure: + /* handle complete fields before the enclosure */ + while (s < p && (p2 = memchr(s, delimiter, (p - s)))) { + add_next_index_stringl(return_value, s, (p2 - s), 1); + s = p2 + 1; + } - /* Initialize return array */ - array_init(return_value); + if ((p - s)) { + buf2_len = p - s; + buf2 = emalloc(buf2_len + 1); + memcpy(buf2, s, buf2_len); + } else { + buf2 = NULL; + buf2_len = 0; + s++; + } + p++; + + /* try to find end of enclosure */ + while (!(p2 = _php_fgetcsv_find_enclosure(p, (e - p), enclosure))) { + buf2 = erealloc(buf2, buf2_len + (re - p) + 1); + memcpy(buf2 + buf2_len, p, (re - p)); + buf2_len += (re - p); - /* Main loop to read CSV fields */ - /* NB this routine will return a single null entry for a blank line */ + if (php_stream_get_line(stream, buf, len, &buf_len) == NULL) { + goto enclosure_done; + } + s = p = buf; + re = e = buf + buf_len; + /* strip trailing spaces */ + while (isspace(*(--e)) && *e != delimiter); + e++; + } - do { - /* 1. Strip any leading space */ - while(isspace((int)*(unsigned char *)bptr) && (*bptr!=delimiter)) bptr++; - /* 2. Read field, leaving bptr pointing at start of next field */ - if (enclosure && *bptr == enclosure) { - bptr++; /* move on to first character in field */ - - /* 2A. 
handle enclosure delimited field */ - while (*bptr) { - /* we need to determine if the enclosure is 'real' or is it escaped */ - if (*(bptr - 1) == '\\') { - int escape_cnt = 0; - char *bptr_p = bptr - 2; - - while (bptr_p > buf && *bptr_p == '\\') { - escape_cnt++; - bptr_p--; - } - if (!(escape_cnt % 2)) { - goto normal_char; - continue; - } - } - - if (*bptr == enclosure) { - /* handle the enclosure */ - if ( *(bptr+1) == enclosure) { - /* embedded enclosure */ - *tptr++ = *bptr; bptr +=2; - } else { - /* must be end of string - skip to start of next field or end */ - while ( (*bptr != delimiter) && *bptr ) bptr++; - if (*bptr == delimiter) bptr++; - *tptr=0; /* terminate temporary string */ - break; /* .. from handling this field - resumes at 3. */ - } - } else { -normal_char: - /* normal character */ - *tptr++ = *bptr++; - - if (*bptr == 0) { /* embedded line end? */ - if ((bptr - buf) < buf_len) { - while (*bptr == '\0') { - *tptr++ = *bptr++; - } - continue; - } - - *(tptr-1)=0; /* remove space character added on reading line */ - strcat(temp, lineEnd); /* add the embedded line end to the field */ - - /* read a new line from input, as at start of routine */ - memset(buf, 0, len+1); - - if (php_stream_get_line(stream, buf, len, &buf_len) == NULL) { - /* we've got an unterminated enclosure, assign all the data - * from the start of the enclosure to end of data to the last element - */ - if (temp_len > len) { - *tptr = 0; - break; - } - - efree(lineEnd); - efree(temp); - efree(buf); - zval_dtor(return_value); - RETURN_FALSE; - } + /* end of enclosure found, now find the delimeter */ + if ((p = memchr(p2, delimiter, (e - p2)))) { + p2 = s; + s = p + 1; + if (p > p2 && *(p - 1) == enclosure) { + p--; + } + if (p - p2) { + buf2 = erealloc(buf2, buf2_len + (p - p2) + 1); + memcpy(buf2 + buf2_len, p2, (p - p2)); + buf2_len += (p - p2); + } + buf2[buf2_len] = '\0'; + add_next_index_stringl(return_value, buf2, buf2_len, 0); - temp_len += len; - temp = erealloc(temp, 
temp_len+1); - bptr = buf; - tptr = buf + buf_len -1; - while (isspace((int)*(unsigned char *)tptr) && (*tptr!=delimiter) && (tptr > bptr)) - tptr--; - tptr++; - strcpy(lineEnd, tptr); - *tptr++ = ' '; - *tptr = 0; - - tptr = temp; /* reset temp pointer to end of field as read so far */ - while (*tptr) - tptr++; - } - } + if (!(p = _php_fgetcsv_find_enclosure(s, (e - s), enclosure))) { + goto no_enclosure; + } else { + goto enclosure; } } else { - /* 2B. Handle non-enclosure field */ - while ((*bptr != delimiter) && ((bptr - buf) < buf_len)) - *tptr++ = *bptr++; - *tptr=0; /* terminate temporary string */ - - if ((tptr - temp)) { - tptr--; - while (isspace((int)*(unsigned char *)tptr) && (*tptr!=delimiter)) - *tptr-- = 0; /* strip any trailing spaces */ - if (*tptr) { - tptr++; - } + while (e < re && *(e - 1) == enclosure) { + e--; } - - if (*bptr == delimiter) - bptr++; - } - /* 3. Now pass our field back to php */ - if (*tptr == '\0') { - add_next_index_stringl(return_value, temp, (tptr - temp), 1); - } else { - add_next_index_string(return_value, temp, 1); + if (e - s) { + buf2 = erealloc(buf2, buf2_len + (e - s) + 1); + memcpy(buf2 + buf2_len, s, (e - s)); + buf2_len += (e - s); + } +enclosure_done: + s = e = NULL; + buf2[buf2_len] = '\0'; + add_next_index_stringl(return_value, buf2, buf2_len, 0); } - tptr = temp; - } while (*bptr); + } - efree(lineEnd); - efree(temp); + if (s < e) { + add_next_index_stringl(return_value, s, (e - s), 1); + } efree(buf); } /* }}} */
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php