pollita Tue Mar 14 21:15:05 2006 UTC Modified files: /php-src/main php_streams.h /php-src/main/streams streams.c /php-src/ext/standard file.c Log: More stream updates. fgets() will work now, as will anything that calls one of the _php_stream_get_line() family of functions. The one exception is when the legacy defines are used on a unicode stream: at the moment they simply return NULL; I'll update them to do sloppy conversion in a bit. 'make (u)test' still doesn't work, but it now fails in a different way.
http://cvs.php.net/viewcvs.cgi/php-src/main/php_streams.h?r1=1.106&r2=1.107&diff_format=u Index: php-src/main/php_streams.h diff -u php-src/main/php_streams.h:1.106 php-src/main/php_streams.h:1.107 --- php-src/main/php_streams.h:1.106 Mon Mar 13 04:40:11 2006 +++ php-src/main/php_streams.h Tue Mar 14 21:15:05 2006 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_streams.h,v 1.106 2006/03/13 04:40:11 pollita Exp $ */ +/* $Id: php_streams.h,v 1.107 2006/03/14 21:15:05 pollita Exp $ */ #ifndef PHP_STREAMS_H #define PHP_STREAMS_H @@ -320,10 +320,14 @@ PHPAPI int _php_stream_flush(php_stream *stream, int closing TSRMLS_DC); #define php_stream_flush(stream) _php_stream_flush((stream), 0 TSRMLS_CC) -PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen, size_t *returned_len TSRMLS_DC); -#define php_stream_gets(stream, buf, maxlen) _php_stream_get_line((stream), (buf), (maxlen), NULL TSRMLS_CC) +PHPAPI void *_php_stream_get_line(php_stream *stream, int buf_type, zstr buf, size_t maxlen, size_t maxchars, size_t *returned_len TSRMLS_DC); +#define php_stream_get_line(stream, buf, maxlen, retlen) _php_stream_get_line((stream), IS_STRING, ZSTR(buf), (maxlen), 0, (retlen) TSRMLS_CC) +#define php_stream_get_line_ex(stream, buf_type, buf, maxlen, maxchars, retlen) \ + _php_stream_get_line((stream), (buf_type), ZSTR(buf), (maxlen), (maxchars), (retlen) TSRMLS_CC) +#define php_stream_gets(stream, buf, maxlen) _php_stream_get_line((stream), IS_STRING, ZSTR(buf), (maxlen), 0, NULL TSRMLS_CC) +#define php_stream_gets_ex(stream, buf_type, buf, maxlen, maxchars) \ + _php_stream_get_line((stream), (buf_type), ZSTR(buf), (maxlen), (maxchars), NULL TSRMLS_CC) -#define php_stream_get_line(stream, buf, maxlen, retlen) _php_stream_get_line((stream), (buf), (maxlen), (retlen) TSRMLS_CC) PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *returned_len, char *delim, size_t delim_len 
TSRMLS_DC); PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t *pmax_bytes, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC); http://cvs.php.net/viewcvs.cgi/php-src/main/streams/streams.c?r1=1.101&r2=1.102&diff_format=u Index: php-src/main/streams/streams.c diff -u php-src/main/streams/streams.c:1.101 php-src/main/streams/streams.c:1.102 --- php-src/main/streams/streams.c:1.101 Mon Mar 13 20:54:06 2006 +++ php-src/main/streams/streams.c Tue Mar 14 21:15:05 2006 @@ -19,7 +19,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: streams.c,v 1.101 2006/03/13 20:54:06 pollita Exp $ */ +/* $Id: streams.c,v 1.102 2006/03/14 21:15:05 pollita Exp $ */ #define _GNU_SOURCE #include "php.h" @@ -955,18 +955,25 @@ /* If buf == NULL, the buffer will be allocated automatically and will be of an * appropriate length to hold the line, regardless of the line length, memory - * permitting -- returned string will be up to (maxlen-1), last byte holding terminating NULL - * Like php_stream_read(), this will treat unicode streams as ugly binary data (use with caution) */ -PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen, - size_t *returned_len TSRMLS_DC) + * permitting -- returned string will be up to (maxlen-1) units of (maxchars) characters, last byte holding terminating NULL + * Like php_stream_read(), this will (UTODO) treat unicode streams as ugly binary data (use with caution) */ +PHPAPI void *_php_stream_get_line(php_stream *stream, int buf_type, zstr buf, size_t maxlen, size_t maxchars, size_t *returned_len TSRMLS_DC) { size_t avail = 0; size_t current_buf_size = 0; size_t total_copied = 0; int grow_mode = 0; - char *bufstart = buf; + int is_unicode = php_stream_reads_unicode(stream); + int split_surrogate = 0; + zstr bufstart = buf; + + if ((buf_type == IS_STRING && is_unicode) || + (buf_type == IS_UNICODE && !is_unicode)) { + /* UTODO: Allow sloppy conversion */ + return NULL; + } - if 
(buf == NULL) { + if (buf.v == NULL) { grow_mode = 1; } else if (maxlen == 0) { return NULL; @@ -988,20 +995,39 @@ for (;;) { avail = stream->writepos - stream->readpos; - if (avail > 0) { - size_t cpysz = 0; - char *readptr; - char *eol; + if (!split_surrogate && avail > 0) { + size_t cpysz = avail; + zstr readptr; int done = 0; - readptr = stream->readbuf.s + stream->readpos; - eol = php_stream_locate_eol(stream, (zstr)NULL, 0 TSRMLS_CC); + if (is_unicode) { + UChar *eol; + readptr.u = stream->readbuf.u + stream->readpos; + + eol = php_stream_locate_eol(stream, ZSTR(NULL), 0 TSRMLS_CC); + if (eol) { + cpysz = eol - readptr.u + 1; + done = 1; + } - if (eol) { - cpysz = eol - readptr + 1; - done = 1; + if (U16_IS_SURROGATE(readptr.u[cpysz - 1]) && + U16_IS_SURROGATE_LEAD(readptr.u[cpysz - 1])) { + /* Don't orphan */ + cpysz--; + if (!cpysz) { + /* Force the loop to land on fill_read_buffer */ + split_surrogate = 1; /* must specifically be 1 */ + continue; + } + } } else { - cpysz = avail; + char *eol; + readptr.s = stream->readbuf.s + stream->readpos; + eol = php_stream_locate_eol(stream, ZSTR(NULL), 0 TSRMLS_CC); + if (eol) { + cpysz = eol - readptr.s + 1; + done = 1; + } } if (grow_mode) { @@ -1012,9 +1038,9 @@ * than 8K, we waste 1 byte per additional 8K or so. 
* That seems acceptable to me, to avoid making this code * hard to follow */ - bufstart = erealloc(bufstart, current_buf_size + cpysz + 1); + bufstart.s = erealloc(bufstart.s, PS_ULEN(stream, current_buf_size + cpysz + 1)); + buf.s = bufstart.s + PS_ULEN(stream, total_copied); current_buf_size += cpysz + 1; - buf = bufstart + total_copied; } else { if (cpysz >= maxlen - 1) { cpysz = maxlen - 1; @@ -1022,11 +1048,29 @@ } } - memcpy(buf, readptr, cpysz); + if (is_unicode) { + int ulen = u_countChar32(readptr.u, cpysz); + + if (ulen > maxchars) { + int32_t i = 0; + + ulen = maxchars; + U16_FWD_N(readptr.u, i, cpysz, ulen); + cpysz = i; + } + maxchars -= ulen; + memcpy(buf.u, readptr.u, UBYTES(cpysz)); + buf.u += cpysz; + } else { + if (cpysz > maxchars) { + cpysz = maxchars; + } + memcpy(buf.s, readptr.s, cpysz); + buf.s += cpysz; + } stream->position += cpysz; stream->readpos += cpysz; - buf += cpysz; maxlen -= cpysz; total_copied += cpysz; @@ -1050,32 +1094,31 @@ php_stream_fill_read_buffer(stream, toread TSRMLS_CC); - if (stream->writepos - stream->readpos == 0) { + if (stream->writepos - stream->readpos <= split_surrogate) { break; } + split_surrogate = 0; } } if (total_copied == 0) { if (grow_mode) { - assert(bufstart == NULL); + assert(bufstart.v == NULL); } return NULL; } - buf[0] = '\0'; + if (is_unicode) { + buf.u[0] = 0; + } else { + buf.s[0] = 0; + } + if (returned_len) { *returned_len = total_copied; } - return bufstart; -} - -PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t *pmax_bytes, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC) -{ - /* TODO: Bring this back up to date */ - - return NULL; + return bufstart.s; } /* Same deal as php_stream_read() and php_stream_get_line() http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/file.c?r1=1.425&r2=1.426&diff_format=u Index: php-src/ext/standard/file.c diff -u php-src/ext/standard/file.c:1.425 php-src/ext/standard/file.c:1.426 --- php-src/ext/standard/file.c:1.425 Mon Mar 13 
15:01:44 2006 +++ php-src/ext/standard/file.c Tue Mar 14 21:15:05 2006 @@ -21,7 +21,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: file.c,v 1.425 2006/03/13 15:01:44 derick Exp $ */ +/* $Id: file.c,v 1.426 2006/03/14 21:15:05 pollita Exp $ */ /* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */ @@ -993,9 +993,8 @@ zval *zstream; int argc = ZEND_NUM_ARGS(); long length = -1; - UChar *buf = NULL; - int32_t num_chars = -1, num_bytes = -1; - int is_unicode; + zstr buf; + size_t retlen = 0; if (zend_parse_parameters(argc TSRMLS_CC, "r|l", &zstream, &length) == FAILURE) { RETURN_NULL(); @@ -1003,19 +1002,15 @@ php_stream_from_zval(stream, &zstream); - if (length > 0) { - /* Don't try to short circuit this by just using num_chars in parse_parameters, long doesn't always mean 32-bit */ - num_chars = length; - } - - if ((buf = php_stream_u_get_line(stream, NULL, &num_bytes, &num_chars, &is_unicode)) == NULL) { + buf.v = php_stream_get_line_ex(stream, php_stream_reads_unicode(stream) ? IS_UNICODE : IS_STRING, NULL, 0, length, &retlen); + if (!buf.v) { RETURN_FALSE; } - if (is_unicode) { - RETURN_UNICODEL(buf, num_chars, 0); + if (php_stream_reads_unicode(stream)) { + RETURN_UNICODEL(buf.u, retlen, 0); } else { - RETURN_STRINGL((char*)buf, num_bytes, 0); + RETURN_STRINGL(buf.s, retlen, 0); } } /* }}} */
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php