pollita Tue Mar 14 21:15:05 2006 UTC
Modified files:
/php-src/main php_streams.h
/php-src/main/streams streams.c
/php-src/ext/standard file.c
Log:
More stream updates.
fgets() will work now, as will anything that calls one of the
_php_stream_get_line() family of functions.
The one exception here is when the legacy defines (php_stream_get_line()
and php_stream_gets(), which always request an IS_STRING buffer) are used
on a unicode stream. At the moment they'll simply return NULL; I'll update
these to do sloppy conversion in a bit.
'make (u)test' still doesn't work, but it now fails in a different way.
http://cvs.php.net/viewcvs.cgi/php-src/main/php_streams.h?r1=1.106&r2=1.107&diff_format=u
Index: php-src/main/php_streams.h
diff -u php-src/main/php_streams.h:1.106 php-src/main/php_streams.h:1.107
--- php-src/main/php_streams.h:1.106 Mon Mar 13 04:40:11 2006
+++ php-src/main/php_streams.h Tue Mar 14 21:15:05 2006
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_streams.h,v 1.106 2006/03/13 04:40:11 pollita Exp $ */
+/* $Id: php_streams.h,v 1.107 2006/03/14 21:15:05 pollita Exp $ */
#ifndef PHP_STREAMS_H
#define PHP_STREAMS_H
@@ -320,10 +320,14 @@
PHPAPI int _php_stream_flush(php_stream *stream, int closing TSRMLS_DC);
#define php_stream_flush(stream) _php_stream_flush((stream), 0 TSRMLS_CC)
-PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t
maxlen, size_t *returned_len TSRMLS_DC);
-#define php_stream_gets(stream, buf, maxlen) _php_stream_get_line((stream),
(buf), (maxlen), NULL TSRMLS_CC)
+PHPAPI void *_php_stream_get_line(php_stream *stream, int buf_type, zstr buf,
size_t maxlen, size_t maxchars, size_t *returned_len TSRMLS_DC);
+#define php_stream_get_line(stream, buf, maxlen, retlen)
_php_stream_get_line((stream), IS_STRING, ZSTR(buf), (maxlen), 0, (retlen)
TSRMLS_CC)
+#define php_stream_get_line_ex(stream, buf_type, buf, maxlen, maxchars,
retlen) \
+
_php_stream_get_line((stream),
(buf_type), ZSTR(buf), (maxlen), (maxchars), (retlen) TSRMLS_CC)
+#define php_stream_gets(stream, buf, maxlen)
_php_stream_get_line((stream), IS_STRING, ZSTR(buf), (maxlen), 0, NULL
TSRMLS_CC)
+#define php_stream_gets_ex(stream, buf_type, buf, maxlen, maxchars) \
+
_php_stream_get_line((stream),
(buf_type), ZSTR(buf), (maxlen), (maxchars), NULL TSRMLS_CC)
-#define php_stream_get_line(stream, buf, maxlen, retlen)
_php_stream_get_line((stream), (buf), (maxlen), (retlen) TSRMLS_CC)
PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t
*returned_len, char *delim, size_t delim_len TSRMLS_DC);
PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t
*pmax_bytes, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC);
http://cvs.php.net/viewcvs.cgi/php-src/main/streams/streams.c?r1=1.101&r2=1.102&diff_format=u
Index: php-src/main/streams/streams.c
diff -u php-src/main/streams/streams.c:1.101
php-src/main/streams/streams.c:1.102
--- php-src/main/streams/streams.c:1.101 Mon Mar 13 20:54:06 2006
+++ php-src/main/streams/streams.c Tue Mar 14 21:15:05 2006
@@ -19,7 +19,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: streams.c,v 1.101 2006/03/13 20:54:06 pollita Exp $ */
+/* $Id: streams.c,v 1.102 2006/03/14 21:15:05 pollita Exp $ */
#define _GNU_SOURCE
#include "php.h"
@@ -955,18 +955,25 @@
/* If buf == NULL, the buffer will be allocated automatically and will be of an
* appropriate length to hold the line, regardless of the line length, memory
- * permitting -- returned string will be up to (maxlen-1), last byte holding
terminating NULL
- * Like php_stream_read(), this will treat unicode streams as ugly binary data
(use with caution) */
-PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen,
- size_t *returned_len TSRMLS_DC)
+ * permitting -- returned string will be up to (maxlen-1) units of (maxchars)
characters, last byte holding terminating NULL
+ * Like php_stream_read(), this will (UTODO) treat unicode streams as ugly
binary data (use with caution) */
+PHPAPI void *_php_stream_get_line(php_stream *stream, int buf_type, zstr buf,
size_t maxlen, size_t maxchars, size_t *returned_len TSRMLS_DC)
{
size_t avail = 0;
size_t current_buf_size = 0;
size_t total_copied = 0;
int grow_mode = 0;
- char *bufstart = buf;
+ int is_unicode = php_stream_reads_unicode(stream);
+ int split_surrogate = 0;
+ zstr bufstart = buf;
+
+ if ((buf_type == IS_STRING && is_unicode) ||
+ (buf_type == IS_UNICODE && !is_unicode)) {
+ /* UTODO: Allow sloppy conversion */
+ return NULL;
+ }
- if (buf == NULL) {
+ if (buf.v == NULL) {
grow_mode = 1;
} else if (maxlen == 0) {
return NULL;
@@ -988,20 +995,39 @@
for (;;) {
avail = stream->writepos - stream->readpos;
- if (avail > 0) {
- size_t cpysz = 0;
- char *readptr;
- char *eol;
+ if (!split_surrogate && avail > 0) {
+ size_t cpysz = avail;
+ zstr readptr;
int done = 0;
- readptr = stream->readbuf.s + stream->readpos;
- eol = php_stream_locate_eol(stream, (zstr)NULL, 0
TSRMLS_CC);
+ if (is_unicode) {
+ UChar *eol;
+ readptr.u = stream->readbuf.u + stream->readpos;
+
+ eol = php_stream_locate_eol(stream, ZSTR(NULL),
0 TSRMLS_CC);
+ if (eol) {
+ cpysz = eol - readptr.u + 1;
+ done = 1;
+ }
- if (eol) {
- cpysz = eol - readptr + 1;
- done = 1;
+ if (U16_IS_SURROGATE(readptr.u[cpysz - 1]) &&
+ U16_IS_SURROGATE_LEAD(readptr.u[cpysz -
1])) {
+ /* Don't orphan */
+ cpysz--;
+ if (!cpysz) {
+ /* Force the loop to land on
fill_read_buffer */
+ split_surrogate = 1; /* must
specifically be 1 */
+ continue;
+ }
+ }
} else {
- cpysz = avail;
+ char *eol;
+ readptr.s = stream->readbuf.s + stream->readpos;
+ eol = php_stream_locate_eol(stream, ZSTR(NULL),
0 TSRMLS_CC);
+ if (eol) {
+ cpysz = eol - readptr.s + 1;
+ done = 1;
+ }
}
if (grow_mode) {
@@ -1012,9 +1038,9 @@
* than 8K, we waste 1 byte per additional 8K
or so.
* That seems acceptable to me, to avoid making
this code
* hard to follow */
- bufstart = erealloc(bufstart, current_buf_size
+ cpysz + 1);
+ bufstart.s = erealloc(bufstart.s,
PS_ULEN(stream, current_buf_size + cpysz + 1));
+ buf.s = bufstart.s + PS_ULEN(stream,
total_copied);
current_buf_size += cpysz + 1;
- buf = bufstart + total_copied;
} else {
if (cpysz >= maxlen - 1) {
cpysz = maxlen - 1;
@@ -1022,11 +1048,29 @@
}
}
- memcpy(buf, readptr, cpysz);
+ if (is_unicode) {
+ int ulen = u_countChar32(readptr.u, cpysz);
+
+ if (ulen > maxchars) {
+ int32_t i = 0;
+
+ ulen = maxchars;
+ U16_FWD_N(readptr.u, i, cpysz, ulen);
+ cpysz = i;
+ }
+ maxchars -= ulen;
+ memcpy(buf.u, readptr.u, UBYTES(cpysz));
+ buf.u += cpysz;
+ } else {
+ if (cpysz > maxchars) {
+ cpysz = maxchars;
+ }
+ memcpy(buf.s, readptr.s, cpysz);
+ buf.s += cpysz;
+ }
stream->position += cpysz;
stream->readpos += cpysz;
- buf += cpysz;
maxlen -= cpysz;
total_copied += cpysz;
@@ -1050,32 +1094,31 @@
php_stream_fill_read_buffer(stream, toread TSRMLS_CC);
- if (stream->writepos - stream->readpos == 0) {
+ if (stream->writepos - stream->readpos <=
split_surrogate) {
break;
}
+ split_surrogate = 0;
}
}
if (total_copied == 0) {
if (grow_mode) {
- assert(bufstart == NULL);
+ assert(bufstart.v == NULL);
}
return NULL;
}
- buf[0] = '\0';
+ if (is_unicode) {
+ buf.u[0] = 0;
+ } else {
+ buf.s[0] = 0;
+ }
+
if (returned_len) {
*returned_len = total_copied;
}
- return bufstart;
-}
-
-PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t
*pmax_bytes, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC)
-{
- /* TODO: Bring this back up to date */
-
- return NULL;
+ return bufstart.s;
}
/* Same deal as php_stream_read() and php_stream_get_line()
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/file.c?r1=1.425&r2=1.426&diff_format=u
Index: php-src/ext/standard/file.c
diff -u php-src/ext/standard/file.c:1.425 php-src/ext/standard/file.c:1.426
--- php-src/ext/standard/file.c:1.425 Mon Mar 13 15:01:44 2006
+++ php-src/ext/standard/file.c Tue Mar 14 21:15:05 2006
@@ -21,7 +21,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: file.c,v 1.425 2006/03/13 15:01:44 derick Exp $ */
+/* $Id: file.c,v 1.426 2006/03/14 21:15:05 pollita Exp $ */
/* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */
@@ -993,9 +993,8 @@
zval *zstream;
int argc = ZEND_NUM_ARGS();
long length = -1;
- UChar *buf = NULL;
- int32_t num_chars = -1, num_bytes = -1;
- int is_unicode;
+ zstr buf;
+ size_t retlen = 0;
if (zend_parse_parameters(argc TSRMLS_CC, "r|l", &zstream, &length) ==
FAILURE) {
RETURN_NULL();
@@ -1003,19 +1002,15 @@
php_stream_from_zval(stream, &zstream);
- if (length > 0) {
- /* Don't try to short circuit this by just using num_chars in
parse_parameters, long doesn't always mean 32-bit */
- num_chars = length;
- }
-
- if ((buf = php_stream_u_get_line(stream, NULL, &num_bytes, &num_chars,
&is_unicode)) == NULL) {
+ buf.v = php_stream_get_line_ex(stream, php_stream_reads_unicode(stream)
? IS_UNICODE : IS_STRING, NULL, 0, length, &retlen);
+ if (!buf.v) {
RETURN_FALSE;
}
- if (is_unicode) {
- RETURN_UNICODEL(buf, num_chars, 0);
+ if (php_stream_reads_unicode(stream)) {
+ RETURN_UNICODEL(buf.u, retlen, 0);
} else {
- RETURN_STRINGL((char*)buf, num_bytes, 0);
+ RETURN_STRINGL(buf.s, retlen, 0);
}
}
/* }}} */
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php