pollita Wed Mar 29 01:20:43 2006 UTC Added files: /php-src/main/streams unicode_filter.c
Removed files: /php-src/ext/unicode unicode_filter.c Modified files: /php-src/ext/standard basic_functions.c file.c streamsfuncs.c streamsfuncs.h /php-src/ext/unicode config.m4 config.w32 php_unicode.h unicode.c /php-src/main main.c php_streams.h /php-src/main/streams filter.c php_stream_filter_api.h streams.c /php-src/win32/build config.w32 Log: Another (and hopefully last) major streams commit. This moves unicode conversion to the filter layer (rather than at the lower streams layer). unicode_filter.c has been moved from ext/unicode to main/streams as it's an integral part of the streams unicode conversion process. There are now three ways to set encoding on a stream: (1) By context: $ctx = stream_context_create(NULL,array('encoding'=>'latin1')); $fp = fopen('somefile', 'r+t', false, $ctx); (2) By stream_encoding(): $fp = fopen('somefile', 'r+'); stream_encoding($fp, 'latin1'); (3) By filter: $fp = fopen('somefile', 'r+'); stream_filter_append($fp, 'unicode.from.latin1', STREAM_FILTER_READ); stream_filter_append($fp, 'unicode.to.latin1', STREAM_FILTER_WRITE); Note: Methods 1 and 2 are convenience wrappers around method 3.
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/basic_functions.c?r1=1.766&r2=1.767&diff_format=u Index: php-src/ext/standard/basic_functions.c diff -u php-src/ext/standard/basic_functions.c:1.766 php-src/ext/standard/basic_functions.c:1.767 --- php-src/ext/standard/basic_functions.c:1.766 Wed Mar 22 10:20:20 2006 +++ php-src/ext/standard/basic_functions.c Wed Mar 29 01:20:42 2006 @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: basic_functions.c,v 1.766 2006/03/22 10:20:20 derick Exp $ */ +/* $Id: basic_functions.c,v 1.767 2006/03/29 01:20:42 pollita Exp $ */ #include "php.h" #include "php_streams.h" @@ -589,6 +589,7 @@ PHP_FE(stream_filter_prepend, NULL) PHP_FE(stream_filter_append, NULL) PHP_FE(stream_filter_remove, NULL) + PHP_FE(stream_encoding, NULL) PHP_FE(stream_socket_client, second_and_third_args_force_ref) PHP_FE(stream_socket_server, second_and_third_args_force_ref) PHP_FE(stream_socket_accept, third_arg_force_ref) http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/file.c?r1=1.430&r2=1.431&diff_format=u Index: php-src/ext/standard/file.c diff -u php-src/ext/standard/file.c:1.430 php-src/ext/standard/file.c:1.431 --- php-src/ext/standard/file.c:1.430 Mon Mar 27 23:41:05 2006 +++ php-src/ext/standard/file.c Wed Mar 29 01:20:42 2006 @@ -21,7 +21,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: file.c,v 1.430 2006/03/27 23:41:05 iliaa Exp $ */ +/* $Id: file.c,v 1.431 2006/03/29 01:20:42 pollita Exp $ */ /* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */ @@ -1008,14 +1008,14 @@ php_stream_from_zval(stream, &zstream); - buf.v = php_stream_get_line_ex(stream, php_stream_reads_unicode(stream) ? 
IS_UNICODE : IS_STRING, NULL_ZSTR, 0, length, &retlen); + buf.v = php_stream_get_line_ex(stream, stream->readbuf_type, NULL_ZSTR, 0, length, &retlen); if (!buf.v) { RETURN_FALSE; } - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { RETURN_UNICODEL(buf.u, retlen, 0); - } else { + } else { /* IS_STRING */ RETURN_STRINGL(buf.s, retlen, 0); } } @@ -1034,7 +1034,7 @@ PHP_STREAM_TO_ZVAL(stream, arg1); - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { int buflen = 1; UChar *buf = php_stream_read_unicode_chars(stream, &buflen); @@ -1042,7 +1042,7 @@ RETURN_FALSE; } RETURN_UNICODEL(buf, buflen, 0); - } else { + } else { /* IS_STRING */ char buf[2]; buf[0] = php_stream_getc(stream); @@ -1068,7 +1068,7 @@ php_stream_from_zval(stream, &zstream); - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { UChar *buf = php_stream_get_line_ex(stream, IS_UNICODE, NULL_ZSTR, 0, length, &retlen); UChar *allowed = NULL; int allowed_len = 0; @@ -1085,7 +1085,7 @@ retlen = php_u_strip_tags(buf, retlen, &stream->fgetss_state, allowed, allowed_len TSRMLS_CC); RETURN_UNICODEL(buf, retlen, 0); - } else { + } else { /* IS_STRING */ char *buf = php_stream_get_line_ex(stream, IS_STRING, NULL_ZSTR, 0, length, &retlen); char *allowed = NULL; int allowed_len = 0; @@ -1752,7 +1752,7 @@ RETURN_FALSE; } - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { int buflen = len; UChar *buf = php_stream_read_unicode_chars(stream, &buflen); @@ -1761,7 +1761,7 @@ } RETURN_UNICODEL(buf, buflen, 0); - } else { + } else { /* IS_STRING */ char *buf = emalloc(len + 1); int buflen = php_stream_read(stream, buf, len); http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/streamsfuncs.c?r1=1.72&r2=1.73&diff_format=u Index: php-src/ext/standard/streamsfuncs.c diff -u php-src/ext/standard/streamsfuncs.c:1.72 php-src/ext/standard/streamsfuncs.c:1.73 --- 
php-src/ext/standard/streamsfuncs.c:1.72 Sun Mar 26 04:40:11 2006 +++ php-src/ext/standard/streamsfuncs.c Wed Mar 29 01:20:42 2006 @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: streamsfuncs.c,v 1.72 2006/03/26 04:40:11 pollita Exp $ */ +/* $Id: streamsfuncs.c,v 1.73 2006/03/29 01:20:42 pollita Exp $ */ #include "php.h" #include "php_globals.h" @@ -489,11 +489,11 @@ add_assoc_zval(return_value, "write_filters", newval); } - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { int readbuf_len = u_countChar32(stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos); add_assoc_long(return_value, "unread_bytes", UBYTES(stream->writepos - stream->readpos)); add_assoc_long(return_value, "unread_chars", readbuf_len); - } else { + } else { /* IS_STRING */ add_assoc_long(return_value, "unread_bytes", stream->writepos - stream->readpos); add_assoc_long(return_value, "unread_chars", stream->writepos - stream->readpos); } @@ -1275,7 +1275,7 @@ php_stream_from_zval(stream, &zstream); - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { UChar *buf; UChar *d = NULL; int dlen = 0; @@ -1294,7 +1294,7 @@ } RETURN_UNICODEL(buf, buf_size, 0); - } else { + } else { /* IS_STRING */ char *buf; char *d = NULL; int dlen = 0; @@ -1462,6 +1462,67 @@ } /* }}} */ +/* {{{ proto void stream_encoding(resource stream[, string encoding]) +Set character set for stream encoding +UTODO: Return current encoding charset +*/ +PHP_FUNCTION(stream_encoding) +{ + zval *zstream; + php_stream *stream; + char *encoding = NULL; + int encoding_len = 0; + int remove_read_tail = 0, remove_write_tail = 0; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "r|s", &zstream, &encoding, &encoding_len) == FAILURE) { + return; + } + + php_stream_from_zval(stream, &zstream); + + /* Double check that the target encoding is legal before attempting anything */ + + if 
(stream->readfilters.tail) { + if (stream->readfilters.tail->fops == &php_unicode_from_string_filter_ops) { + /* Remove the current unicode.from.* filter, + the filter layer will transcode anything in the read buffer back to binary + or invalidate the read buffer */ + remove_read_tail = 1; + } else if (stream->readbuf_type == IS_UNICODE) { + /* There's an encoding on the stream already, but then there's filtering happening after that point + It's asking too much for PHP to figure out what the user wants, throw an error back in their face */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot change encoding on filtered stream"); + RETURN_FALSE; + } + } + + if (stream->writefilters.tail) { + if (stream->writefilters.tail->fops == &php_unicode_to_string_filter_ops) { + /* Remove the current unicode.to.* filter */ + remove_write_tail = 1; + } else if ((stream->writefilters.tail->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) == 0) { + /* conversion to binary is happening, them another filter is doing something + bailout for same reason as read filters */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot change encoding on filtered stream"); + RETURN_FALSE; + } + } + + if (remove_read_tail) { + php_stream_filter_remove(stream->readfilters.tail, 1 TSRMLS_CC); + } + if (remove_write_tail) { + php_stream_filter_remove(stream->writefilters.tail, 1 TSRMLS_CC); + } + + /* UTODO: Allow overriding error handling for converters */ + php_stream_encoding_apply(stream, 1, encoding, UG(from_error_mode), UG(from_subst_char)); + php_stream_encoding_apply(stream, 0, encoding, UG(to_error_mode), NULL); + + RETURN_TRUE; +} +/* }}} */ + /* * Local variables: * tab-width: 4 http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/streamsfuncs.h?r1=1.14&r2=1.15&diff_format=u Index: php-src/ext/standard/streamsfuncs.h diff -u php-src/ext/standard/streamsfuncs.h:1.14 php-src/ext/standard/streamsfuncs.h:1.15 --- php-src/ext/standard/streamsfuncs.h:1.14 Sun Jan 1 13:09:55 2006 +++ 
php-src/ext/standard/streamsfuncs.h Wed Mar 29 01:20:42 2006 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: streamsfuncs.h,v 1.14 2006/01/01 13:09:55 sniper Exp $ */ +/* $Id: streamsfuncs.h,v 1.15 2006/03/29 01:20:42 pollita Exp $ */ /* Flags for stream_socket_client */ #define PHP_STREAM_CLIENT_PERSISTENT 1 @@ -53,6 +53,7 @@ PHP_FUNCTION(stream_filter_prepend); PHP_FUNCTION(stream_filter_append); PHP_FUNCTION(stream_filter_remove); +PHP_FUNCTION(stream_encoding); PHP_FUNCTION(stream_socket_enable_crypto); PHP_FUNCTION(stream_socket_pair); http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/config.m4?r1=1.6&r2=1.7&diff_format=u Index: php-src/ext/unicode/config.m4 diff -u php-src/ext/unicode/config.m4:1.6 php-src/ext/unicode/config.m4:1.7 --- php-src/ext/unicode/config.m4:1.6 Sun Mar 26 11:06:24 2006 +++ php-src/ext/unicode/config.m4 Wed Mar 29 01:20:43 2006 @@ -1,7 +1,7 @@ dnl -dnl $Id: config.m4,v 1.6 2006/03/26 11:06:24 derick Exp $ +dnl $Id: config.m4,v 1.7 2006/03/29 01:20:43 pollita Exp $ dnl PHP_SUBST(UNICODE_SHARED_LIBADD) AC_DEFINE(HAVE_UNICODE, 1, [ ]) -PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_filter.c unicode_iterators.c collator.c, $ext_shared) +PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_iterators.c collator.c, $ext_shared) http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/config.w32?r1=1.6&r2=1.7&diff_format=u Index: php-src/ext/unicode/config.w32 diff -u php-src/ext/unicode/config.w32:1.6 php-src/ext/unicode/config.w32:1.7 --- php-src/ext/unicode/config.w32:1.6 Sun Mar 26 11:06:24 2006 +++ php-src/ext/unicode/config.w32 Wed Mar 29 01:20:43 2006 @@ -1,5 +1,5 @@ -// $Id: config.w32,v 1.6 2006/03/26 11:06:24 derick Exp $ +// $Id: config.w32,v 1.7 2006/03/29 01:20:43 pollita Exp $ // vim:ft=javascript -EXTENSION("unicode", "unicode.c unicode_filter.c unicode_iterators.c collator.c locale.c"); +EXTENSION("unicode", "unicode.c unicode_iterators.c collator.c locale.c"); 
AC_DEFINE('HAVE_UNICODE', 1, 'ICU API extension'); http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/php_unicode.h?r1=1.6&r2=1.7&diff_format=u Index: php-src/ext/unicode/php_unicode.h diff -u php-src/ext/unicode/php_unicode.h:1.6 php-src/ext/unicode/php_unicode.h:1.7 --- php-src/ext/unicode/php_unicode.h:1.6 Sun Mar 26 11:06:24 2006 +++ php-src/ext/unicode/php_unicode.h Wed Mar 29 01:20:43 2006 @@ -14,7 +14,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_unicode.h,v 1.6 2006/03/26 11:06:24 derick Exp $ */ +/* $Id: php_unicode.h,v 1.7 2006/03/29 01:20:43 pollita Exp $ */ #ifndef PHP_UNICODE_H #define PHP_UNICODE_H @@ -67,7 +67,6 @@ PHP_METHOD(collator, __construct); void php_init_collation(TSRMLS_D); -extern php_stream_filter_factory php_unicode_filter_factory; #ifdef __cplusplus } // extern "C" http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode.c?r1=1.18&r2=1.19&diff_format=u Index: php-src/ext/unicode/unicode.c diff -u php-src/ext/unicode/unicode.c:1.18 php-src/ext/unicode/unicode.c:1.19 --- php-src/ext/unicode/unicode.c:1.18 Mon Mar 27 03:19:30 2006 +++ php-src/ext/unicode/unicode.c Wed Mar 29 01:20:43 2006 @@ -15,7 +15,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: unicode.c,v 1.18 2006/03/27 03:19:30 andrei Exp $ */ +/* $Id: unicode.c,v 1.19 2006/03/29 01:20:43 pollita Exp $ */ #include "php_unicode.h" #if HAVE_UNICODE @@ -273,10 +273,6 @@ /* {{{ PHP_MINIT_FUNCTION */ PHP_MINIT_FUNCTION(unicode) { - if (php_stream_filter_register_factory("unicode.*", &php_unicode_filter_factory TSRMLS_CC) == FAILURE) { - return FAILURE; - } - php_register_unicode_iterators(TSRMLS_C); php_init_collation(TSRMLS_C); @@ -287,9 +283,6 @@ /* {{{ PHP_MSHUTDOWN_FUNCTION */ PHP_MSHUTDOWN_FUNCTION(unicode) { - if (php_stream_filter_unregister_factory("unicode.*" TSRMLS_CC) == FAILURE) { - return FAILURE; - } /* add your stuff here */ 
http://cvs.php.net/viewcvs.cgi/php-src/main/main.c?r1=1.683&r2=1.684&diff_format=u Index: php-src/main/main.c diff -u php-src/main/main.c:1.683 php-src/main/main.c:1.684 --- php-src/main/main.c:1.683 Sun Mar 26 06:19:24 2006 +++ php-src/main/main.c Wed Mar 29 01:20:43 2006 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: main.c,v 1.683 2006/03/26 06:19:24 andrei Exp $ */ +/* $Id: main.c,v 1.684 2006/03/29 01:20:43 pollita Exp $ */ /* {{{ includes */ @@ -1611,6 +1611,12 @@ return FAILURE; } + /* Initialize unicode filters */ + if (php_stream_filter_register_factory("unicode.*", &php_unicode_filter_factory TSRMLS_CC) == FAILURE) { + php_printf("PHP: Unable to initialize unicode stream filters.\n"); + return FAILURE; + } + /* initialize registry for images to be used in phpinfo() (this uses configuration parameters from php.ini) */ @@ -1744,6 +1750,7 @@ zend_shutdown(TSRMLS_C); + /* Destroys filter & transport registries too */ php_shutdown_stream_wrappers(module_number TSRMLS_CC); php_shutdown_info_logos(); http://cvs.php.net/viewcvs.cgi/php-src/main/php_streams.h?r1=1.108&r2=1.109&diff_format=u Index: php-src/main/php_streams.h diff -u php-src/main/php_streams.h:1.108 php-src/main/php_streams.h:1.109 --- php-src/main/php_streams.h:1.108 Fri Mar 24 19:22:24 2006 +++ php-src/main/php_streams.h Wed Mar 29 01:20:43 2006 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_streams.h,v 1.108 2006/03/24 19:22:24 pollita Exp $ */ +/* $Id: php_streams.h,v 1.109 2006/03/29 01:20:43 pollita Exp $ */ #ifndef PHP_STREAMS_H #define PHP_STREAMS_H @@ -206,12 +206,9 @@ php_stream_context *context; int flags; /* PHP_STREAM_FLAG_XXX */ - /* unicode */ - UConverter *input_encoding; - UConverter *output_encoding; - /* buffer */ off_t position; /* of underlying stream */ + zend_uchar readbuf_type; zstr readbuf; /* readbuf.s or readbuf.u */ size_t readbuflen; /* Length in units 
(char or UChar) */ off_t readpos; /* Position in units (char or UChar) */ @@ -252,8 +249,6 @@ #define php_stream_from_zval_no_verify(xstr, ppzval) (xstr) = (php_stream*)zend_fetch_resource((ppzval) TSRMLS_CC, -1, "stream", NULL, 2, php_file_le_stream(), php_file_le_pstream()) #define PS_ULEN(is_unicode, len) ((is_unicode) ? UBYTES(len) : (len)) -#define php_stream_reads_unicode(stream) ((stream->input_encoding) ? 1 : 0) -#define php_stream_writes_unicode(stream) ((stream->output_encoding) ? 1 : 0) BEGIN_EXTERN_C() PHPAPI int php_stream_from_persistent_id(const char *persistent_id, php_stream **stream TSRMLS_DC); http://cvs.php.net/viewcvs.cgi/php-src/main/streams/filter.c?r1=1.27&r2=1.28&diff_format=u Index: php-src/main/streams/filter.c diff -u php-src/main/streams/filter.c:1.27 php-src/main/streams/filter.c:1.28 --- php-src/main/streams/filter.c:1.27 Sun Mar 26 06:19:24 2006 +++ php-src/main/streams/filter.c Wed Mar 29 01:20:43 2006 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: filter.c,v 1.27 2006/03/26 06:19:24 andrei Exp $ */ +/* $Id: filter.c,v 1.28 2006/03/29 01:20:43 pollita Exp $ */ #include "php.h" #include "php_globals.h" @@ -396,50 +396,63 @@ chain->tail = filter; filter->chain = chain; - if (&(stream->readfilters) == chain && (stream->writepos - stream->readpos) > 0) { + if (&(stream->readfilters) == chain) { /* Let's going ahead and wind anything in the buffer through this filter */ php_stream_bucket_brigade brig_in = { NULL, NULL }, brig_out = { NULL, NULL }; php_stream_bucket_brigade *brig_inp = &brig_in, *brig_outp = &brig_out; - php_stream_filter_status_t status; + php_stream_filter_status_t status = PSFS_FEED_ME; php_stream_bucket *bucket; size_t consumed = 0; - if (stream->input_encoding) { - bucket = php_stream_bucket_new_unicode(stream, stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC); - } else { - bucket = php_stream_bucket_new(stream, 
stream->readbuf.s + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC); + if ((stream->writepos - stream->readpos) > 0) { + if (stream->readbuf_type == IS_UNICODE) { + bucket = php_stream_bucket_new_unicode(stream, stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC); + } else { + bucket = php_stream_bucket_new(stream, stream->readbuf.s + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC); + } + php_stream_bucket_append(brig_inp, bucket TSRMLS_CC); + status = filter->fops->filter(stream, filter, brig_inp, brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC); + + if (stream->readpos + consumed > stream->writepos || consumed < 0) { + /* No behaving filter should cause this. */ + status = PSFS_ERR_FATAL; + } } - php_stream_bucket_append(brig_inp, bucket TSRMLS_CC); - status = filter->fops->filter(stream, filter, brig_inp, brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC); - if (stream->readpos + consumed > stream->writepos || consumed < 0) { - /* No behaving filter should cause this. */ - status = PSFS_ERR_FATAL; - } - - switch (status) { - case PSFS_ERR_FATAL: - /* If this first cycle simply fails then there's something wrong with the filter. - Pull the filter off the chain and leave the read buffer alone. */ - if (chain->head == filter) { - chain->head = NULL; - chain->tail = NULL; - } else { - filter->prev->next = NULL; - chain->tail = filter->prev; - } - php_stream_bucket_unlink(bucket TSRMLS_CC); - php_stream_bucket_delref(bucket TSRMLS_CC); - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data. Not adding to filterchain."); - break; - case PSFS_FEED_ME: + if (status == PSFS_ERR_FATAL) { + /* If this first cycle simply fails then there's something wrong with the filter. + Pull the filter off the chain and leave the read buffer alone. 
*/ + if (chain->head == filter) { + chain->head = NULL; + chain->tail = NULL; + } else { + filter->prev->next = NULL; + chain->tail = filter->prev; + } + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data. Not adding to filterchain."); + } else { + /* This filter addition may change the readbuffer type. + Since all the previously held data is in the bucket brigade, + we can reappropriate the buffer that already exists (if one does) */ + if (stream->readbuf_type == IS_UNICODE && (filter->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) == 0) { + /* Buffer is currently based on unicode characters, but filter only outputs STRING adjust counting */ + stream->readbuf_type = IS_STRING; + stream->readbuflen *= UBYTES(1); + } else if (stream->readbuf_type == IS_STRING && (filter->fops->flags & PSFO_FLAG_OUTPUTS_STRING) == 0) { + /* Buffer is currently based on binary characters, but filter only outputs UNICODE adjust counting */ + stream->readbuf_type = IS_UNICODE; + stream->readbuflen /= UBYTES(1); + } + + if (status == PSFS_FEED_ME) { /* We don't actually need data yet, leave this filter in a feed me state until data is needed. Reset stream's internal read buffer since the filter is "holding" it. */ stream->readpos = 0; stream->writepos = 0; - break; - case PSFS_PASS_ON: + } else if (status == PSFS_PASS_ON) { /* Put any filtered data onto the readbuffer stack. Previously read data has been at least partially consumed. 
*/ stream->readpos += consumed; @@ -454,23 +467,20 @@ bucket = brig_outp->head; /* Convert for stream type */ - if (bucket->buf_type != IS_UNICODE && stream->input_encoding) { - /* Stream expects unicode, convert using stream encoding */ - php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding); - } else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) { - /* Stream expects binary, filter provided unicode, just take the buffer as is */ - php_stream_bucket_convert_notranscode(bucket, IS_STRING); + if (bucket->buf_type != stream->readbuf_type) { + /* Stream expects different type than bucket contains, convert slopily */ + php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type); } /* Grow buffer to hold this bucket if need be. TODO: See warning in main/stream/streams.c::php_stream_fill_read_buffer */ if (stream->readbuflen - stream->writepos < bucket->buflen) { stream->readbuflen += bucket->buflen; - stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, stream->readbuflen), stream->is_persistent); + stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readbuflen), stream->is_persistent); } /* Append to readbuf */ - if (stream->input_encoding) { + if (stream->readbuf_type == IS_UNICODE) { memcpy(stream->readbuf.u + stream->writepos, bucket->buf.u, UBYTES(bucket->buflen)); } else { memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, bucket->buflen); @@ -480,10 +490,9 @@ php_stream_bucket_unlink(bucket TSRMLS_CC); php_stream_bucket_delref(bucket TSRMLS_CC); } - break; + } } - - } + } /* end of readfilters specific code */ } PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain TSRMLS_DC) @@ -597,26 +606,23 @@ /* Dump any newly flushed data to the read buffer */ if (stream->readpos > stream->chunk_size) { /* Back the buffer up */ - memcpy(stream->readbuf.s, stream->readbuf.s + PS_ULEN(stream->input_encoding, stream->readpos), 
PS_ULEN(stream->input_encoding, stream->writepos - stream->readpos)); + memcpy(stream->readbuf.s, stream->readbuf.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readpos), PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos - stream->readpos)); stream->writepos -= stream->readpos; stream->readpos = 0; } if (flushed_size > (stream->readbuflen - stream->writepos)) { /* Grow the buffer */ - stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, stream->writepos + flushed_size + stream->chunk_size), stream->is_persistent); + stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos + flushed_size + stream->chunk_size), stream->is_persistent); } while ((bucket = inp->head)) { /* Convert if necessary */ - if (bucket->buf_type != IS_UNICODE && stream->input_encoding) { - /* Stream expects unicode, convert using stream encoding */ - php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding); - } else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) { - /* Stream expects binary, filter provided unicode, just take the buffer as is */ - php_stream_bucket_convert_notranscode(bucket, IS_STRING); + if (bucket->buf_type != stream->readbuf_type) { + /* Stream expects different type than what's in bucket, convert slopily */ + php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type); } /* Append to readbuf */ - if (stream->input_encoding) { + if (stream->readbuf_type == IS_UNICODE) { memcpy(stream->readbuf.u + stream->writepos, bucket->buf.u, UBYTES(bucket->buflen)); } else { memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, bucket->buflen); @@ -632,13 +638,8 @@ while ((bucket = inp->head)) { /* Convert if necessary */ if (bucket->buf_type == IS_UNICODE) { - if (stream->output_encoding) { - /* Stream has a configured output encoding, convert to appropriate type */ - php_stream_bucket_convert(bucket, IS_STRING, stream->output_encoding); - } else 
{ - /* Stream is binary, write ugly UChars as is */ - php_stream_bucket_convert_notranscode(bucket, IS_STRING); - } + /* Force data to binary, adjusting buflen */ + php_stream_bucket_convert_notranscode(bucket, IS_STRING); } /* Must be binary by this point */ @@ -654,6 +655,9 @@ PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, int call_dtor TSRMLS_DC) { + /* UTODO: Figure out a sane way to "defilter" so that unicode converters can be swapped around + For now, at least fopen(,'b') + stream_encoding($fp, 'charset') works since there's nothing to remove */ + if (filter->prev) { filter->prev->next = filter->next; } else { @@ -770,6 +774,42 @@ return FAILURE; } +PHPAPI int _php_stream_encoding_apply(php_stream *stream, int writechain, const char *encoding, uint16_t error_mode, UChar *subst TSRMLS_DC) +{ + int encoding_len = strlen(encoding); + int buflen = sizeof("unicode.from.") + encoding_len - 1; /* might be "to", but "from" is long enough for both */ + char *buf = emalloc(buflen + 1); + php_stream_filter *filter; + zval *filterparams; + + if (writechain) { + memcpy(buf, "unicode.to.", sizeof("unicode.to.") - 1); + memcpy(buf + sizeof("unicode.to.") - 1, encoding, encoding_len + 1); + } else { + memcpy(buf, "unicode.from.", sizeof("unicode.from.") - 1); + memcpy(buf + sizeof("unicode.from.") - 1, encoding, encoding_len + 1); + } + + ALLOC_INIT_ZVAL(filterparams); + array_init(filterparams); + add_assoc_long(filterparams, "error_mode", error_mode); + if (subst) { + add_assoc_unicode(filterparams, "subst_char", subst, 1); + } + filter = php_stream_filter_create(buf, filterparams, php_stream_is_persistent(stream) TSRMLS_CC); + efree(buf); + zval_ptr_dtor(&filterparams); + + if (!filter) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to apply encoding for charset: %s\n", encoding); + return FAILURE; + } + + php_stream_filter_append(writechain ? 
&stream->writefilters : &stream->readfilters, filter); + + return SUCCESS; +} + /* * Local variables: * tab-width: 4 http://cvs.php.net/viewcvs.cgi/php-src/main/streams/php_stream_filter_api.h?r1=1.16&r2=1.17&diff_format=u Index: php-src/main/streams/php_stream_filter_api.h diff -u php-src/main/streams/php_stream_filter_api.h:1.16 php-src/main/streams/php_stream_filter_api.h:1.17 --- php-src/main/streams/php_stream_filter_api.h:1.16 Mon Mar 13 04:40:11 2006 +++ php-src/main/streams/php_stream_filter_api.h Wed Mar 29 01:20:43 2006 @@ -19,7 +19,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_stream_filter_api.h,v 1.16 2006/03/13 04:40:11 pollita Exp $ */ +/* $Id: php_stream_filter_api.h,v 1.17 2006/03/29 01:20:43 pollita Exp $ */ /* The filter API works on the principle of "Bucket-Brigades". This is * partially inspired by the Apache 2 method of doing things, although @@ -157,6 +157,7 @@ PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, int call_dtor TSRMLS_DC); PHPAPI void php_stream_filter_free(php_stream_filter *filter TSRMLS_DC); PHPAPI php_stream_filter *_php_stream_filter_alloc(php_stream_filter_ops *fops, void *abstract, int persistent STREAMS_DC TSRMLS_DC); +PHPAPI int _php_stream_encoding_apply(php_stream *stream, int writechain, const char *encoding, uint16_t error_mode, UChar *subst TSRMLS_DC); END_EXTERN_C() #define php_stream_filter_alloc(fops, thisptr, persistent) _php_stream_filter_alloc((fops), (thisptr), (persistent) STREAMS_CC TSRMLS_CC) #define php_stream_filter_alloc_rel(fops, thisptr, persistent) _php_stream_filter_alloc((fops), (thisptr), (persistent) STREAMS_REL_CC TSRMLS_CC) @@ -165,6 +166,8 @@ #define php_stream_filter_flush(filter, finish) _php_stream_filter_flush((filter), (finish) TSRMLS_CC) #define php_stream_filter_check_chain(chain) _php_stream_filter_check_chain((chain) TSRMLS_CC) #define php_stream_filter_output_prefer_unicode(filter) 
_php_stream_filter_output_prefer_unicode((filter) TSRMLS_CC) +#define php_stream_encoding_apply(stream, writechain, encoding, error_mode, subst) \ + _php_stream_encoding_apply((stream), (writechain), (encoding), (error_mode), (subst) TSRMLS_CC) #define php_stream_is_filtered(stream) ((stream)->readfilters.head || (stream)->writefilters.head) @@ -179,6 +182,12 @@ PHPAPI php_stream_filter *php_stream_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC); END_EXTERN_C() +/* unicode_filter.c exports */ +extern php_stream_filter_ops php_unicode_to_string_filter_ops; +extern php_stream_filter_ops php_unicode_from_string_filter_ops; +extern php_stream_filter_ops php_unicode_tidy_filter_ops; +extern php_stream_filter_factory php_unicode_filter_factory; + /* * Local variables: * tab-width: 4 http://cvs.php.net/viewcvs.cgi/php-src/main/streams/streams.c?r1=1.114&r2=1.115&diff_format=u Index: php-src/main/streams/streams.c diff -u php-src/main/streams/streams.c:1.114 php-src/main/streams/streams.c:1.115 --- php-src/main/streams/streams.c:1.114 Sun Mar 26 06:19:24 2006 +++ php-src/main/streams/streams.c Wed Mar 29 01:20:43 2006 @@ -19,7 +19,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: streams.c,v 1.114 2006/03/26 06:19:24 andrei Exp $ */ +/* $Id: streams.c,v 1.115 2006/03/29 01:20:43 pollita Exp $ */ #define _GNU_SOURCE #include "php.h" @@ -239,6 +239,7 @@ ret->abstract = abstract; ret->is_persistent = persistent_id ? 
1 : 0; ret->chunk_size = FG(def_chunk_size); + ret->readbuf_type = IS_STRING; if (FG(auto_detect_line_endings)) { ret->flags |= PHP_STREAM_FLAG_DETECT_EOL; @@ -483,12 +484,9 @@ * stream read buffer */ while (brig_inp->head) { bucket = brig_inp->head; - if (bucket->buf_type != IS_UNICODE && stream->input_encoding) { - /* Stream expects unicode, convert using stream encoding */ - php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding); - } else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) { - /* Stream expects binary, filter provided unicode, just take the buffer as is */ - php_stream_bucket_convert_notranscode(bucket, IS_STRING); + if (bucket->buf_type != stream->readbuf_type) { + /* Stream expects different datatype than bucket has, convert slopily */ + php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type); } /* Bucket type now matches stream type */ @@ -496,9 +494,9 @@ * TODO: this can fail for persistent streams */ if (stream->readbuflen - stream->writepos < bucket->buflen) { stream->readbuflen += bucket->buflen; - stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, stream->readbuflen), stream->is_persistent); + stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readbuflen), stream->is_persistent); } - memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, PS_ULEN(stream->input_encoding, bucket->buflen)); + memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, PS_ULEN(stream->readbuf_type == IS_UNICODE, bucket->buflen)); stream->writepos += bucket->buflen; php_stream_bucket_unlink(bucket TSRMLS_CC); @@ -530,46 +528,6 @@ } efree(chunk_buf); - } else if (stream->input_encoding) { /* Unfiltered Unicode stream */ - /* is there enough data in the buffer ? */ - if (stream->writepos - stream->readpos < (off_t)size) { - char *binbuf; - UChar *ubuf; - int binbuf_len, ubuf_len; - size_t toread = (size > stream->chunk_size) ? 
size : stream->chunk_size; - UErrorCode status = U_ZERO_ERROR; - - /* Read stream data into temporary buffer, then convert to unicode - TODO: This can be improved */ - binbuf = emalloc(toread + 1); - binbuf_len = stream->ops->read(stream, binbuf, toread TSRMLS_CC); - if (binbuf_len == (size_t)-1) { - /* Failure */ - efree(binbuf); - return; - } - /* Convert to unicode */ - zend_convert_to_unicode(stream->input_encoding, &ubuf, &ubuf_len, binbuf, binbuf_len, &status); - efree(binbuf); - - /* reduce buffer memory consumption if possible, to avoid a realloc */ - if (stream->readbuf.u && stream->readbuflen - stream->writepos < stream->chunk_size) { - memmove(stream->readbuf.u, stream->readbuf.u + stream->readpos, UBYTES(stream->readbuflen - stream->readpos)); - stream->writepos -= stream->readpos; - stream->readpos = 0; - } - - /* grow the buffer if required - * TODO: this can fail for persistent streams */ - if (stream->readbuflen - stream->writepos < ubuf_len) { - stream->readbuflen += ((stream->chunk_size > ubuf_len) ? stream->chunk_size : ubuf_len); - stream->readbuf.u = (UChar*)perealloc(stream->readbuf.u, UBYTES(stream->readbuflen), stream->is_persistent); - } - - memcpy(stream->readbuf.u + stream->writepos, ubuf, UBYTES(ubuf_len)); - efree(ubuf); - stream->writepos += ubuf_len; - } } else { /* Unfiltered Binary stream */ /* is there enough data in the buffer ? 
*/ if (stream->writepos - stream->readpos < (off_t)size) { @@ -609,13 +567,13 @@ * drain the remainder of the buffer before using the "raw" read mode for * the excess */ if (stream->writepos - stream->readpos > 0) { - toread = PS_ULEN(stream->input_encoding, stream->writepos - stream->readpos); + toread = PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos - stream->readpos); if (toread > size) { toread = size; } - if (stream->input_encoding) { + if (stream->readbuf_type == IS_UNICODE) { /* Sloppy read, anyone using php_stream_read() on a unicode stream * had better know what they're doing */ @@ -647,7 +605,7 @@ } if (toread > 0) { - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { /* Sloppy read, anyone using php_stream_read() on a unicode stream * had better know what they're doing */ @@ -685,7 +643,7 @@ { size_t toread = 0, didread = 0, string_length = 0; - if (!stream->input_encoding) { + if (stream->readbuf_type != IS_UNICODE) { return -1; } @@ -763,7 +721,7 @@ int buflen = size; size_t toread = 0, didread = 0, string_length = 0; - if (!stream->input_encoding) { + if (stream->readbuf_type != IS_UNICODE) { return NULL; } @@ -921,7 +879,7 @@ char *readptr, *buf = zbuf.s; if (!buf) { - readptr = stream->readbuf.s + PS_ULEN(stream->input_encoding, stream->readpos); + readptr = stream->readbuf.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readpos); avail = stream->writepos - stream->readpos; } else { readptr = zbuf.s; @@ -929,7 +887,7 @@ } if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) { - if (stream->input_encoding) { + if (stream->readbuf_type == IS_UNICODE) { cr = (char*)u_memchr((UChar*)readptr, '\r', avail); lf = (char*)u_memchr((UChar*)readptr, '\n', avail); } else { @@ -948,10 +906,10 @@ eol = lf; } } else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) { - eol = stream->input_encoding ? u_memchr((UChar*)readptr, '\r', avail) : memchr(readptr, '\r', avail); + eol = (stream->readbuf_type == IS_UNICODE) ? 
u_memchr((UChar*)readptr, '\r', avail) : memchr(readptr, '\r', avail); } else { /* unix (and dos) line endings */ - eol = stream->input_encoding ? u_memchr((UChar*)readptr, '\n', avail) : memchr(readptr, '\n', avail); + eol = (stream->readbuf_type == IS_UNICODE) ? u_memchr((UChar*)readptr, '\n', avail) : memchr(readptr, '\n', avail); } return (void*)eol; @@ -967,7 +925,7 @@ size_t current_buf_size = 0; size_t total_copied = 0; int grow_mode = 0; - int is_unicode = php_stream_reads_unicode(stream); + int is_unicode = stream->readbuf_type == IS_UNICODE; int split_surrogate = 0; zstr bufstart = buf; @@ -1042,8 +1000,8 @@ * than 8K, we waste 1 byte per additional 8K or so. * That seems acceptable to me, to avoid making this code * hard to follow */ - bufstart.s = erealloc(bufstart.s, PS_ULEN(stream->input_encoding, current_buf_size + cpysz + 1)); - buf.s = bufstart.s + PS_ULEN(stream->input_encoding, total_copied); + bufstart.s = erealloc(bufstart.s, PS_ULEN(stream->readbuf_type == IS_UNICODE, current_buf_size + cpysz + 1)); + buf.s = bufstart.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, total_copied); current_buf_size += cpysz + 1; } else { if (cpysz >= maxlen - 1) { @@ -1177,7 +1135,7 @@ size_t toread; int skip = 0; - if (!php_stream_reads_unicode(stream)) { + if (stream->readbuf_type != IS_UNICODE) { return NULL; } @@ -1241,8 +1199,7 @@ /* Writes a buffer directly to a stream, using multiple of the chunk size */ static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr buf, int buflen TSRMLS_DC) { - size_t didwrite = 0, towrite, justwrote, shouldwrite, buflen_orig = buflen; - zstr buf_orig = buf; + size_t didwrite = 0, towrite, justwrote, shouldwrite; char *freeme = NULL; /* if we have a seekable stream we need to ensure that data is written at the @@ -1254,24 +1211,9 @@ stream->ops->seek(stream, stream->position, SEEK_SET, &stream->position TSRMLS_CC); } - if (stream->output_encoding && buf_type == IS_UNICODE) { - char *dest; - int destlen, 
num_conv; - UErrorCode status = U_ZERO_ERROR; - - num_conv = zend_convert_from_unicode(stream->output_encoding, &dest, &destlen, buf.u, buflen, &status); - if (U_FAILURE(status)) { - int32_t offset = u_countChar32(buf.u, num_conv); - - zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", stream->output_encoding, ZEND_FROM_UNICODE, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); - } - freeme = buf.s = dest; - buflen = destlen; - } else { - /* Sloppy handling, make it a binary buffer */ - if (buf_type != IS_STRING) { - buflen = UBYTES(buflen); - } + /* Sloppy handling, make it a binary buffer */ + if (buf_type == IS_UNICODE) { + buflen = UBYTES(buflen); } shouldwrite = buflen; @@ -1300,32 +1242,7 @@ } } - - if (stream->output_encoding) { - /* Map didwrite back to the original character count */ - if (didwrite == shouldwrite) { - /* Everything wrote okay, no need to count */ - didwrite = buflen_orig; - } else { - UErrorCode status = U_ZERO_ERROR; - char *t = freeme; - const UChar *p = buf_orig.u; - - switch (ucnv_getType(stream->output_encoding)) { - case UCNV_SBCS: - case UCNV_LATIN_1: - case UCNV_US_ASCII: - /* 1:1 character->byte mapping, didwrite really does mean the number of characters written */ - break; - default: - /* Reconvert into junk buffer to see where conversion stops in source string */ - ucnv_resetFromUnicode(stream->output_encoding); - ucnv_fromUnicode(stream->output_encoding, &t, t + didwrite, &p, p + buflen_orig, NULL, TRUE, &status); - /* p stops at the first unconvertable UChar when t runs out of space */ - didwrite = p - buf_orig.u; - } - } - } else if (buf_type == IS_UNICODE) { + if (buf_type == IS_UNICODE) { /* Was slopily converted */ didwrite /= UBYTES(1); } @@ -2274,50 +2191,15 @@ if (stream && strchr(implicit_mode, 't') && UG(unicode)) { if (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || strchr(implicit_mode, '+')) { char *encoding = (context && context->output_encoding) ? 
context->output_encoding : "utf8"; - UErrorCode status = U_ZERO_ERROR; - stream->output_encoding = ucnv_open(encoding, &status); - if (U_FAILURE(status)) { - switch (status) { - case U_MEMORY_ALLOCATION_ERROR: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Unable to allocate memory for unicode output converter: %s", encoding); - break; - case U_FILE_ACCESS_ERROR: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Error loading unicode output converter: %s", encoding); - break; - default: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Unknown error starting unicode output converter: %s", encoding); - } - } else { - /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */ - zend_set_converter_error_mode(stream->output_encoding, ZEND_FROM_UNICODE, UG(from_error_mode)); - zend_set_converter_subst_char(stream->output_encoding, UG(from_subst_char)); - } + /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */ + php_stream_encoding_apply(stream, 1, encoding, UG(from_error_mode), UG(from_subst_char)); } if (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+')) { char *encoding = (context && context->input_encoding) ? 
context->input_encoding : "utf8"; - UErrorCode status = U_ZERO_ERROR; - stream->input_encoding = ucnv_open(encoding, &status); - if (U_FAILURE(status)) { - switch (status) { - case U_MEMORY_ALLOCATION_ERROR: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Unable to allocate memory for unicode input converter: %s", encoding); - break; - case U_FILE_ACCESS_ERROR: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Error loading unicode input converter: %s", encoding); - break; - default: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Unknown error starting unicode input converter: %s", encoding); - } - } - /* UTODO: If/When Input error handling gets implemented, set the options on success */ + /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */ + php_stream_encoding_apply(stream, 0, encoding, UG(to_error_mode), NULL); } } @@ -2334,6 +2216,7 @@ pefree(copy_of_path, persistent); } #endif + return stream; } /* }}} */ http://cvs.php.net/viewcvs.cgi/php-src/win32/build/config.w32?r1=1.52&r2=1.53&diff_format=u Index: php-src/win32/build/config.w32 diff -u php-src/win32/build/config.w32:1.52 php-src/win32/build/config.w32:1.53 --- php-src/win32/build/config.w32:1.52 Wed Mar 8 14:41:45 2006 +++ php-src/win32/build/config.w32 Wed Mar 29 01:20:43 2006 @@ -1,5 +1,5 @@ // vim:ft=javascript -// $Id: config.w32,v 1.52 2006/03/08 14:41:45 iliaa Exp $ +// $Id: config.w32,v 1.53 2006/03/29 01:20:43 pollita Exp $ // "Master" config file; think of it as a configure.in // equivalent. 
@@ -279,7 +279,7 @@ php_open_temporary_file.c php_logos.c output.c internal_functions.c php_sprintf.c"); ADD_SOURCES("main/streams", "streams.c cast.c memory.c filter.c plain_wrapper.c \ - userspace.c transports.c xp_socket.c mmap.c"); + userspace.c transports.c xp_socket.c mmap.c unicode_filter.c"); ADD_SOURCES("win32", "crypt_win32.c flock.c glob.c md5crypt.c pwd.c readdir.c \ registry.c select.c sendmail.c time.c wfile.c winutil.c wsyslog.c globals.c"); http://cvs.php.net/viewcvs.cgi/php-src/main/streams/unicode_filter.c?view=markup&rev=1.1 Index: php-src/main/streams/unicode_filter.c +++ php-src/main/streams/unicode_filter.c /* +----------------------------------------------------------------------+ | PHP Version 6 | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | | available through the world-wide-web at the following url: | | http://www.php.net/license/3_01.txt | | If you did not receive a copy of the PHP license and are unable to | | obtain it through the world-wide-web, please send a note to | | [EMAIL PROTECTED] so we can mail you a copy immediately. 
| +----------------------------------------------------------------------+ | Authors: Sara Golemon ([EMAIL PROTECTED]) | +----------------------------------------------------------------------+ */ /* $Id: unicode_filter.c,v 1.1 2006/03/29 01:20:43 pollita Exp $ */ #include "php.h" #include <unicode/ucnv.h> /* {{{ data structure */ typedef struct _php_unicode_filter_data { char is_persistent; UConverter *conv; char to_unicode; } php_unicode_filter_data; /* }}} */ /* {{{ unicode.* filter implementation */ /* unicode.to.* -- Expects String -- Returns Unicode */ static php_stream_filter_status_t php_unicode_to_string_filter( php_stream *stream, php_stream_filter *thisfilter, php_stream_bucket_brigade *buckets_in, php_stream_bucket_brigade *buckets_out, size_t *bytes_consumed, int flags TSRMLS_DC) { php_unicode_filter_data *data; php_stream_filter_status_t exit_status = PSFS_FEED_ME; size_t consumed = 0; if (!thisfilter || !thisfilter->abstract) { /* Should never happen */ return PSFS_ERR_FATAL; } data = (php_unicode_filter_data *)(thisfilter->abstract); while (buckets_in->head) { php_stream_bucket *bucket = buckets_in->head; UChar *src = bucket->buf.u; php_stream_bucket_unlink(bucket TSRMLS_CC); if (!bucket->buf_type == IS_UNICODE) { /* Already ASCII, can't really do anything with it */ consumed += bucket->buflen; php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); exit_status = PSFS_PASS_ON; continue; } while (src < (bucket->buf.u + bucket->buflen)) { int remaining = bucket->buflen - (src - bucket->buf.u); char *destp, *destbuf; int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv)); UErrorCode errCode = U_ZERO_ERROR; php_stream_bucket *new_bucket; destp = destbuf = (char *)pemalloc(destlen, data->is_persistent); ucnv_fromUnicode(data->conv, &destp, destbuf + destlen, (const UChar**)&src, src + remaining, NULL, FALSE, &errCode); /* UTODO: Error catching */ new_bucket = php_stream_bucket_new(stream, destbuf, destp - destbuf, 1, 
data->is_persistent TSRMLS_CC); php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC); exit_status = PSFS_PASS_ON; } consumed += UBYTES(bucket->buflen); php_stream_bucket_delref(bucket TSRMLS_CC); } if (flags & PSFS_FLAG_FLUSH_CLOSE) { UErrorCode errCode = U_ZERO_ERROR; char d[64], *dest = d, *destp = d + 64; /* Spit it out! */ ucnv_fromUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode); /* UTODO: Error catching */ if (dest > d) { php_stream_bucket *bucket = php_stream_bucket_new(stream, d, dest - d, 0, 0 TSRMLS_CC); php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); exit_status = PSFS_PASS_ON; } } if (bytes_consumed) { *bytes_consumed = consumed; } return exit_status; } /* unicode.from.* -- Expects Unicode -- Returns String */ static php_stream_filter_status_t php_unicode_from_string_filter( php_stream *stream, php_stream_filter *thisfilter, php_stream_bucket_brigade *buckets_in, php_stream_bucket_brigade *buckets_out, size_t *bytes_consumed, int flags TSRMLS_DC) { php_unicode_filter_data *data; php_stream_filter_status_t exit_status = PSFS_FEED_ME; size_t consumed = 0; if (!thisfilter || !thisfilter->abstract) { /* Should never happen */ return PSFS_ERR_FATAL; } data = (php_unicode_filter_data *)(thisfilter->abstract); while (buckets_in->head) { php_stream_bucket *bucket = buckets_in->head; char *src = bucket->buf.s; php_stream_bucket_unlink(bucket TSRMLS_CC); if (bucket->buf_type == IS_UNICODE) { /* already in unicode, nothing to do */ consumed += UBYTES(bucket->buflen); php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); exit_status = PSFS_PASS_ON; continue; } while (src < (bucket->buf.s + bucket->buflen)) { int remaining = bucket->buflen - (src - bucket->buf.s); UChar *destp, *destbuf; int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv)); UErrorCode errCode = U_ZERO_ERROR; php_stream_bucket *new_bucket; destp = destbuf = (UChar *)pemalloc(destlen, data->is_persistent); 
ucnv_toUnicode(data->conv, &destp, (UChar*)((char*)destbuf + destlen), (const char**)&src, src + remaining, NULL, FALSE, &errCode); /* UTODO: Error catching */ new_bucket = php_stream_bucket_new_unicode(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC); php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC); exit_status = PSFS_PASS_ON; } consumed += bucket->buflen; php_stream_bucket_delref(bucket TSRMLS_CC); } if (flags & PSFS_FLAG_FLUSH_CLOSE) { UErrorCode errCode = U_ZERO_ERROR; UChar d[64], *dest = d, *destp = d + 64; /* Spit it out! */ ucnv_toUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode); /* UTODO: Error catching */ if (dest > d) { php_stream_bucket *bucket = php_stream_bucket_new_unicode(stream, d, dest - d, 0, 0 TSRMLS_CC); php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); exit_status = PSFS_PASS_ON; } } if (bytes_consumed) { *bytes_consumed = consumed; } return exit_status; } /* unicode.tidy.* -- Expects anything -- Returns whatever is preferred by subsequent filters Can be used to "magically" fix-up bucket messes */ static php_stream_filter_status_t php_unicode_tidy_filter( php_stream *stream, php_stream_filter *thisfilter, php_stream_bucket_brigade *buckets_in, php_stream_bucket_brigade *buckets_out, size_t *bytes_consumed, int flags TSRMLS_DC) { php_unicode_filter_data *data; int prefer_unicode; if (!thisfilter || !thisfilter->abstract) { /* Should never happen */ return PSFS_ERR_FATAL; } prefer_unicode = php_stream_filter_output_prefer_unicode(thisfilter); data = (php_unicode_filter_data *)(thisfilter->abstract); if (prefer_unicode) { if (!data->to_unicode) { ucnv_resetToUnicode(data->conv); data->to_unicode = prefer_unicode; } return php_unicode_from_string_filter(stream, thisfilter, buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC); } else { if (data->to_unicode) { ucnv_resetFromUnicode(data->conv); data->to_unicode = prefer_unicode; } return php_unicode_to_string_filter(stream, thisfilter, 
buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC); } } static void php_unicode_filter_dtor(php_stream_filter *thisfilter TSRMLS_DC) { if (thisfilter && thisfilter->abstract) { php_unicode_filter_data *data = (php_unicode_filter_data *)thisfilter->abstract; ucnv_close(data->conv); pefree(data, data->is_persistent); } } php_stream_filter_ops php_unicode_to_string_filter_ops = { php_unicode_to_string_filter, php_unicode_filter_dtor, "unicode.to.*", PSFO_FLAG_ACCEPTS_UNICODE | PSFO_FLAG_OUTPUTS_STRING }; php_stream_filter_ops php_unicode_from_string_filter_ops = { php_unicode_from_string_filter, php_unicode_filter_dtor, "unicode.from.*", PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_UNICODE }; php_stream_filter_ops php_unicode_tidy_filter_ops = { php_unicode_tidy_filter, php_unicode_filter_dtor, "unicode.tidy.*", PSFO_FLAG_ACCEPTS_ANY | PSFO_FLAG_OUTPUTS_ANY }; /* }}} */ /* {{{ unicode.* factory */ static php_stream_filter *php_unicode_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC) { php_unicode_filter_data *data; const char *charset, *direction; php_stream_filter_ops *fops; UErrorCode ucnvError = U_ZERO_ERROR; /* Note: from_error_mode means from unicode to charset. 
from filter means from charset to unicode */ uint16_t err_mode = UG(from_error_mode); char to_unicode = 0; zval **tmpzval; if (strncasecmp(filtername, "unicode.", sizeof("unicode.") - 1)) { /* Never happens */ return NULL; } direction = filtername + sizeof("unicode.") - 1; if (strncmp(direction, "to.", sizeof("to.") - 1) == 0) { fops = &php_unicode_to_string_filter_ops; charset = direction + sizeof("to.") - 1; } else if (strncmp(direction, "from.", sizeof("from.") - 1) == 0) { fops = &php_unicode_from_string_filter_ops; charset = direction + sizeof("from.") - 1; to_unicode = 1; err_mode = UG(to_error_mode); } else if (strncmp(direction, "tidy.", sizeof("tidy.") - 1) == 0) { fops = &php_unicode_tidy_filter_ops; charset = direction + sizeof("tidy.") - 1; } else if (strcmp(direction, "tidy") == 0) { fops = &php_unicode_tidy_filter_ops; charset = "utf8"; } else { /* Shouldn't happen */ return NULL; } /* Create this filter */ data = (php_unicode_filter_data *)pecalloc(1, sizeof(php_unicode_filter_data), persistent); if (!data) { php_error_docref(NULL TSRMLS_CC, E_ERROR, "Failed allocating %d bytes.", sizeof(php_unicode_filter_data)); return NULL; } data->conv = ucnv_open(charset, &ucnvError); data->to_unicode = to_unicode; if (!data->conv) { char *reason = "Unknown Error"; pefree(data, persistent); switch (ucnvError) { case U_MEMORY_ALLOCATION_ERROR: reason = "unable to allocate memory"; break; case U_FILE_ACCESS_ERROR: reason = "file access error"; break; default: ; } php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to open charset converter, %s", reason); return NULL; } if (filterparams && Z_TYPE_P(filterparams) == IS_ARRAY && zend_hash_find(Z_ARRVAL_P(filterparams), "error_mode", sizeof("error_mode"), (void**)&tmpzval) == SUCCESS && tmpzval && *tmpzval) { if (Z_TYPE_PP(tmpzval) == IS_LONG) { err_mode = Z_LVAL_PP(tmpzval); } else { zval copyval = **tmpzval; zval_copy_ctor(©val); convert_to_long(©val); err_mode = Z_LVAL(copyval); } } 
zend_set_converter_error_mode(data->conv, to_unicode ? ZEND_TO_UNICODE : ZEND_FROM_UNICODE, err_mode); if (!to_unicode) { UChar *freeme = NULL; UChar *subst_char = UG(from_subst_char); if (filterparams && Z_TYPE_P(filterparams) == IS_ARRAY && zend_hash_find(Z_ARRVAL_P(filterparams), "subst_char", sizeof("subst_char"), (void**)&tmpzval) == SUCCESS && tmpzval && *tmpzval) { if (Z_TYPE_PP(tmpzval) == IS_UNICODE) { subst_char = Z_USTRVAL_PP(tmpzval); } else { zval copyval = **tmpzval; zval_copy_ctor(©val); convert_to_unicode(©val); subst_char = freeme = Z_USTRVAL(copyval); } } zend_set_converter_subst_char(data->conv, subst_char); if (freeme) { efree(freeme); } } return php_stream_filter_alloc(fops, data, persistent); } php_stream_filter_factory php_unicode_filter_factory = { php_unicode_filter_create }; /* }}} */ /* * Local variables: * tab-width: 4 * c-basic-offset: 4 * End: * vim600: sw=4 ts=4 fdm=marker * vim<600: sw=4 ts=4 */
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php