pollita         Wed Mar 29 01:20:43 2006 UTC

  Added files:                 
    /php-src/main/streams       unicode_filter.c 

  Removed files:               
    /php-src/ext/unicode        unicode_filter.c 

  Modified files:              
    /php-src/ext/standard       basic_functions.c file.c streamsfuncs.c 
                                streamsfuncs.h 
    /php-src/ext/unicode        config.m4 config.w32 php_unicode.h unicode.c 
    /php-src/main       main.c php_streams.h 
    /php-src/main/streams       filter.c php_stream_filter_api.h streams.c 
    /php-src/win32/build        config.w32 
  Log:
  Another (and hopefully last) major streams commit.
  This moves unicode conversion to the filter layer
  (rather than at the lower streams layer)
  unicode_filter.c has been moved from ext/unicode to main/streams
  as it's an integral part of the streams unicode conversion process.
  
  There are now three ways to set encoding on a stream:
  
  (1) By context
  $ctx = stream_context_create(NULL,array('encoding'=>'latin1'));
  $fp = fopen('somefile', 'r+t', false, $ctx);
  
  (2) By stream_encoding()
  $fp = fopen('somefile', 'r+');
  stream_encoding($fp, 'latin1');
  
  (3) By filter
  $fp = fopen('somefile', 'r+');
  stream_filter_append($fp, 'unicode.from.latin1', STREAM_FILTER_READ);
  stream_filter_append($fp, 'unicode.to.latin1', STREAM_FILTER_WRITE);
  
  Note: Methods 1 and 2 are convenience wrappers around method 3.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/basic_functions.c?r1=1.766&r2=1.767&diff_format=u
Index: php-src/ext/standard/basic_functions.c
diff -u php-src/ext/standard/basic_functions.c:1.766 
php-src/ext/standard/basic_functions.c:1.767
--- php-src/ext/standard/basic_functions.c:1.766        Wed Mar 22 10:20:20 2006
+++ php-src/ext/standard/basic_functions.c      Wed Mar 29 01:20:42 2006
@@ -17,7 +17,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: basic_functions.c,v 1.766 2006/03/22 10:20:20 derick Exp $ */
+/* $Id: basic_functions.c,v 1.767 2006/03/29 01:20:42 pollita Exp $ */
 
 #include "php.h"
 #include "php_streams.h"
@@ -589,6 +589,7 @@
        PHP_FE(stream_filter_prepend,                                           
                                        NULL)
        PHP_FE(stream_filter_append,                                            
                                        NULL)
        PHP_FE(stream_filter_remove,                                            
                                        NULL)
+       PHP_FE(stream_encoding,                                                 
                                                NULL)
        PHP_FE(stream_socket_client,                             
second_and_third_args_force_ref)
        PHP_FE(stream_socket_server,                             
second_and_third_args_force_ref)
        PHP_FE(stream_socket_accept,                                            
   third_arg_force_ref)
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/file.c?r1=1.430&r2=1.431&diff_format=u
Index: php-src/ext/standard/file.c
diff -u php-src/ext/standard/file.c:1.430 php-src/ext/standard/file.c:1.431
--- php-src/ext/standard/file.c:1.430   Mon Mar 27 23:41:05 2006
+++ php-src/ext/standard/file.c Wed Mar 29 01:20:42 2006
@@ -21,7 +21,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: file.c,v 1.430 2006/03/27 23:41:05 iliaa Exp $ */
+/* $Id: file.c,v 1.431 2006/03/29 01:20:42 pollita Exp $ */
 
 /* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */
 
@@ -1008,14 +1008,14 @@
 
        php_stream_from_zval(stream, &zstream);
 
-       buf.v = php_stream_get_line_ex(stream, php_stream_reads_unicode(stream) 
? IS_UNICODE : IS_STRING, NULL_ZSTR, 0, length, &retlen);
+       buf.v = php_stream_get_line_ex(stream, stream->readbuf_type, NULL_ZSTR, 
0, length, &retlen);
        if (!buf.v) {
                RETURN_FALSE;
        }
 
-       if (php_stream_reads_unicode(stream)) {
+       if (stream->readbuf_type == IS_UNICODE) {
                RETURN_UNICODEL(buf.u, retlen, 0);
-       } else {
+       } else { /* IS_STRING */
                RETURN_STRINGL(buf.s, retlen, 0);
        }
 }
@@ -1034,7 +1034,7 @@
 
        PHP_STREAM_TO_ZVAL(stream, arg1);
 
-       if (php_stream_reads_unicode(stream)) {
+       if (stream->readbuf_type == IS_UNICODE) {
                int buflen = 1;
                UChar *buf = php_stream_read_unicode_chars(stream, &buflen);
 
@@ -1042,7 +1042,7 @@
                        RETURN_FALSE;
                }
                RETURN_UNICODEL(buf, buflen, 0);
-       } else {
+       } else { /* IS_STRING */
                char buf[2];
 
                buf[0] = php_stream_getc(stream);
@@ -1068,7 +1068,7 @@
 
        php_stream_from_zval(stream, &zstream);
 
-       if (php_stream_reads_unicode(stream)) {
+       if (stream->readbuf_type == IS_UNICODE) {
                UChar *buf = php_stream_get_line_ex(stream, IS_UNICODE, 
NULL_ZSTR, 0, length, &retlen);
                UChar *allowed = NULL;
                int allowed_len = 0;
@@ -1085,7 +1085,7 @@
                retlen = php_u_strip_tags(buf, retlen, &stream->fgetss_state, 
allowed, allowed_len TSRMLS_CC);
 
                RETURN_UNICODEL(buf, retlen, 0);
-       } else {
+       } else { /* IS_STRING */
                char *buf = php_stream_get_line_ex(stream, IS_STRING, 
NULL_ZSTR, 0, length, &retlen);
                char *allowed = NULL;
                int allowed_len = 0;
@@ -1752,7 +1752,7 @@
                RETURN_FALSE;
        }
 
-       if (php_stream_reads_unicode(stream)) {
+       if (stream->readbuf_type == IS_UNICODE) {
                int buflen = len;
                UChar *buf = php_stream_read_unicode_chars(stream, &buflen);
 
@@ -1761,7 +1761,7 @@
                }
 
                RETURN_UNICODEL(buf, buflen, 0);
-       } else {
+       } else { /* IS_STRING */
                char *buf = emalloc(len + 1);
                int buflen = php_stream_read(stream, buf, len);
 
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/streamsfuncs.c?r1=1.72&r2=1.73&diff_format=u
Index: php-src/ext/standard/streamsfuncs.c
diff -u php-src/ext/standard/streamsfuncs.c:1.72 
php-src/ext/standard/streamsfuncs.c:1.73
--- php-src/ext/standard/streamsfuncs.c:1.72    Sun Mar 26 04:40:11 2006
+++ php-src/ext/standard/streamsfuncs.c Wed Mar 29 01:20:42 2006
@@ -17,7 +17,7 @@
   +----------------------------------------------------------------------+
 */
 
-/* $Id: streamsfuncs.c,v 1.72 2006/03/26 04:40:11 pollita Exp $ */
+/* $Id: streamsfuncs.c,v 1.73 2006/03/29 01:20:42 pollita Exp $ */
 
 #include "php.h"
 #include "php_globals.h"
@@ -489,11 +489,11 @@
                add_assoc_zval(return_value, "write_filters", newval);
        }
        
-       if (php_stream_reads_unicode(stream)) {
+       if (stream->readbuf_type == IS_UNICODE) {
                int readbuf_len = u_countChar32(stream->readbuf.u + 
stream->readpos, stream->writepos - stream->readpos);
                add_assoc_long(return_value, "unread_bytes", 
UBYTES(stream->writepos - stream->readpos));
                add_assoc_long(return_value, "unread_chars", readbuf_len);
-       } else {
+       } else { /* IS_STRING */
                add_assoc_long(return_value, "unread_bytes", stream->writepos - 
stream->readpos);
                add_assoc_long(return_value, "unread_chars", stream->writepos - 
stream->readpos);
        }
@@ -1275,7 +1275,7 @@
 
        php_stream_from_zval(stream, &zstream);
 
-       if (php_stream_reads_unicode(stream)) {
+       if (stream->readbuf_type == IS_UNICODE) {
                UChar *buf;
                UChar *d = NULL;
                int dlen = 0;
@@ -1294,7 +1294,7 @@
                }
 
                RETURN_UNICODEL(buf, buf_size, 0);
-       } else {
+       } else { /* IS_STRING */
                char *buf;
                char *d = NULL;
                int dlen = 0;
@@ -1462,6 +1462,67 @@
 }
 /* }}} */
 
+/* {{{ proto bool stream_encoding(resource stream[, string encoding])
+Set character set for stream encoding
+UTODO: Return current encoding charset
+*/
+PHP_FUNCTION(stream_encoding)
+{
+       zval *zstream;
+       php_stream *stream;
+       char *encoding = NULL;
+       int encoding_len = 0;
+       int remove_read_tail = 0, remove_write_tail = 0;
+
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "r|s", &zstream, 
&encoding, &encoding_len) == FAILURE) {
+               return;
+       }
+
+       php_stream_from_zval(stream, &zstream);
+
+       /* UTODO: Double check that the target encoding is legal before attempting 
anything */
+
+       if (stream->readfilters.tail) {
+               if (stream->readfilters.tail->fops == 
&php_unicode_from_string_filter_ops) {
+                       /* Remove the current unicode.from.* filter, 
+               the filter layer will transcode anything in the read buffer 
back to binary 
+               or invalidate the read buffer */
+                       remove_read_tail = 1;
+               } else if (stream->readbuf_type == IS_UNICODE) {
+                       /* There's an encoding on the stream already, but then 
there's filtering happening after that point
+                          It's asking too much for PHP to figure out what the 
user wants, throw an error back in their face */
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot 
change encoding on filtered stream");
+                       RETURN_FALSE;
+               }
+       }
+
+       if (stream->writefilters.tail) {
+               if (stream->writefilters.tail->fops == 
&php_unicode_to_string_filter_ops) {
+                       /* Remove the current unicode.to.* filter */
+                       remove_write_tail = 1;
+               } else if ((stream->writefilters.tail->fops->flags & 
PSFO_FLAG_OUTPUTS_UNICODE) == 0) {
+                       /* conversion to binary is happening, then another 
filter is doing something
+                          bailout for same reason as read filters */
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot 
change encoding on filtered stream");
+                       RETURN_FALSE;
+               }
+       }
+
+       if (remove_read_tail) {
+               php_stream_filter_remove(stream->readfilters.tail, 1 TSRMLS_CC);
+       }
+       if (remove_write_tail) {
+               php_stream_filter_remove(stream->writefilters.tail, 1 
TSRMLS_CC);
+       }
+
+       /* UTODO: Allow overriding error handling for converters */
+       php_stream_encoding_apply(stream, 1, encoding, UG(from_error_mode), 
UG(from_subst_char));
+       php_stream_encoding_apply(stream, 0, encoding, UG(to_error_mode), NULL);
+
+       RETURN_TRUE;
+}
+/* }}} */
+
 /*
  * Local variables:
  * tab-width: 4
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/streamsfuncs.h?r1=1.14&r2=1.15&diff_format=u
Index: php-src/ext/standard/streamsfuncs.h
diff -u php-src/ext/standard/streamsfuncs.h:1.14 
php-src/ext/standard/streamsfuncs.h:1.15
--- php-src/ext/standard/streamsfuncs.h:1.14    Sun Jan  1 13:09:55 2006
+++ php-src/ext/standard/streamsfuncs.h Wed Mar 29 01:20:42 2006
@@ -16,7 +16,7 @@
   +----------------------------------------------------------------------+
 */
 
-/* $Id: streamsfuncs.h,v 1.14 2006/01/01 13:09:55 sniper Exp $ */
+/* $Id: streamsfuncs.h,v 1.15 2006/03/29 01:20:42 pollita Exp $ */
 
 /* Flags for stream_socket_client */
 #define PHP_STREAM_CLIENT_PERSISTENT   1
@@ -53,6 +53,7 @@
 PHP_FUNCTION(stream_filter_prepend);
 PHP_FUNCTION(stream_filter_append);
 PHP_FUNCTION(stream_filter_remove);
+PHP_FUNCTION(stream_encoding);
 PHP_FUNCTION(stream_socket_enable_crypto);
 PHP_FUNCTION(stream_socket_pair);
 
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/config.m4?r1=1.6&r2=1.7&diff_format=u
Index: php-src/ext/unicode/config.m4
diff -u php-src/ext/unicode/config.m4:1.6 php-src/ext/unicode/config.m4:1.7
--- php-src/ext/unicode/config.m4:1.6   Sun Mar 26 11:06:24 2006
+++ php-src/ext/unicode/config.m4       Wed Mar 29 01:20:43 2006
@@ -1,7 +1,7 @@
 dnl
-dnl $Id: config.m4,v 1.6 2006/03/26 11:06:24 derick Exp $
+dnl $Id: config.m4,v 1.7 2006/03/29 01:20:43 pollita Exp $
 dnl
 
 PHP_SUBST(UNICODE_SHARED_LIBADD)
 AC_DEFINE(HAVE_UNICODE, 1, [ ])
-PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_filter.c 
unicode_iterators.c collator.c, $ext_shared)
+PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_iterators.c collator.c, 
$ext_shared)
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/config.w32?r1=1.6&r2=1.7&diff_format=u
Index: php-src/ext/unicode/config.w32
diff -u php-src/ext/unicode/config.w32:1.6 php-src/ext/unicode/config.w32:1.7
--- php-src/ext/unicode/config.w32:1.6  Sun Mar 26 11:06:24 2006
+++ php-src/ext/unicode/config.w32      Wed Mar 29 01:20:43 2006
@@ -1,5 +1,5 @@
-// $Id: config.w32,v 1.6 2006/03/26 11:06:24 derick Exp $
+// $Id: config.w32,v 1.7 2006/03/29 01:20:43 pollita Exp $
 // vim:ft=javascript
 
-EXTENSION("unicode", "unicode.c unicode_filter.c unicode_iterators.c 
collator.c locale.c");
+EXTENSION("unicode", "unicode.c unicode_iterators.c collator.c locale.c");
 AC_DEFINE('HAVE_UNICODE', 1, 'ICU API extension');
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/php_unicode.h?r1=1.6&r2=1.7&diff_format=u
Index: php-src/ext/unicode/php_unicode.h
diff -u php-src/ext/unicode/php_unicode.h:1.6 
php-src/ext/unicode/php_unicode.h:1.7
--- php-src/ext/unicode/php_unicode.h:1.6       Sun Mar 26 11:06:24 2006
+++ php-src/ext/unicode/php_unicode.h   Wed Mar 29 01:20:43 2006
@@ -14,7 +14,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: php_unicode.h,v 1.6 2006/03/26 11:06:24 derick Exp $ */ 
+/* $Id: php_unicode.h,v 1.7 2006/03/29 01:20:43 pollita Exp $ */ 
 
 #ifndef PHP_UNICODE_H
 #define PHP_UNICODE_H
@@ -67,7 +67,6 @@
 PHP_METHOD(collator, __construct);
 
 void php_init_collation(TSRMLS_D);
-extern php_stream_filter_factory php_unicode_filter_factory;
 
 #ifdef  __cplusplus
 } // extern "C" 
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode.c?r1=1.18&r2=1.19&diff_format=u
Index: php-src/ext/unicode/unicode.c
diff -u php-src/ext/unicode/unicode.c:1.18 php-src/ext/unicode/unicode.c:1.19
--- php-src/ext/unicode/unicode.c:1.18  Mon Mar 27 03:19:30 2006
+++ php-src/ext/unicode/unicode.c       Wed Mar 29 01:20:43 2006
@@ -15,7 +15,7 @@
   +----------------------------------------------------------------------+
 */
 
-/* $Id: unicode.c,v 1.18 2006/03/27 03:19:30 andrei Exp $ */ 
+/* $Id: unicode.c,v 1.19 2006/03/29 01:20:43 pollita Exp $ */ 
 
 #include "php_unicode.h"
 #if HAVE_UNICODE
@@ -273,10 +273,6 @@
 /* {{{ PHP_MINIT_FUNCTION */
 PHP_MINIT_FUNCTION(unicode)
 {
-       if (php_stream_filter_register_factory("unicode.*", 
&php_unicode_filter_factory TSRMLS_CC) == FAILURE) {
-               return FAILURE;
-       }
-
        php_register_unicode_iterators(TSRMLS_C);
        php_init_collation(TSRMLS_C);
        
@@ -287,9 +283,6 @@
 /* {{{ PHP_MSHUTDOWN_FUNCTION */
 PHP_MSHUTDOWN_FUNCTION(unicode)
 {
-       if (php_stream_filter_unregister_factory("unicode.*" TSRMLS_CC) == 
FAILURE) {
-               return FAILURE;
-       }
        /* add your stuff here */
 
   
http://cvs.php.net/viewcvs.cgi/php-src/main/main.c?r1=1.683&r2=1.684&diff_format=u
Index: php-src/main/main.c
diff -u php-src/main/main.c:1.683 php-src/main/main.c:1.684
--- php-src/main/main.c:1.683   Sun Mar 26 06:19:24 2006
+++ php-src/main/main.c Wed Mar 29 01:20:43 2006
@@ -18,7 +18,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: main.c,v 1.683 2006/03/26 06:19:24 andrei Exp $ */
+/* $Id: main.c,v 1.684 2006/03/29 01:20:43 pollita Exp $ */
 
 /* {{{ includes
  */
@@ -1611,6 +1611,12 @@
                return FAILURE;
        }
 
+       /* Initialize unicode filters */
+       if (php_stream_filter_register_factory("unicode.*", 
&php_unicode_filter_factory TSRMLS_CC) == FAILURE) {
+               php_printf("PHP:  Unable to initialize unicode stream 
filters.\n");
+               return FAILURE;
+       }
+
        /* initialize registry for images to be used in phpinfo()
           (this uses configuration parameters from php.ini)
         */
@@ -1744,6 +1750,7 @@
 
        zend_shutdown(TSRMLS_C);
 
+       /* Destroys filter & transport registries too */
        php_shutdown_stream_wrappers(module_number TSRMLS_CC);
 
        php_shutdown_info_logos();
http://cvs.php.net/viewcvs.cgi/php-src/main/php_streams.h?r1=1.108&r2=1.109&diff_format=u
Index: php-src/main/php_streams.h
diff -u php-src/main/php_streams.h:1.108 php-src/main/php_streams.h:1.109
--- php-src/main/php_streams.h:1.108    Fri Mar 24 19:22:24 2006
+++ php-src/main/php_streams.h  Wed Mar 29 01:20:43 2006
@@ -16,7 +16,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: php_streams.h,v 1.108 2006/03/24 19:22:24 pollita Exp $ */
+/* $Id: php_streams.h,v 1.109 2006/03/29 01:20:43 pollita Exp $ */
 
 #ifndef PHP_STREAMS_H
 #define PHP_STREAMS_H
@@ -206,12 +206,9 @@
        php_stream_context *context;
        int flags;      /* PHP_STREAM_FLAG_XXX */
 
-       /* unicode */
-       UConverter *input_encoding;
-       UConverter *output_encoding;
-
        /* buffer */
        off_t position; /* of underlying stream */
+       zend_uchar readbuf_type;
        zstr readbuf; /* readbuf.s or readbuf.u */
        size_t readbuflen; /* Length in units (char or UChar) */
        off_t readpos; /* Position in units (char or UChar) */
@@ -252,8 +249,6 @@
 #define php_stream_from_zval_no_verify(xstr, ppzval)   (xstr) = 
(php_stream*)zend_fetch_resource((ppzval) TSRMLS_CC, -1, "stream", NULL, 2, 
php_file_le_stream(), php_file_le_pstream())
 
 #define PS_ULEN(is_unicode, len)       ((is_unicode) ? UBYTES(len) : (len))
-#define php_stream_reads_unicode(stream)       ((stream->input_encoding) ? 1 : 
0)
-#define php_stream_writes_unicode(stream)      ((stream->output_encoding) ? 1 
: 0)
 
 BEGIN_EXTERN_C()
 PHPAPI int php_stream_from_persistent_id(const char *persistent_id, php_stream 
**stream TSRMLS_DC);
http://cvs.php.net/viewcvs.cgi/php-src/main/streams/filter.c?r1=1.27&r2=1.28&diff_format=u
Index: php-src/main/streams/filter.c
diff -u php-src/main/streams/filter.c:1.27 php-src/main/streams/filter.c:1.28
--- php-src/main/streams/filter.c:1.27  Sun Mar 26 06:19:24 2006
+++ php-src/main/streams/filter.c       Wed Mar 29 01:20:43 2006
@@ -16,7 +16,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: filter.c,v 1.27 2006/03/26 06:19:24 andrei Exp $ */
+/* $Id: filter.c,v 1.28 2006/03/29 01:20:43 pollita Exp $ */
 
 #include "php.h"
 #include "php_globals.h"
@@ -396,50 +396,63 @@
        chain->tail = filter;
        filter->chain = chain;
 
-       if (&(stream->readfilters) == chain && (stream->writepos - 
stream->readpos) > 0) {
+       if (&(stream->readfilters) == chain) {
                /* Let's going ahead and wind anything in the buffer through 
this filter */
                php_stream_bucket_brigade brig_in = { NULL, NULL }, brig_out = 
{ NULL, NULL };
                php_stream_bucket_brigade *brig_inp = &brig_in, *brig_outp = 
&brig_out;
-               php_stream_filter_status_t status;
+               php_stream_filter_status_t status = PSFS_FEED_ME;
                php_stream_bucket *bucket;
                size_t consumed = 0;
 
-               if (stream->input_encoding) {
-                       bucket = php_stream_bucket_new_unicode(stream, 
stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos, 0, 0 
TSRMLS_CC);
-               } else {
-                       bucket = php_stream_bucket_new(stream, 
stream->readbuf.s + stream->readpos, stream->writepos - stream->readpos, 0, 0 
TSRMLS_CC);
+               if ((stream->writepos - stream->readpos) > 0) {
+                       if (stream->readbuf_type == IS_UNICODE) {
+                               bucket = php_stream_bucket_new_unicode(stream, 
stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos, 0, 0 
TSRMLS_CC);
+                       } else {
+                               bucket = php_stream_bucket_new(stream, 
stream->readbuf.s + stream->readpos, stream->writepos - stream->readpos, 0, 0 
TSRMLS_CC);
+                       }
+                       php_stream_bucket_append(brig_inp, bucket TSRMLS_CC);
+                       status = filter->fops->filter(stream, filter, brig_inp, 
brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC);
+
+                       if (stream->readpos + consumed > stream->writepos || 
consumed < 0) {
+                               /* No behaving filter should cause this. */
+                               status = PSFS_ERR_FATAL;
+                       }
                }
-               php_stream_bucket_append(brig_inp, bucket TSRMLS_CC);
-               status = filter->fops->filter(stream, filter, brig_inp, 
brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC);
 
-               if (stream->readpos + consumed > stream->writepos || consumed < 
0) {
-                       /* No behaving filter should cause this. */
-                       status = PSFS_ERR_FATAL;
-               }
-
-               switch (status) {
-                       case PSFS_ERR_FATAL:
-                               /* If this first cycle simply fails then 
there's something wrong with the filter.
-                                  Pull the filter off the chain and leave the 
read buffer alone. */
-                               if (chain->head == filter) {
-                                       chain->head = NULL;
-                                       chain->tail = NULL;
-                               } else {
-                                       filter->prev->next = NULL;
-                                       chain->tail = filter->prev;
-                               }
-                               php_stream_bucket_unlink(bucket TSRMLS_CC);
-                               php_stream_bucket_delref(bucket TSRMLS_CC);
-                               php_error_docref(NULL TSRMLS_CC, E_WARNING, 
"Filter failed to process pre-buffered data.  Not adding to filterchain.");
-                               break;
-                       case PSFS_FEED_ME:
+               if (status == PSFS_ERR_FATAL) {
+                       /* If this first cycle simply fails then there's 
something wrong with the filter.
+                          Pull the filter off the chain and leave the read 
buffer alone. */
+                       if (chain->head == filter) {
+                               chain->head = NULL;
+                               chain->tail = NULL;
+                       } else {
+                               filter->prev->next = NULL;
+                               chain->tail = filter->prev;
+                       }
+                       php_stream_bucket_unlink(bucket TSRMLS_CC);
+                       php_stream_bucket_delref(bucket TSRMLS_CC);
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter 
failed to process pre-buffered data.  Not adding to filterchain.");
+               } else {
+                       /* This filter addition may change the readbuffer type.
+                          Since all the previously held data is in the bucket 
brigade,
+                          we can reappropriate the buffer that already exists 
(if one does) */
+                       if (stream->readbuf_type == IS_UNICODE && 
(filter->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) == 0) {
+                               /* Buffer is currently based on unicode 
characters, but filter only outputs STRING; adjust counting */
+                               stream->readbuf_type = IS_STRING;
+                               stream->readbuflen *= UBYTES(1);
+                       } else if (stream->readbuf_type == IS_STRING && 
(filter->fops->flags & PSFO_FLAG_OUTPUTS_STRING) == 0) {
+                               /* Buffer is currently based on binary 
characters, but filter only outputs UNICODE; adjust counting */
+                               stream->readbuf_type = IS_UNICODE;
+                               stream->readbuflen /= UBYTES(1);
+                       }
+
+                       if (status == PSFS_FEED_ME) {
                                /* We don't actually need data yet,
                                   leave this filter in a feed me state until 
data is needed. 
                                   Reset stream's internal read buffer since 
the filter is "holding" it. */
                                stream->readpos = 0;
                                stream->writepos = 0;
-                               break;
-                       case PSFS_PASS_ON:
+                       } else if (status == PSFS_PASS_ON) {
                                /* Put any filtered data onto the readbuffer 
stack.
                                   Previously read data has been at least 
partially consumed. */
                                stream->readpos += consumed;
@@ -454,23 +467,20 @@
                                        bucket = brig_outp->head;
 
                                        /* Convert for stream type */
-                                       if (bucket->buf_type != IS_UNICODE && 
stream->input_encoding) {
-                                               /* Stream expects unicode, 
convert using stream encoding */
-                                               
php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding);
-                                       } else if (bucket->buf_type == 
IS_UNICODE && !stream->input_encoding) {
-                                               /* Stream expects binary, 
filter provided unicode, just take the buffer as is */
-                                               
php_stream_bucket_convert_notranscode(bucket, IS_STRING);
+                                       if (bucket->buf_type != 
stream->readbuf_type) {
+                                               /* Stream expects different 
type than bucket contains, convert sloppily */
+                                               
php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type);
                                        }
 
                                        /* Grow buffer to hold this bucket if 
need be.
                                           TODO: See warning in 
main/stream/streams.c::php_stream_fill_read_buffer */
                                        if (stream->readbuflen - 
stream->writepos < bucket->buflen) {
                                                stream->readbuflen += 
bucket->buflen;
-                                               stream->readbuf.v = 
perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, 
stream->readbuflen), stream->is_persistent);
+                                               stream->readbuf.v = 
perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, 
stream->readbuflen), stream->is_persistent);
                                        }
 
                                        /* Append to readbuf */
-                                       if (stream->input_encoding) {
+                                       if (stream->readbuf_type == IS_UNICODE) 
{
                                                memcpy(stream->readbuf.u + 
stream->writepos, bucket->buf.u, UBYTES(bucket->buflen));
                                        } else {
                                                memcpy(stream->readbuf.s + 
stream->writepos, bucket->buf.s, bucket->buflen);
@@ -480,10 +490,9 @@
                                        php_stream_bucket_unlink(bucket 
TSRMLS_CC);
                                        php_stream_bucket_delref(bucket 
TSRMLS_CC);
                                }
-                               break;
+                       }
                }
-               
-       }
+       } /* end of readfilters specific code */
 }
 
 PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain 
TSRMLS_DC)
@@ -597,26 +606,23 @@
                /* Dump any newly flushed data to the read buffer */
                if (stream->readpos > stream->chunk_size) {
                        /* Back the buffer up */
-                       memcpy(stream->readbuf.s, stream->readbuf.s + 
PS_ULEN(stream->input_encoding, stream->readpos), 
PS_ULEN(stream->input_encoding, stream->writepos - stream->readpos));
+                       memcpy(stream->readbuf.s, stream->readbuf.s + 
PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readpos), 
PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos - 
stream->readpos));
                        stream->writepos -= stream->readpos;
                        stream->readpos = 0;
                }
                if (flushed_size > (stream->readbuflen - stream->writepos)) {
                        /* Grow the buffer */
-                       stream->readbuf.v = perealloc(stream->readbuf.v, 
PS_ULEN(stream->input_encoding, stream->writepos + flushed_size + 
stream->chunk_size), stream->is_persistent);
+                       stream->readbuf.v = perealloc(stream->readbuf.v, 
PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos + flushed_size + 
stream->chunk_size), stream->is_persistent);
                }
                while ((bucket = inp->head)) {
                        /* Convert if necessary */
-                       if (bucket->buf_type != IS_UNICODE && 
stream->input_encoding) {
-                               /* Stream expects unicode, convert using stream 
encoding */
-                               php_stream_bucket_convert(bucket, IS_UNICODE, 
stream->input_encoding);
-                       } else if (bucket->buf_type == IS_UNICODE && 
!stream->input_encoding) {
-                               /* Stream expects binary, filter provided 
unicode, just take the buffer as is */
-                               php_stream_bucket_convert_notranscode(bucket, 
IS_STRING);
+                       if (bucket->buf_type != stream->readbuf_type) {
+                               /* Stream expects different type than what's in 
bucket, convert sloppily */
+                               php_stream_bucket_convert_notranscode(bucket, 
stream->readbuf_type);
                        }
 
                        /* Append to readbuf */
-                       if (stream->input_encoding) {
+                       if (stream->readbuf_type == IS_UNICODE) {
                                 memcpy(stream->readbuf.u + stream->writepos, 
bucket->buf.u, UBYTES(bucket->buflen));
                        } else {
                                 memcpy(stream->readbuf.s + stream->writepos, 
bucket->buf.s, bucket->buflen);
@@ -632,13 +638,8 @@
                while ((bucket = inp->head)) {
                        /* Convert if necessary */
                        if (bucket->buf_type == IS_UNICODE) {
-                               if (stream->output_encoding) {
-                                       /* Stream has a configured output 
encoding, convert to appropriate type */
-                                       php_stream_bucket_convert(bucket, 
IS_STRING, stream->output_encoding);
-                               } else {
-                                       /* Stream is binary, write ugly UChars 
as is */
-                                       
php_stream_bucket_convert_notranscode(bucket, IS_STRING);
-                               }
+                               /* Force data to binary, adjusting buflen */
+                               php_stream_bucket_convert_notranscode(bucket, 
IS_STRING);
                        }
 
                        /* Must be binary by this point */
@@ -654,6 +655,9 @@
 
 PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, 
int call_dtor TSRMLS_DC)
 {
+       /* UTODO: Figure out a sane way to "defilter" so that unicode 
converters can be swapped around
+          For now, at least fopen(,'b') + stream_encoding($fp, 'charset') 
works since there's nothing to remove */
+
        if (filter->prev) {
                filter->prev->next = filter->next;
        } else {
@@ -770,6 +774,42 @@
        return FAILURE;
 }
 
+PHPAPI int _php_stream_encoding_apply(php_stream *stream, int writechain, 
const char *encoding, uint16_t error_mode, UChar *subst TSRMLS_DC)
+{
+       int encoding_len = strlen(encoding);
+       int buflen = sizeof("unicode.from.") + encoding_len - 1; /* might be 
"to", but "from" is long enough for both */
+       char *buf = emalloc(buflen + 1);
+       php_stream_filter *filter;
+       zval *filterparams;
+
+       if (writechain) {
+               memcpy(buf, "unicode.to.", sizeof("unicode.to.") - 1);
+               memcpy(buf + sizeof("unicode.to.") - 1, encoding, encoding_len 
+ 1);
+       } else {
+               memcpy(buf, "unicode.from.", sizeof("unicode.from.") - 1);
+               memcpy(buf + sizeof("unicode.from.") - 1, encoding, 
encoding_len + 1);
+       }
+
+       ALLOC_INIT_ZVAL(filterparams);
+       array_init(filterparams);
+       add_assoc_long(filterparams, "error_mode", error_mode);
+       if (subst) {
+               add_assoc_unicode(filterparams, "subst_char", subst, 1);
+       }
+       filter = php_stream_filter_create(buf, filterparams, 
php_stream_is_persistent(stream) TSRMLS_CC);
+       efree(buf);
+       zval_ptr_dtor(&filterparams);
+
+       if (!filter) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to apply 
encoding for charset: %s\n", encoding);
+               return FAILURE;
+       }
+
+       php_stream_filter_append(writechain ? &stream->writefilters : 
&stream->readfilters, filter);
+
+       return SUCCESS;
+}
+
 /*
  * Local variables:
  * tab-width: 4
http://cvs.php.net/viewcvs.cgi/php-src/main/streams/php_stream_filter_api.h?r1=1.16&r2=1.17&diff_format=u
Index: php-src/main/streams/php_stream_filter_api.h
diff -u php-src/main/streams/php_stream_filter_api.h:1.16 
php-src/main/streams/php_stream_filter_api.h:1.17
--- php-src/main/streams/php_stream_filter_api.h:1.16   Mon Mar 13 04:40:11 2006
+++ php-src/main/streams/php_stream_filter_api.h        Wed Mar 29 01:20:43 2006
@@ -19,7 +19,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: php_stream_filter_api.h,v 1.16 2006/03/13 04:40:11 pollita Exp $ */
+/* $Id: php_stream_filter_api.h,v 1.17 2006/03/29 01:20:43 pollita Exp $ */
 
 /* The filter API works on the principle of "Bucket-Brigades".  This is
  * partially inspired by the Apache 2 method of doing things, although
@@ -157,6 +157,7 @@
 PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, 
int call_dtor TSRMLS_DC);
 PHPAPI void php_stream_filter_free(php_stream_filter *filter TSRMLS_DC);
 PHPAPI php_stream_filter *_php_stream_filter_alloc(php_stream_filter_ops 
*fops, void *abstract, int persistent STREAMS_DC TSRMLS_DC);
+PHPAPI int _php_stream_encoding_apply(php_stream *stream, int writechain, 
const char *encoding, uint16_t error_mode, UChar *subst TSRMLS_DC);
 END_EXTERN_C()
 #define php_stream_filter_alloc(fops, thisptr, persistent) 
_php_stream_filter_alloc((fops), (thisptr), (persistent) STREAMS_CC TSRMLS_CC)
 #define php_stream_filter_alloc_rel(fops, thisptr, persistent) 
_php_stream_filter_alloc((fops), (thisptr), (persistent) STREAMS_REL_CC 
TSRMLS_CC)
@@ -165,6 +166,8 @@
 #define php_stream_filter_flush(filter, finish) 
_php_stream_filter_flush((filter), (finish) TSRMLS_CC)
 #define php_stream_filter_check_chain(chain) 
_php_stream_filter_check_chain((chain) TSRMLS_CC)
 #define php_stream_filter_output_prefer_unicode(filter) 
_php_stream_filter_output_prefer_unicode((filter) TSRMLS_CC)
+#define php_stream_encoding_apply(stream, writechain, encoding, error_mode, 
subst) \
+               _php_stream_encoding_apply((stream), (writechain), (encoding), 
(error_mode), (subst) TSRMLS_CC)
 
 #define php_stream_is_filtered(stream) ((stream)->readfilters.head || 
(stream)->writefilters.head)
 
@@ -179,6 +182,12 @@
 PHPAPI php_stream_filter *php_stream_filter_create(const char *filtername, 
zval *filterparams, int persistent TSRMLS_DC);
 END_EXTERN_C()
 
+/* unicode_filter.c exports */
+extern php_stream_filter_ops php_unicode_to_string_filter_ops;
+extern php_stream_filter_ops php_unicode_from_string_filter_ops;
+extern php_stream_filter_ops php_unicode_tidy_filter_ops;
+extern php_stream_filter_factory php_unicode_filter_factory;
+
 /*
  * Local variables:
  * tab-width: 4
http://cvs.php.net/viewcvs.cgi/php-src/main/streams/streams.c?r1=1.114&r2=1.115&diff_format=u
Index: php-src/main/streams/streams.c
diff -u php-src/main/streams/streams.c:1.114 
php-src/main/streams/streams.c:1.115
--- php-src/main/streams/streams.c:1.114        Sun Mar 26 06:19:24 2006
+++ php-src/main/streams/streams.c      Wed Mar 29 01:20:43 2006
@@ -19,7 +19,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: streams.c,v 1.114 2006/03/26 06:19:24 andrei Exp $ */
+/* $Id: streams.c,v 1.115 2006/03/29 01:20:43 pollita Exp $ */
 
 #define _GNU_SOURCE
 #include "php.h"
@@ -239,6 +239,7 @@
        ret->abstract = abstract;
        ret->is_persistent = persistent_id ? 1 : 0;
        ret->chunk_size = FG(def_chunk_size);
+       ret->readbuf_type = IS_STRING;
 
        if (FG(auto_detect_line_endings)) {
                ret->flags |= PHP_STREAM_FLAG_DETECT_EOL;
@@ -483,12 +484,9 @@
                                         * stream read buffer */
                                        while (brig_inp->head) {
                                                bucket = brig_inp->head;
-                                               if (bucket->buf_type != 
IS_UNICODE && stream->input_encoding) {
-                                                       /* Stream expects 
unicode, convert using stream encoding */
-                                                       
php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding);
-                                               } else if (bucket->buf_type == 
IS_UNICODE && !stream->input_encoding) {
-                                                       /* Stream expects 
binary, filter provided unicode, just take the buffer as is */
-                                                       
php_stream_bucket_convert_notranscode(bucket, IS_STRING);
+                                               if (bucket->buf_type != 
stream->readbuf_type) {
+                                                       /* Stream expects 
different datatype than bucket has, convert slopily */
+                                                       
php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type);
                                                }
                                                /* Bucket type now matches 
stream type */
 
@@ -496,9 +494,9 @@
                                                 * TODO: this can fail for 
persistent streams */
                                                if (stream->readbuflen - 
stream->writepos < bucket->buflen) {
                                                        stream->readbuflen += 
bucket->buflen;
-                                                       stream->readbuf.v = 
perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, 
stream->readbuflen), stream->is_persistent);
+                                                       stream->readbuf.v = 
perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, 
stream->readbuflen), stream->is_persistent);
                                                }
-                                               memcpy(stream->readbuf.s + 
stream->writepos, bucket->buf.s, PS_ULEN(stream->input_encoding, 
bucket->buflen));
+                                               memcpy(stream->readbuf.s + 
stream->writepos, bucket->buf.s, PS_ULEN(stream->readbuf_type == IS_UNICODE, 
bucket->buflen));
                                                stream->writepos += 
bucket->buflen;
 
                                                php_stream_bucket_unlink(bucket 
TSRMLS_CC);
@@ -530,46 +528,6 @@
                }
 
                efree(chunk_buf);
-       } else if (stream->input_encoding) { /* Unfiltered Unicode stream */
-               /* is there enough data in the buffer ? */
-               if (stream->writepos - stream->readpos < (off_t)size) {
-                       char *binbuf;
-                       UChar *ubuf;
-                       int binbuf_len, ubuf_len;
-                       size_t toread = (size > stream->chunk_size) ? size : 
stream->chunk_size;
-                       UErrorCode status = U_ZERO_ERROR;
-
-                       /* Read stream data into temporary buffer, then convert 
to unicode
-                          TODO: This can be improved */
-                       binbuf = emalloc(toread + 1);
-                       binbuf_len = stream->ops->read(stream, binbuf, toread 
TSRMLS_CC);
-                       if (binbuf_len == (size_t)-1) {
-                               /* Failure */
-                               efree(binbuf);
-                               return;
-                       }
-                       /* Convert to unicode */
-                       zend_convert_to_unicode(stream->input_encoding, &ubuf, 
&ubuf_len, binbuf, binbuf_len, &status);
-                       efree(binbuf);
-
-                       /* reduce buffer memory consumption if possible, to 
avoid a realloc */
-                       if (stream->readbuf.u && stream->readbuflen - 
stream->writepos < stream->chunk_size) {
-                               memmove(stream->readbuf.u, stream->readbuf.u + 
stream->readpos, UBYTES(stream->readbuflen - stream->readpos));
-                               stream->writepos -= stream->readpos;
-                               stream->readpos = 0;
-                       }
-
-                       /* grow the buffer if required
-                        * TODO: this can fail for persistent streams */
-                       if (stream->readbuflen - stream->writepos < ubuf_len) {
-                               stream->readbuflen += ((stream->chunk_size > 
ubuf_len) ? stream->chunk_size : ubuf_len);
-                               stream->readbuf.u = 
(UChar*)perealloc(stream->readbuf.u, UBYTES(stream->readbuflen), 
stream->is_persistent);
-                       }
-
-                       memcpy(stream->readbuf.u + stream->writepos, ubuf, 
UBYTES(ubuf_len));
-                       efree(ubuf);
-                       stream->writepos += ubuf_len;
-               }
        } else {        /* Unfiltered Binary stream */
                /* is there enough data in the buffer ? */
                if (stream->writepos - stream->readpos < (off_t)size) {
@@ -609,13 +567,13 @@
                 * drain the remainder of the buffer before using the "raw" 
read mode for
                 * the excess */
                if (stream->writepos - stream->readpos > 0) {
-                       toread = PS_ULEN(stream->input_encoding, 
stream->writepos - stream->readpos);
+                       toread = PS_ULEN(stream->readbuf_type == IS_UNICODE, 
stream->writepos - stream->readpos);
 
                        if (toread > size) {
                                toread = size;
                        }
 
-                       if (stream->input_encoding) {
+                       if (stream->readbuf_type == IS_UNICODE) {
                                /* Sloppy read, anyone using php_stream_read() 
on a unicode stream
                                 * had better know what they're doing */
                                
@@ -647,7 +605,7 @@
                        }
 
                        if (toread > 0) {
-                               if (php_stream_reads_unicode(stream)) {
+                               if (stream->readbuf_type == IS_UNICODE) {
                                        /* Sloppy read, anyone using 
php_stream_read() on a unicode stream
                                         * had better know what they're doing */
                                
@@ -685,7 +643,7 @@
 {
        size_t toread = 0, didread = 0, string_length = 0;
 
-       if (!stream->input_encoding) {
+       if (stream->readbuf_type != IS_UNICODE) {
                return -1;
        }
 
@@ -763,7 +721,7 @@
        int buflen = size;
        size_t toread = 0, didread = 0, string_length = 0;
 
-       if (!stream->input_encoding) {
+       if (stream->readbuf_type != IS_UNICODE) {
                return NULL;
        }
 
@@ -921,7 +879,7 @@
        char *readptr, *buf = zbuf.s;
 
        if (!buf) {
-               readptr = stream->readbuf.s + PS_ULEN(stream->input_encoding, 
stream->readpos);
+               readptr = stream->readbuf.s + PS_ULEN(stream->readbuf_type == 
IS_UNICODE, stream->readpos);
                avail = stream->writepos - stream->readpos;
        } else {
                readptr = zbuf.s;
@@ -929,7 +887,7 @@
        }
 
        if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) {
-               if (stream->input_encoding) {
+               if (stream->readbuf_type == IS_UNICODE) {
                        cr = (char*)u_memchr((UChar*)readptr, '\r', avail);
                        lf = (char*)u_memchr((UChar*)readptr, '\n', avail);
                } else {
@@ -948,10 +906,10 @@
                        eol = lf;
                }
        } else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) {
-               eol = stream->input_encoding ? u_memchr((UChar*)readptr, '\r', 
avail) : memchr(readptr, '\r', avail);
+               eol = (stream->readbuf_type == IS_UNICODE) ? 
u_memchr((UChar*)readptr, '\r', avail) : memchr(readptr, '\r', avail);
        } else {
                /* unix (and dos) line endings */
-               eol = stream->input_encoding ? u_memchr((UChar*)readptr, '\n', 
avail) : memchr(readptr, '\n', avail);
+               eol = (stream->readbuf_type == IS_UNICODE) ? 
u_memchr((UChar*)readptr, '\n', avail) : memchr(readptr, '\n', avail);
        }
 
        return (void*)eol;
@@ -967,7 +925,7 @@
        size_t current_buf_size = 0;
        size_t total_copied = 0;
        int grow_mode = 0;
-       int is_unicode = php_stream_reads_unicode(stream);
+       int is_unicode = stream->readbuf_type == IS_UNICODE;
        int split_surrogate = 0;
        zstr bufstart = buf;
 
@@ -1042,8 +1000,8 @@
                                 * than 8K, we waste 1 byte per additional 8K 
or so.
                                 * That seems acceptable to me, to avoid making 
this code
                                 * hard to follow */
-                               bufstart.s = erealloc(bufstart.s, 
PS_ULEN(stream->input_encoding, current_buf_size + cpysz + 1));
-                               buf.s = bufstart.s + 
PS_ULEN(stream->input_encoding, total_copied);
+                               bufstart.s = erealloc(bufstart.s, 
PS_ULEN(stream->readbuf_type == IS_UNICODE, current_buf_size + cpysz + 1));
+                               buf.s = bufstart.s + 
PS_ULEN(stream->readbuf_type == IS_UNICODE, total_copied);
                                current_buf_size += cpysz + 1;
                        } else {
                                if (cpysz >= maxlen - 1) {
@@ -1177,7 +1135,7 @@
        size_t toread;
        int skip = 0;
 
-       if (!php_stream_reads_unicode(stream)) {
+       if (stream->readbuf_type != IS_UNICODE) {
                return NULL;
        }
 
@@ -1241,8 +1199,7 @@
 /* Writes a buffer directly to a stream, using multiple of the chunk size */
 static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr 
buf, int buflen TSRMLS_DC)
 {
-       size_t didwrite = 0, towrite, justwrote, shouldwrite, buflen_orig = 
buflen;
-       zstr buf_orig = buf;
+       size_t didwrite = 0, towrite, justwrote, shouldwrite;
        char *freeme = NULL;
 
        /* if we have a seekable stream we need to ensure that data is written 
at the
@@ -1254,24 +1211,9 @@
                stream->ops->seek(stream, stream->position, SEEK_SET, 
&stream->position TSRMLS_CC);
        }
 
-       if (stream->output_encoding && buf_type == IS_UNICODE) {
-               char *dest;
-               int destlen, num_conv;
-               UErrorCode status = U_ZERO_ERROR;
-
-               num_conv = zend_convert_from_unicode(stream->output_encoding, 
&dest, &destlen, buf.u, buflen, &status);
-               if (U_FAILURE(status)) {
-                       int32_t offset = u_countChar32(buf.u, num_conv);
-
-                       zend_raise_conversion_error_ex("Could not convert 
Unicode string to binary string", stream->output_encoding, ZEND_FROM_UNICODE, 
offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
-               }
-               freeme = buf.s = dest;
-               buflen = destlen;
-       } else {
-               /* Sloppy handling, make it a binary buffer */
-               if (buf_type != IS_STRING) {
-                       buflen = UBYTES(buflen);
-               }
+       /* Sloppy handling, make it a binary buffer */
+       if (buf_type == IS_UNICODE) {
+               buflen = UBYTES(buflen);
        }
 
        shouldwrite = buflen;
@@ -1300,32 +1242,7 @@
                }
        }
 
-
-       if (stream->output_encoding) {
-               /* Map didwrite back to the original character count */
-               if (didwrite == shouldwrite) {
-                       /* Everything wrote okay, no need to count */
-                       didwrite = buflen_orig;
-               } else {
-                       UErrorCode status = U_ZERO_ERROR;
-                       char *t = freeme;
-                       const UChar *p = buf_orig.u;
-
-                       switch (ucnv_getType(stream->output_encoding)) {
-                               case UCNV_SBCS:
-                               case UCNV_LATIN_1:
-                               case UCNV_US_ASCII:
-                                       /* 1:1 character->byte mapping, 
didwrite really does mean the number of characters written */
-                                       break;
-                               default:
-                                       /* Reconvert into junk buffer to see 
where conversion stops in source string */
-                                       
ucnv_resetFromUnicode(stream->output_encoding);
-                                       
ucnv_fromUnicode(stream->output_encoding, &t, t + didwrite, &p, p + 
buflen_orig, NULL, TRUE, &status);
-                                       /* p stops at the first unconvertable 
UChar when t runs out of space */
-                                       didwrite = p - buf_orig.u;
-                       }
-               }
-       } else if (buf_type == IS_UNICODE) {
+       if (buf_type == IS_UNICODE) {
                /* Was slopily converted */
                didwrite /= UBYTES(1);
        }
@@ -2274,50 +2191,15 @@
        if (stream && strchr(implicit_mode, 't') && UG(unicode)) {
                if (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || 
strchr(implicit_mode, '+')) {
                        char *encoding = (context && context->output_encoding) 
? context->output_encoding : "utf8";
-                       UErrorCode status = U_ZERO_ERROR;
 
-                       stream->output_encoding = ucnv_open(encoding, &status);
-                       if (U_FAILURE(status)) {
-                               switch (status) {
-                                       case U_MEMORY_ALLOCATION_ERROR:
-                                               
php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-                                                       "Unable to allocate 
memory for unicode output converter: %s", encoding);
-                                               break;
-                                       case U_FILE_ACCESS_ERROR:
-                                               
php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-                                                       "Error loading unicode 
output converter: %s", encoding);
-                                               break;
-                                       default:
-                                               
php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-                                                       "Unknown error starting 
unicode output converter: %s", encoding);
-                               }
-                       } else {
-                               /* UTODO: (Maybe?) Allow overriding the default 
error handlers on a per-stream basis via context params */
-                               
zend_set_converter_error_mode(stream->output_encoding, ZEND_FROM_UNICODE, 
UG(from_error_mode));
-                               
zend_set_converter_subst_char(stream->output_encoding, UG(from_subst_char));
-                       }
+                       /* UTODO: (Maybe?) Allow overriding the default error 
handlers on a per-stream basis via context params */
+                       php_stream_encoding_apply(stream, 1, encoding, 
UG(from_error_mode), UG(from_subst_char));
                }
                if (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+')) {
                        char *encoding = (context && context->input_encoding) ? 
context->input_encoding : "utf8";
-                       UErrorCode status = U_ZERO_ERROR;
 
-                       stream->input_encoding = ucnv_open(encoding, &status);
-                       if (U_FAILURE(status)) {
-                               switch (status) {
-                                       case U_MEMORY_ALLOCATION_ERROR:
-                                               
php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-                                                       "Unable to allocate 
memory for unicode input converter: %s", encoding);
-                                               break;
-                                       case U_FILE_ACCESS_ERROR:
-                                               
php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-                                                       "Error loading unicode 
input converter: %s", encoding);
-                                               break;
-                                       default:
-                                               
php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-                                                       "Unknown error starting 
unicode input converter: %s", encoding);
-                               }
-                       }
-                       /* UTODO: If/When Input error handling gets 
implemented, set the options on success */
+                       /* UTODO: (Maybe?) Allow overriding the default error 
handlers on a per-stream basis via context params */
+                       php_stream_encoding_apply(stream, 0, encoding, 
UG(to_error_mode), NULL);
                }
        }
 
@@ -2334,6 +2216,7 @@
                pefree(copy_of_path, persistent);
        }
 #endif
+
        return stream;
 }
 /* }}} */
http://cvs.php.net/viewcvs.cgi/php-src/win32/build/config.w32?r1=1.52&r2=1.53&diff_format=u
Index: php-src/win32/build/config.w32
diff -u php-src/win32/build/config.w32:1.52 php-src/win32/build/config.w32:1.53
--- php-src/win32/build/config.w32:1.52 Wed Mar  8 14:41:45 2006
+++ php-src/win32/build/config.w32      Wed Mar 29 01:20:43 2006
@@ -1,5 +1,5 @@
 // vim:ft=javascript
-// $Id: config.w32,v 1.52 2006/03/08 14:41:45 iliaa Exp $
+// $Id: config.w32,v 1.53 2006/03/29 01:20:43 pollita Exp $
 // "Master" config file; think of it as a configure.in
 // equivalent.
 
@@ -279,7 +279,7 @@
        php_open_temporary_file.c php_logos.c output.c internal_functions.c 
php_sprintf.c");
 
 ADD_SOURCES("main/streams", "streams.c cast.c memory.c filter.c 
plain_wrapper.c \
-       userspace.c transports.c xp_socket.c mmap.c");
+       userspace.c transports.c xp_socket.c mmap.c unicode_filter.c");
 
 ADD_SOURCES("win32", "crypt_win32.c flock.c glob.c md5crypt.c pwd.c readdir.c \
        registry.c select.c sendmail.c time.c wfile.c winutil.c wsyslog.c 
globals.c");

http://cvs.php.net/viewcvs.cgi/php-src/main/streams/unicode_filter.c?view=markup&rev=1.1
Index: php-src/main/streams/unicode_filter.c
+++ php-src/main/streams/unicode_filter.c
/*
   +----------------------------------------------------------------------+
   | PHP Version 6                                                        |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | [EMAIL PROTECTED] so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Authors: Sara Golemon ([EMAIL PROTECTED])                              |
   +----------------------------------------------------------------------+
*/

/* $Id: unicode_filter.c,v 1.1 2006/03/29 01:20:43 pollita Exp $ */


#include "php.h"
#include <unicode/ucnv.h>

/* {{{ data structure */
/* Per-filter state shared by the unicode.* stream filters in this file;
   stored in the filter's `abstract` pointer. */
typedef struct _php_unicode_filter_data {
        /* Persistence flag handed to pemalloc()/pefree() and to
           php_stream_bucket_new*() for buffers created by the filter */
        char is_persistent;
        /* ICU converter driven by ucnv_fromUnicode()/ucnv_toUnicode();
           released with ucnv_close() in the filter destructor */
        UConverter *conv;

        /* Direction last used by the tidy filter: set to the "prefer unicode"
           value when converting towards unicode, cleared when converting
           towards binary strings */
        char to_unicode;
} php_unicode_filter_data;
/* }}} */

/* {{{ unicode.* filter implementation */

/* unicode.to.* -- Expects Unicode -- Returns String
 *
 * Drains buckets_in, converting every unicode bucket to a binary string with
 * the filter's ICU converter and appending the result to buckets_out.
 * Buckets that are not unicode are passed through untouched.
 *
 * stream          stream the new buckets are created for
 * thisfilter      filter instance; its abstract pointer holds the
 *                 php_unicode_filter_data state
 * buckets_in      input brigade, emptied by this call
 * buckets_out     output brigade receiving converted/passed-through buckets
 * bytes_consumed  optional; receives the number of input bytes handled
 * flags           PSFS_FLAG_* bits; PSFS_FLAG_FLUSH_CLOSE flushes the converter
 *
 * Returns PSFS_PASS_ON when at least one bucket was emitted, PSFS_FEED_ME
 * when nothing was produced, PSFS_ERR_FATAL when the filter has no state.
 */
static php_stream_filter_status_t php_unicode_to_string_filter(
        php_stream *stream,
        php_stream_filter *thisfilter,
        php_stream_bucket_brigade *buckets_in,
        php_stream_bucket_brigade *buckets_out,
        size_t *bytes_consumed,
        int flags
        TSRMLS_DC)
{
        php_unicode_filter_data *data;
        php_stream_filter_status_t exit_status = PSFS_FEED_ME;
        size_t consumed = 0;

        if (!thisfilter || !thisfilter->abstract) {
                /* Should never happen */
                return PSFS_ERR_FATAL;
        }

        data = (php_unicode_filter_data *)(thisfilter->abstract);
        while (buckets_in->head) {
                php_stream_bucket *bucket = buckets_in->head;
                UChar *src = bucket->buf.u;

                php_stream_bucket_unlink(bucket TSRMLS_CC);
                /* Was `!bucket->buf_type == IS_UNICODE`, which negates the type
                   tag before comparing and therefore never matched, so string
                   buckets fell through and were read as UChar arrays */
                if (bucket->buf_type != IS_UNICODE) {
                        /* Already a binary string, can't really do anything with it */
                        consumed += bucket->buflen;
                        php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
                        exit_status = PSFS_PASS_ON;
                        continue;
                }

                while (src < (bucket->buf.u + bucket->buflen)) {
                        UChar *src_before = src;
                        int remaining = bucket->buflen - (src - bucket->buf.u);
                        char *destp, *destbuf;
                        /* Worst-case byte count for the UChars still to convert */
                        int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv));
                        UErrorCode errCode = U_ZERO_ERROR;
                        php_stream_bucket *new_bucket;

                        destp = destbuf = (char *)pemalloc(destlen, data->is_persistent);

                        /* Advances src past the consumed UChars and destp past the written bytes */
                        ucnv_fromUnicode(data->conv, &destp, destbuf + destlen, (const UChar**)&src, src + remaining, NULL, FALSE, &errCode);
                        /* UTODO: Error catching */

                        /* The bucket takes ownership of destbuf (own_buf = 1) */
                        new_bucket = php_stream_bucket_new(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
                        php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
                        exit_status = PSFS_PASS_ON;

                        if (src == src_before && destp == destbuf) {
                                /* Converter neither consumed input nor produced output:
                                   retrying with the same data would spin forever */
                                break;
                        }
                }
                /* buflen counts UChars for a unicode bucket; report bytes */
                consumed += UBYTES(bucket->buflen);
                php_stream_bucket_delref(bucket TSRMLS_CC);
        }

        if (flags & PSFS_FLAG_FLUSH_CLOSE) {
                UErrorCode errCode = U_ZERO_ERROR;
                char d[64], *dest = d, *destp = d + 64;
                /* Spit it out! */

                /* NOTE(review): the source pointers are NULL here; confirm ICU
                   accepts that for a flush-only call */
                ucnv_fromUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode);
                /* UTODO: Error catching */
                if (dest > d) {
                        /* NOTE(review): d lives on this stack frame and is passed with
                           own_buf = 0; confirm php_stream_bucket_new() copies it or
                           that the bucket cannot outlive this call */
                        php_stream_bucket *bucket = php_stream_bucket_new(stream, d, dest - d, 0, 0 TSRMLS_CC);
                        php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
                        exit_status = PSFS_PASS_ON;
                }
        }

        if (bytes_consumed) {
                *bytes_consumed = consumed;
        }

        return exit_status;
}

/* unicode.from.* -- Expects String -- Returns Unicode
 *
 * Drains buckets_in, converting every binary string bucket to unicode with
 * the filter's ICU converter and appending the result to buckets_out.
 * Buckets that are already unicode are passed through untouched.
 *
 * stream          stream the new buckets are created for
 * thisfilter      filter instance; its abstract pointer holds the
 *                 php_unicode_filter_data state
 * buckets_in      input brigade, emptied by this call
 * buckets_out     output brigade receiving converted/passed-through buckets
 * bytes_consumed  optional; receives the number of input bytes handled
 * flags           PSFS_FLAG_* bits; PSFS_FLAG_FLUSH_CLOSE flushes the converter
 *
 * Returns PSFS_PASS_ON when at least one bucket was emitted, PSFS_FEED_ME
 * when nothing was produced, PSFS_ERR_FATAL when the filter has no state.
 */
static php_stream_filter_status_t php_unicode_from_string_filter(
        php_stream *stream,
        php_stream_filter *thisfilter,
        php_stream_bucket_brigade *buckets_in,
        php_stream_bucket_brigade *buckets_out,
        size_t *bytes_consumed,
        int flags
        TSRMLS_DC)
{
        php_unicode_filter_data *data;
        php_stream_filter_status_t exit_status = PSFS_FEED_ME;
        size_t consumed = 0;

        if (!thisfilter || !thisfilter->abstract) {
                /* Should never happen */
                return PSFS_ERR_FATAL;
        }

        data = (php_unicode_filter_data *)(thisfilter->abstract);
        while (buckets_in->head) {
                php_stream_bucket *bucket = buckets_in->head;
                char *src = bucket->buf.s;

                php_stream_bucket_unlink(bucket TSRMLS_CC);
                if (bucket->buf_type == IS_UNICODE) {
                        /* already in unicode, nothing to do */
                        consumed += UBYTES(bucket->buflen);
                        php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
                        exit_status = PSFS_PASS_ON;
                        continue;
                }

                while (src < (bucket->buf.s + bucket->buflen)) {
                        char *src_before = src;
                        int remaining = bucket->buflen - (src - bucket->buf.s);
                        UChar *destp, *destbuf;
                        /* Output capacity, counted in UChars.  The previous code used
                           this value as a byte size and built the limit pointer with
                           byte arithmetic, which is misaligned for odd sizes and may
                           let the converter write past the allocation. */
                        int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv));
                        UErrorCode errCode = U_ZERO_ERROR;
                        php_stream_bucket *new_bucket;

                        destp = destbuf = (UChar *)pemalloc(UBYTES(destlen), data->is_persistent);

                        /* Advances src past the consumed bytes and destp past the
                           written UChars; leftover input is handled by the next pass */
                        ucnv_toUnicode(data->conv, &destp, destbuf + destlen, (const char**)&src, src + remaining, NULL, FALSE, &errCode);
                        /* UTODO: Error catching */

                        /* The bucket takes ownership of destbuf (own_buf = 1) */
                        new_bucket = php_stream_bucket_new_unicode(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
                        php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
                        exit_status = PSFS_PASS_ON;

                        if (src == src_before && destp == destbuf) {
                                /* Converter neither consumed input nor produced output:
                                   retrying with the same data would spin forever */
                                break;
                        }
                }
                consumed += bucket->buflen;
                php_stream_bucket_delref(bucket TSRMLS_CC);
        }

        if (flags & PSFS_FLAG_FLUSH_CLOSE) {
                UErrorCode errCode = U_ZERO_ERROR;
                UChar d[64], *dest = d, *destp = d + 64;
                /* Spit it out! */

                /* NOTE(review): the source pointers are NULL here; confirm ICU
                   accepts that for a flush-only call */
                ucnv_toUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode);
                /* UTODO: Error catching */
                if (dest > d) {
                        /* NOTE(review): d lives on this stack frame and is passed with
                           own_buf = 0; confirm php_stream_bucket_new_unicode() copies
                           it or that the bucket cannot outlive this call */
                        php_stream_bucket *bucket = php_stream_bucket_new_unicode(stream, d, dest - d, 0, 0 TSRMLS_CC);
                        php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
                        exit_status = PSFS_PASS_ON;
                }
        }

        if (bytes_consumed) {
                *bytes_consumed = consumed;
        }

        return exit_status;
}

/* unicode.tidy.* -- Expects anything -- Returns whatever is preferred by
   subsequent filters.
   Can be used to "magically" fix-up bucket messes.

   Asks the filter chain which output type is preferred, resets the converter
   whenever the direction differs from the one recorded in the filter state,
   then delegates to the matching one-way filter above. */
static php_stream_filter_status_t php_unicode_tidy_filter(
        php_stream *stream,
        php_stream_filter *thisfilter,
        php_stream_bucket_brigade *buckets_in,
        php_stream_bucket_brigade *buckets_out,
        size_t *bytes_consumed,
        int flags
        TSRMLS_DC)
{
        php_unicode_filter_data *state;
        int wants_unicode;

        if (!thisfilter || !thisfilter->abstract) {
                /* Should never happen */
                return PSFS_ERR_FATAL;
        }

        state = (php_unicode_filter_data *)(thisfilter->abstract);
        wants_unicode = php_stream_filter_output_prefer_unicode(thisfilter);

        if (!wants_unicode) {
                /* Downstream wants binary strings */
                if (state->to_unicode) {
                        /* Direction flipped since the last call: drop stale converter state */
                        ucnv_resetFromUnicode(state->conv);
                        state->to_unicode = 0;
                }
                return php_unicode_to_string_filter(stream, thisfilter, buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC);
        }

        /* Downstream wants unicode */
        if (!state->to_unicode) {
                /* Direction flipped since the last call: drop stale converter state */
                ucnv_resetToUnicode(state->conv);
                state->to_unicode = wants_unicode;
        }
        return php_unicode_from_string_filter(stream, thisfilter, buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC);
}

/* Destructor used by the unicode.* filter ops tables: closes the converter
   held in the filter's private data, then releases that data with the
   allocator selected by its own is_persistent flag.  A missing filter or
   missing private data is silently ignored. */
static void php_unicode_filter_dtor(php_stream_filter *thisfilter TSRMLS_DC)
{
        php_unicode_filter_data *state;

        if (!thisfilter || !thisfilter->abstract) {
                return;
        }

        state = (php_unicode_filter_data *)thisfilter->abstract;
        ucnv_close(state->conv);
        pefree(state, state->is_persistent);
}

/* Ops table selected by the factory for "unicode.to.<charset>" filter names.
   Entries are positional: the filter function, the shared destructor, a
   name pattern string, and the flags word.  Going by the flag names, this
   filter takes unicode buckets and produces string buckets. */
php_stream_filter_ops php_unicode_to_string_filter_ops = {
        php_unicode_to_string_filter,
        php_unicode_filter_dtor,
        "unicode.to.*",
        PSFO_FLAG_ACCEPTS_UNICODE | PSFO_FLAG_OUTPUTS_STRING
};

/* Ops table selected by the factory for "unicode.from.<charset>" filter
   names.  Entries are positional: the filter function, the shared
   destructor, a name pattern string, and the flags word.  Going by the flag
   names, this filter takes string buckets and produces unicode buckets. */
php_stream_filter_ops php_unicode_from_string_filter_ops = {
        php_unicode_from_string_filter,
        php_unicode_filter_dtor,
        "unicode.from.*",
        PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_UNICODE
};

/* Ops table selected by the factory for "unicode.tidy" and
   "unicode.tidy.<charset>" filter names.  Entries are positional: the filter
   function, the shared destructor, a name pattern string, and the flags
   word.  Going by the flag names, this filter accepts and may emit either
   bucket type. */
php_stream_filter_ops php_unicode_tidy_filter_ops = {
        php_unicode_tidy_filter,
        php_unicode_filter_dtor,
        "unicode.tidy.*",
        PSFO_FLAG_ACCEPTS_ANY | PSFO_FLAG_OUTPUTS_ANY
};
/* }}} */


/* {{{ unicode.* factory */

/* Factory callback for the unicode.* stream filters.
 *
 * filtername selects the ops table and the charset handed to ucnv_open():
 *   unicode.to.<charset>    php_unicode_to_string_filter_ops
 *   unicode.from.<charset>  php_unicode_from_string_filter_ops
 *   unicode.tidy.<charset>  php_unicode_tidy_filter_ops
 *   unicode.tidy            php_unicode_tidy_filter_ops, charset "utf8"
 *
 * filterparams, when it is an array, may carry:
 *   "error_mode"  replaces the default error mode (coerced to long)
 *   "subst_char"  replaces UG(from_subst_char) (coerced to unicode); only
 *                 looked at when the filter is not a unicode.from.* filter
 *
 * persistent picks the allocator for the filter's private data and is passed
 * through to php_stream_filter_alloc().
 *
 * Returns the newly allocated filter, or NULL when the name is not one of
 * the forms above or when the converter cannot be opened (a warning is
 * raised in the latter case).
 */
static php_stream_filter *php_unicode_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC)
{
        php_unicode_filter_data *data;
        const char *charset, *direction;
        php_stream_filter_ops *fops;
        UErrorCode ucnvError = U_ZERO_ERROR;
        /* Note: from_error_mode means from unicode to charset.  from filter means from charset to unicode */
        uint16_t err_mode = UG(from_error_mode);
        char to_unicode = 0;
        int have_params;
        zval **tmpzval;

        if (strncasecmp(filtername, "unicode.", sizeof("unicode.") - 1)) {
                /* Never happens */
                return NULL;
        }

        direction = filtername + sizeof("unicode.") - 1;
        if (strncmp(direction, "to.", sizeof("to.") - 1) == 0) {
                fops = &php_unicode_to_string_filter_ops;
                charset = direction + sizeof("to.") - 1;
        } else if (strncmp(direction, "from.", sizeof("from.") - 1) == 0) {
                fops = &php_unicode_from_string_filter_ops;
                charset = direction + sizeof("from.") - 1;
                to_unicode = 1;
                err_mode = UG(to_error_mode);
        } else if (strncmp(direction, "tidy.", sizeof("tidy.") - 1) == 0) {
                fops = &php_unicode_tidy_filter_ops;
                charset = direction + sizeof("tidy.") - 1;
        } else if (strcmp(direction, "tidy") == 0) {
                /* Bare "unicode.tidy" falls back to a fixed charset */
                fops = &php_unicode_tidy_filter_ops;
                charset = "utf8";
        } else {
                /* Shouldn't happen */
                return NULL;
        }

        /* Create this filter */
        data = (php_unicode_filter_data *)pecalloc(1, sizeof(php_unicode_filter_data), persistent);
        if (!data) {
                /* sizeof yields a size_t; cast so the argument really matches %d */
                php_error_docref(NULL TSRMLS_CC, E_ERROR, "Failed allocating %d bytes.", (int)sizeof(php_unicode_filter_data));
                return NULL;
        }

        data->conv = ucnv_open(charset, &ucnvError);
        data->to_unicode = to_unicode;
        if (!data->conv) {
                const char *reason = "Unknown Error";

                pefree(data, persistent);
                switch (ucnvError) {
                        case U_MEMORY_ALLOCATION_ERROR:
                                reason = "unable to allocate memory";
                                break;
                        case U_FILE_ACCESS_ERROR:
                                reason = "file access error";
                                break;
                        default:
                                break;
                }
                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to open charset converter, %s", reason);
                return NULL;
        }

        /* Options are only honoured when the caller passed an array */
        have_params = (filterparams && Z_TYPE_P(filterparams) == IS_ARRAY);

        if (have_params &&
                zend_hash_find(Z_ARRVAL_P(filterparams), "error_mode", sizeof("error_mode"), (void**)&tmpzval) == SUCCESS &&
                tmpzval && *tmpzval) {
                if (Z_TYPE_PP(tmpzval) == IS_LONG) {
                        err_mode = Z_LVAL_PP(tmpzval);
                } else {
                        /* Coerce a private copy so the caller's zval is left untouched */
                        zval copyval = **tmpzval;
                        zval_copy_ctor(&copyval);
                        convert_to_long(&copyval);
                        err_mode = Z_LVAL(copyval);
                }
        }

        zend_set_converter_error_mode(data->conv, to_unicode ? ZEND_TO_UNICODE : ZEND_FROM_UNICODE, err_mode);
        if (!to_unicode) {
                UChar *freeme = NULL;
                UChar *subst_char = UG(from_subst_char);

                if (have_params &&
                        zend_hash_find(Z_ARRVAL_P(filterparams), "subst_char", sizeof("subst_char"), (void**)&tmpzval) == SUCCESS &&
                        tmpzval && *tmpzval) {
                        if (Z_TYPE_PP(tmpzval) == IS_UNICODE) {
                                subst_char = Z_USTRVAL_PP(tmpzval);
                        } else {
                                /* Coerce a private copy; its buffer is released below */
                                zval copyval = **tmpzval;
                                zval_copy_ctor(&copyval);
                                convert_to_unicode(&copyval);
                                subst_char = freeme = Z_USTRVAL(copyval);
                        }
                }

                zend_set_converter_subst_char(data->conv, subst_char);

                if (freeme) {
                        efree(freeme);
                }
        }

        return php_stream_filter_alloc(fops, data, persistent);
}

/* Factory object for the unicode.* filters; its only initializer is the
   php_unicode_filter_create callback defined above. */
php_stream_filter_factory php_unicode_filter_factory = {
        php_unicode_filter_create
};
/* }}} */

/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 * vim600: sw=4 ts=4 fdm=marker
 * vim<600: sw=4 ts=4
 */


-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to