pollita         Wed Apr 12 22:40:56 2006 UTC

  Modified files:              
    /php-src/ext/standard       http_fopen_wrapper.c 
    /php-src/main/streams       filter.c php_stream_filter_api.h streams.c 
  Log:
  Allow http:// wrapper to automatically apply correct unicode.from.* filter 
based on content-type header
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/http_fopen_wrapper.c?r1=1.112&r2=1.113&diff_format=u
Index: php-src/ext/standard/http_fopen_wrapper.c
diff -u php-src/ext/standard/http_fopen_wrapper.c:1.112 
php-src/ext/standard/http_fopen_wrapper.c:1.113
--- php-src/ext/standard/http_fopen_wrapper.c:1.112     Sun Mar 26 17:12:26 2006
+++ php-src/ext/standard/http_fopen_wrapper.c   Wed Apr 12 22:40:56 2006
@@ -19,7 +19,7 @@
    |          Sara Golemon <[EMAIL PROTECTED]>                              |
    +----------------------------------------------------------------------+
  */
-/* $Id: http_fopen_wrapper.c,v 1.112 2006/03/26 17:12:26 iliaa Exp $ */ 
+/* $Id: http_fopen_wrapper.c,v 1.113 2006/04/12 22:40:56 pollita Exp $ */ 
 
 #include "php.h"
 #include "php_globals.h"
@@ -81,6 +81,47 @@
 #define HTTP_HEADER_CONTENT_LENGTH     16
 #define HTTP_HEADER_TYPE                       32
 
+/* Extract the value of the charset parameter from a Content-Type header.
+ * Returns an estrndup()ed copy that the caller must efree(), or NULL when no
+ * usable charset is present.  This is a deliberately simple parser: malformed
+ * input only results in NULL, i.e. the unicode filter not being applied. */
+static inline char *php_http_detect_charset(char *http_header_line)
+{
+       char *s;
+
+       for (s = strchr(http_header_line, ';'); s; s = strchr(s + 1, ';')) {
+               char *p = s;
+
+               while (*(++p) == ' ');
+               /* Parameter names are case-insensitive (RFC 2616, 3.7) */
+               if (strncasecmp(p, "charset", sizeof("charset") - 1) != 0) {
+                       continue;
+               }
+               p += sizeof("charset") - 1;
+               while (*p == ' ') p++;
+               if (*p != '=') {
+                       continue;
+               }
+               while (*(++p) == ' ');
+               if (*p == '"') {
+                       /* Quoted value: runs up to the closing quote */
+                       s = p + 1;
+                       p = strchr(s, '"');
+                       if (!p) {
+                               return NULL; /* Bad things, unmatched quote */
+                       }
+               } else {
+                       /* Unquoted value: ends at a space, ';' or end of line */
+                       s = p;
+                       while (*p && *p != ' ' && *p != ';') p++;
+               }
+               /* An empty value can never name an encoding */
+               return (p > s) ? estrndup(s, p - s) : NULL;
+       }
+
+       return NULL;
+}
+
 php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char 
*path, char *mode, int options, char **opened_path, php_stream_context 
*context, int redirect_max, int header_init STREAMS_DC TSRMLS_DC)
 {
        php_stream *stream = NULL;
@@ -104,6 +145,7 @@
        int transport_len, have_header = 0, request_fulluri = 0;
        char *protocol_version = NULL;
        int protocol_version_len = 3; /* Default: "1.0" */
+       char *charset = NULL;
 
        tmp_line[0] = '\0';
 
@@ -543,6 +585,11 @@
                        if (!strncasecmp(http_header_line, "Location: ", 10)) {
                                strlcpy(location, http_header_line + 10, 
sizeof(location));
                        } else if (!strncasecmp(http_header_line, 
"Content-Type: ", 14)) {
+
+                               if (UG(unicode) && strchr(mode, 't')) {
+                                       charset = 
php_http_detect_charset(http_header_line + sizeof("Content-type: "));
+                               }
+
                                php_stream_notify_info(context, 
PHP_STREAM_NOTIFY_MIME_TYPE_IS, http_header_line + 14, 0);
                        } else if (!strncasecmp(http_header_line, 
"Content-Length: ", 16)) {
                                file_size = atoi(http_header_line + 16);
@@ -572,6 +619,11 @@
                php_stream_close(stream);
                stream = NULL;
 
+               if (charset) {
+                       efree(charset);
+                       charset = NULL;
+               }
+
                if (location[0] != '\0')        {
 
                        char new_path[HTTP_HEADER_BLOCK_SIZE];
@@ -684,6 +736,13 @@
 
        }
 
+       if (charset) {
+               if (stream && UG(unicode) && strchr(mode, 't')) {
+                       php_stream_encoding_apply(stream, 0, charset, 
UG(to_error_mode), NULL);
+               }
+               efree(charset);
+       }
+
        return stream;
 }
 
http://cvs.php.net/viewcvs.cgi/php-src/main/streams/filter.c?r1=1.28&r2=1.29&diff_format=u
Index: php-src/main/streams/filter.c
diff -u php-src/main/streams/filter.c:1.28 php-src/main/streams/filter.c:1.29
--- php-src/main/streams/filter.c:1.28  Wed Mar 29 01:20:43 2006
+++ php-src/main/streams/filter.c       Wed Apr 12 22:40:56 2006
@@ -16,7 +16,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: filter.c,v 1.28 2006/03/29 01:20:43 pollita Exp $ */
+/* $Id: filter.c,v 1.29 2006/04/12 22:40:56 pollita Exp $ */
 
 #include "php.h"
 #include "php_globals.h"
@@ -550,6 +550,35 @@
        return preferred ^ inverted;
 }
 
+/* Walk a filter chain from its head and report the data type that comes out
+ * of the last filter when data of `type` (IS_STRING or IS_UNICODE) goes in.
+ * Returns 0 as soon as a filter does not accept the type reaching it. */
+PHPAPI int _php_stream_filter_product(php_stream_filter_chain *chain, int type TSRMLS_DC)
+{
+       php_stream_filter *cur = chain->head;
+
+       while (cur) {
+               if (type == IS_STRING && !(cur->fops->flags & PSFO_FLAG_ACCEPTS_STRING)) {
+                       return 0;
+               }
+               if (type == IS_UNICODE && !(cur->fops->flags & PSFO_FLAG_ACCEPTS_UNICODE)) {
+                       return 0;
+               }
+               if (cur->fops->flags & PSFO_FLAG_OUTPUTS_OPPOSITE) {
+                       type = (type == IS_STRING) ? IS_UNICODE : IS_STRING;
+               } else if ((cur->fops->flags & PSFO_FLAG_OUTPUTS_SAME) || (cur->fops->flags & PSFO_FLAG_OUTPUTS_ANY)) {
+                       /* Output type equals input type: nothing to update */
+               } else if (cur->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) {
+                       type = IS_UNICODE;
+               } else {
+                       type = IS_STRING;
+               }
+               cur = cur->next;
+       }
+
+       return type;
+}
+
 PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish 
TSRMLS_DC)
 {
        php_stream_bucket_brigade brig_a = { NULL, NULL }, brig_b = { NULL, 
NULL }, *inp = &brig_a, *outp = &brig_b, *brig_temp;
http://cvs.php.net/viewcvs.cgi/php-src/main/streams/php_stream_filter_api.h?r1=1.17&r2=1.18&diff_format=u
Index: php-src/main/streams/php_stream_filter_api.h
diff -u php-src/main/streams/php_stream_filter_api.h:1.17 
php-src/main/streams/php_stream_filter_api.h:1.18
--- php-src/main/streams/php_stream_filter_api.h:1.17   Wed Mar 29 01:20:43 2006
+++ php-src/main/streams/php_stream_filter_api.h        Wed Apr 12 22:40:56 2006
@@ -19,7 +19,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: php_stream_filter_api.h,v 1.17 2006/03/29 01:20:43 pollita Exp $ */
+/* $Id: php_stream_filter_api.h,v 1.18 2006/04/12 22:40:56 pollita Exp $ */
 
 /* The filter API works on the principle of "Bucket-Brigades".  This is
  * partially inspired by the Apache 2 method of doing things, although
@@ -153,6 +153,7 @@
 PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, 
php_stream_filter *filter TSRMLS_DC);
 PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain 
TSRMLS_DC);
 PHPAPI int _php_stream_filter_output_prefer_unicode(php_stream_filter *filter 
TSRMLS_DC);
+PHPAPI int _php_stream_filter_product(php_stream_filter_chain *chain, int type 
TSRMLS_DC);
 PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish 
TSRMLS_DC);
 PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, 
int call_dtor TSRMLS_DC);
 PHPAPI void php_stream_filter_free(php_stream_filter *filter TSRMLS_DC);
@@ -166,6 +167,7 @@
 #define php_stream_filter_flush(filter, finish) 
_php_stream_filter_flush((filter), (finish) TSRMLS_CC)
 #define php_stream_filter_check_chain(chain) 
_php_stream_filter_check_chain((chain) TSRMLS_CC)
 #define php_stream_filter_output_prefer_unicode(filter) 
_php_stream_filter_output_prefer_unicode((filter) TSRMLS_CC)
+#define php_stream_filter_product(chain, type) 
_php_stream_filter_product((chain), (type) TSRMLS_CC)
 #define php_stream_encoding_apply(stream, writechain, encoding, error_mode, 
subst) \
                _php_stream_encoding_apply((stream), (writechain), (encoding), 
(error_mode), (subst) TSRMLS_CC)
 
http://cvs.php.net/viewcvs.cgi/php-src/main/streams/streams.c?r1=1.119&r2=1.120&diff_format=u
Index: php-src/main/streams/streams.c
diff -u php-src/main/streams/streams.c:1.119 
php-src/main/streams/streams.c:1.120
--- php-src/main/streams/streams.c:1.119        Thu Apr  6 19:39:11 2006
+++ php-src/main/streams/streams.c      Wed Apr 12 22:40:56 2006
@@ -19,7 +19,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: streams.c,v 1.119 2006/04/06 19:39:11 pollita Exp $ */
+/* $Id: streams.c,v 1.120 2006/04/12 22:40:56 pollita Exp $ */
 
 #define _GNU_SOURCE
 #include "php.h"
@@ -2305,13 +2305,17 @@
 
        /* Output encoding on text mode streams defaults to utf8 unless 
specified in context parameter */
        if (stream && strchr(implicit_mode, 't') && UG(unicode)) {
-               if (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || 
strchr(implicit_mode, '+')) {
+               /* Only apply implicit unicode.to. filter if the wrapper didn't 
do it for us */
+               if ((php_stream_filter_product(&stream->writefilters, 
IS_UNICODE) == IS_STRING) && 
+                       (strchr(implicit_mode, 'w') || strchr(implicit_mode, 
'a') || strchr(implicit_mode, '+'))) {
                        char *encoding = (context && context->output_encoding) 
? context->output_encoding : "utf8";
 
                        /* UTODO: (Maybe?) Allow overriding the default error 
handlers on a per-stream basis via context params */
                        php_stream_encoding_apply(stream, 1, encoding, 
UG(from_error_mode), UG(from_subst_char));
                }
-               if (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+')) {
+
+               /* Only apply implicit unicode.from. filter if the wrapper 
didn't do it for us */
+               if ((stream->readbuf_type == IS_STRING) && 
(strchr(implicit_mode, 'r') || strchr(implicit_mode, '+'))) {
                        char *encoding = (context && context->input_encoding) ? 
context->input_encoding : "utf8";
 
                        /* UTODO: (Maybe?) Allow overriding the default error 
handlers on a per-stream basis via context params */

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to