Hi,

I know this may break backward compatibility (BC), but I think htmlentities()
should be more consistent with the mbstring module.

The attached patch changes the behaviour of htmlentities() and its internal
counterparts so that, when the charset parameter is not specified, the
character set of the input is taken from mbstring.internal_encoding rather
than defaulting to ISO-8859-1.

Although I have not fully confirmed it, this patch is also expected to fix
the phpinfo() display problem in multibyte environments.


Any comments are welcome.

Moriyoshi Koizumi
Index: html.c
===================================================================
RCS file: /repository/php4/ext/standard/html.c,v
retrieving revision 1.54
diff -u -r1.54 html.c
--- html.c      3 Oct 2002 12:06:52 -0000       1.54
+++ html.c      16 Oct 2002 20:31:01 -0000
@@ -31,6 +31,11 @@
 #include <langinfo.h>
 #endif
 
+#if HAVE_MBSTRING
+# include "ext/mbstring/mbstring.h"
+ZEND_EXTERN_MODULE_GLOBALS(mbstring)
+#endif
+
 enum entity_charset { cs_terminator, cs_8859_1, cs_cp1252,
                                          cs_8859_15, cs_utf_8, cs_big5, cs_gb2312, 
                                          cs_big5hkscs, cs_sjis, cs_eucjp};
@@ -520,9 +525,45 @@
        enum entity_charset charset = cs_8859_1;
        int len = 0;
 
+#if HAVE_MBSTRING
+       /* XXX: Ugly things. Why don't we look for a more sophisticated way? */
+       if (charset_hint == NULL) {
+               switch (MBSTRG(internal_encoding)) {
+                       case mbfl_no_encoding_utf8:
+                               return cs_utf_8;
+
+                       case mbfl_no_encoding_euc_jp:
+                       case mbfl_no_encoding_eucjp_win:
+                               return cs_eucjp;
+
+                       case mbfl_no_encoding_sjis:
+                       case mbfl_no_encoding_sjis_win:
+                       case mbfl_no_encoding_sjis_mac:
+                               return cs_sjis;
+
+                       case mbfl_no_encoding_cp1252:
+                               return cs_cp1252;
+
+                       case mbfl_no_encoding_8859_15:
+                               return cs_8859_15;
+
+                       case mbfl_no_encoding_big5:
+                               return cs_big5;
+
+                       case mbfl_no_encoding_euc_cn:
+                       case mbfl_no_encoding_hz:
+                       case mbfl_no_encoding_cp936:
+                               return cs_gb2312;
+
+                       default:
+                               return cs_8859_1;
+               }
+       }
+#else
        /* Guarantee default behaviour for backwards compatibility */
        if (charset_hint == NULL)
                return cs_8859_1;
+#endif
 
        if (strlen(charset_hint) == 0)  {
                /* try to detect the charset for the locale */
-- 
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to