moriyoshi Wed, 29 Jul 2009 04:44:08 +0000
Revision: http://svn.php.net/viewvc?view=revision&revision=286483
Log:
* Fix bug #48645 (mb_convert_encoding() doesn't understand hexadecimal
html-entities)
Bug: http://bugs.php.net/48645 (Assigned) mb_convert_encoding() doesn't
understand hexadecimal html-entities
Changed paths:
_U php/php-src/branches/PHP_5_2/
U php/php-src/branches/PHP_5_2/NEWS
U php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
A php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt
_U php/php-src/branches/PHP_5_3/
U php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
A php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt
U php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
A php/php-src/trunk/ext/mbstring/tests/bug48645.phpt
Property changes on: php/php-src/branches/PHP_5_2
___________________________________________________________________
Modified: svn:mergeinfo
- /php/php-src/branches/PHP_5_3:284120
+ /php/php-src/branches/PHP_5_3:284120
/php/php-src/trunk:284726
Modified: php/php-src/branches/PHP_5_2/NEWS
===================================================================
--- php/php-src/branches/PHP_5_2/NEWS 2009-07-29 04:29:30 UTC (rev 286482)
+++ php/php-src/branches/PHP_5_2/NEWS 2009-07-29 04:44:08 UTC (rev 286483)
@@ -1,6 +1,7 @@
PHP NEWS
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
?? ??? 2009, PHP 5.2.11
+- Fixed bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities). (Moriyoshi)
- Fixed regression in cURL extension that prevented flush of data to output
defined as a file handle. (Ilia)
Modified: php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
===================================================================
--- php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:29:30 UTC (rev 286482)
+++ php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:44:08 UTC (rev 286483)
@@ -186,18 +186,58 @@
}
} else {
if (c == ';') {
- buffer[filter->status] = 0;
if (buffer[1]=='#') {
- /* numeric entity */
- for (pos=2; pos<filter->status; pos++) {
- ent = ent*10 + (buffer[pos] - '0');
+ if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) {
+ if (filter->status > 3) {
+ /* numeric entity */
+ for (pos=3; pos<filter->status; pos++) {
+ int v = buffer[pos];
+ if (v >= '0' && v <= '9') {
+ v = v - '0';
+ } else if (v >= 'A' && v <= 'F') {
+ v = v - 'A' + 10;
+ } else if (v >= 'a' && v <= 'f') {
+ v = v - 'a' + 10;
+ } else {
+ ent = -1;
+ break;
+ }
+ ent = ent * 16 + v;
+ }
+ } else {
+ ent = -1;
+ }
+ } else {
+ /* numeric entity */
+ if (filter->status > 2) {
+ for (pos=2; pos<filter->status; pos++) {
+ int v = buffer[pos];
+ if (v >= '0' && v <= '9') {
+ v = v - '0';
+ } else {
+ ent = -1;
+ break;
+ }
+ ent = ent*10 + v;
+ }
+ } else {
+ ent = -1;
+ }
}
- CK((*filter->output_function)(ent, filter->data));
+ if (ent >= 0 && ent < 0x110000) {
+ CK((*filter->output_function)(ent, filter->data));
+ } else {
+ for (pos = 0; pos < filter->status; pos++) {
+ CK((*filter->output_function)(buffer[pos], filter->data));
+ }
+ CK((*filter->output_function)(c, filter->data));
+ }
filter->status = 0;
/*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/
} else {
/* named entity */
- entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
+ buffer[filter->status] = 0;
+ entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
while (entity->name) {
if (!strcmp(buffer+1, entity->name)) {
ent = entity->code;
Added: php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt
===================================================================
--- php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt (rev 0)
+++ php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt 2009-07-29 04:44:08 UTC (rev 286483)
@@ -0,0 +1,162 @@
+--TEST--
+Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+var_dump(bin2hex(mb_convert_encoding("&#x0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x10FFFF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X10FFFF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114111;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114112;", "UTF-8", "HTML-ENTITIES")));
+?>
+--EXPECT--
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623782f3b"
+string(10) "2623783a3b"
+string(10) "262378403b"
+string(10) "262378603b"
+string(10) "262378473b"
+string(10) "262378673b"
+string(8) "2623783b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623582f3b"
+string(10) "2623583a3b"
+string(10) "262358403b"
+string(10) "262358603b"
+string(10) "262358473b"
+string(10) "262358673b"
+string(8) "2623583b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(8) "26232f3b"
+string(8) "26233a3b"
+string(6) "26233b"
+string(8) "f48fbfbf"
+string(20) "2623783131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623583131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623313131343131323b"
Property changes on: php/php-src/branches/PHP_5_3
___________________________________________________________________
Added: svn:mergeinfo
+ /php/php-src/trunk:284726
Modified: php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
===================================================================
--- php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:29:30 UTC (rev 286482)
+++ php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:44:08 UTC (rev 286483)
@@ -186,18 +186,58 @@
}
} else {
if (c == ';') {
- buffer[filter->status] = 0;
if (buffer[1]=='#') {
- /* numeric entity */
- for (pos=2; pos<filter->status; pos++) {
- ent = ent*10 + (buffer[pos] - '0');
+ if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) {
+ if (filter->status > 3) {
+ /* numeric entity */
+ for (pos=3; pos<filter->status; pos++) {
+ int v = buffer[pos];
+ if (v >= '0' && v <= '9') {
+ v = v - '0';
+ } else if (v >= 'A' && v <= 'F') {
+ v = v - 'A' + 10;
+ } else if (v >= 'a' && v <= 'f') {
+ v = v - 'a' + 10;
+ } else {
+ ent = -1;
+ break;
+ }
+ ent = ent * 16 + v;
+ }
+ } else {
+ ent = -1;
+ }
+ } else {
+ /* numeric entity */
+ if (filter->status > 2) {
+ for (pos=2; pos<filter->status; pos++) {
+ int v = buffer[pos];
+ if (v >= '0' && v <= '9') {
+ v = v - '0';
+ } else {
+ ent = -1;
+ break;
+ }
+ ent = ent*10 + v;
+ }
+ } else {
+ ent = -1;
+ }
}
- CK((*filter->output_function)(ent, filter->data));
+ if (ent >= 0 && ent < 0x110000) {
+ CK((*filter->output_function)(ent, filter->data));
+ } else {
+ for (pos = 0; pos < filter->status; pos++) {
+ CK((*filter->output_function)(buffer[pos], filter->data));
+ }
+ CK((*filter->output_function)(c, filter->data));
+ }
filter->status = 0;
/*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/
} else {
/* named entity */
- entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
+ buffer[filter->status] = 0;
+ entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
while (entity->name) {
if (!strcmp(buffer+1, entity->name)) {
ent = entity->code;
Added: php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt
===================================================================
--- php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt (rev 0)
+++ php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt 2009-07-29 04:44:08 UTC (rev 286483)
@@ -0,0 +1,162 @@
+--TEST--
+Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+var_dump(bin2hex(mb_convert_encoding("&#x0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x10FFFF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X10FFFF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114111;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114112;", "UTF-8", "HTML-ENTITIES")));
+?>
+--EXPECT--
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623782f3b"
+string(10) "2623783a3b"
+string(10) "262378403b"
+string(10) "262378603b"
+string(10) "262378473b"
+string(10) "262378673b"
+string(8) "2623783b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623582f3b"
+string(10) "2623583a3b"
+string(10) "262358403b"
+string(10) "262358603b"
+string(10) "262358473b"
+string(10) "262358673b"
+string(8) "2623583b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(8) "26232f3b"
+string(8) "26233a3b"
+string(6) "26233b"
+string(8) "f48fbfbf"
+string(20) "2623783131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623583131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623313131343131323b"
Modified: php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
===================================================================
--- php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:29:30 UTC (rev 286482)
+++ php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:44:08 UTC (rev 286483)
@@ -186,18 +186,58 @@
}
} else {
if (c == ';') {
- buffer[filter->status] = 0;
if (buffer[1]=='#') {
- /* numeric entity */
- for (pos=2; pos<filter->status; pos++) {
- ent = ent*10 + (buffer[pos] - '0');
+ if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) {
+ if (filter->status > 3) {
+ /* numeric entity */
+ for (pos=3; pos<filter->status; pos++) {
+ int v = buffer[pos];
+ if (v >= '0' && v <= '9') {
+ v = v - '0';
+ } else if (v >= 'A' && v <= 'F') {
+ v = v - 'A' + 10;
+ } else if (v >= 'a' && v <= 'f') {
+ v = v - 'a' + 10;
+ } else {
+ ent = -1;
+ break;
+ }
+ ent = ent * 16 + v;
+ }
+ } else {
+ ent = -1;
+ }
+ } else {
+ /* numeric entity */
+ if (filter->status > 2) {
+ for (pos=2; pos<filter->status; pos++) {
+ int v = buffer[pos];
+ if (v >= '0' && v <= '9') {
+ v = v - '0';
+ } else {
+ ent = -1;
+ break;
+ }
+ ent = ent*10 + v;
+ }
+ } else {
+ ent = -1;
+ }
}
- CK((*filter->output_function)(ent, filter->data));
+ if (ent >= 0 && ent < 0x110000) {
+ CK((*filter->output_function)(ent, filter->data));
+ } else {
+ for (pos = 0; pos < filter->status; pos++) {
+ CK((*filter->output_function)(buffer[pos], filter->data));
+ }
+ CK((*filter->output_function)(c, filter->data));
+ }
filter->status = 0;
/*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/
} else {
/* named entity */
- entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
+ buffer[filter->status] = 0;
+ entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
while (entity->name) {
if (!strcmp(buffer+1, entity->name)) {
ent = entity->code;
Added: php/php-src/trunk/ext/mbstring/tests/bug48645.phpt
===================================================================
--- php/php-src/trunk/ext/mbstring/tests/bug48645.phpt (rev 0)
+++ php/php-src/trunk/ext/mbstring/tests/bug48645.phpt 2009-07-29 04:44:08 UTC (rev 286483)
@@ -0,0 +1,162 @@
+--TEST--
+Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+var_dump(bin2hex(mb_convert_encoding("&#x0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x10FFFF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X10FFFF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114111;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114112;", "UTF-8", "HTML-ENTITIES")));
+?>
+--EXPECT--
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623782f3b"
+string(10) "2623783a3b"
+string(10) "262378403b"
+string(10) "262378603b"
+string(10) "262378473b"
+string(10) "262378673b"
+string(8) "2623783b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623582f3b"
+string(10) "2623583a3b"
+string(10) "262358403b"
+string(10) "262358603b"
+string(10) "262358473b"
+string(10) "262358673b"
+string(8) "2623583b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(8) "26232f3b"
+string(8) "26233a3b"
+string(6) "26233b"
+string(8) "f48fbfbf"
+string(20) "2623783131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623583131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623313131343131323b"
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php