dmitry Tue Jul 10 15:12:15 2007 UTC Modified files: /php-src/ext/standard html.c Log: MFB htmlentities() / htmlspecialchars() "don't double encode" flag support http://cvs.php.net/viewvc.cgi/php-src/ext/standard/html.c?r1=1.122&r2=1.123&diff_format=u Index: php-src/ext/standard/html.c diff -u php-src/ext/standard/html.c:1.122 php-src/ext/standard/html.c:1.123 --- php-src/ext/standard/html.c:1.122 Thu Jan 18 16:21:00 2007 +++ php-src/ext/standard/html.c Tue Jul 10 15:12:15 2007 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: html.c,v 1.122 2007/01/18 16:21:00 tony2001 Exp $ */ +/* $Id: html.c,v 1.123 2007/07/10 15:12:15 dmitry Exp $ */ /* * HTML entity resources: @@ -1092,9 +1092,8 @@ /* {{{ php_escape_html_entities */ -PHPAPI char *php_escape_html_entities(char *orig, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC) +PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC) { - unsigned char *old = (unsigned char *)orig; int i, j, maxlen, len; char *replaced; enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC); @@ -1155,8 +1154,34 @@ int is_basic = 0; if (this_char == '&') { - memcpy(replaced + len, "&amp;", sizeof("&amp;") - 1); - len += sizeof("&amp;") - 1; + if (double_encode) { +encode_amp: + memcpy(replaced + len, "&amp;", sizeof("&amp;") - 1); + len += sizeof("&amp;") - 1; + } else { + char *e = memchr(old + i, ';', oldlen - i); + char *s = (char*)old + i; + + if (!e || (e - s) > 10) { /* minor optimization to avoid "entities" over 10 chars in length */ + goto encode_amp; + } else { + if (*s == '#') { /* numeric entities */ + s++; + while (s < e) { + if (!isdigit(*s++)) { + goto encode_amp; + } + } + } else { /* text entities */ + while (s < e) { + if (!isalnum(*s++)) { + goto encode_amp; + } + } + } + replaced[len++] = '&'; + } + } is_basic = 1; } else { for (j = 0; 
basic_entities[j].charcode != 0; j++) { @@ -1194,6 +1219,11 @@ } /* }}} */ +PHPAPI char *php_escape_html_entities(char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC) +{ + return php_escape_html_entities_ex((unsigned char*)old, oldlen, newlen, all, quote_style, hint_charset, 1 TSRMLS_CC); +} + /* {{{ php_html_entities */ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all) @@ -1207,8 +1237,9 @@ long quote_style = ENT_COMPAT; zend_uchar type; char *replaced; + zend_bool double_encode = 1; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|ls", &str, &str_len, &type, &quote_style, &hint_charset, &hint_charset_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|ls!b", &str, &str_len, &type, &quote_style, &hint_charset, &hint_charset_len, &double_encode) == FAILURE) { return; } @@ -1219,7 +1250,7 @@ hint_charset = "utf-8"; } - replaced = php_escape_html_entities(str.s, str_len, &len, all, quote_style, hint_charset TSRMLS_CC); + replaced = php_escape_html_entities_ex((unsigned char*)str.s, str_len, &len, all, quote_style, hint_charset, double_encode TSRMLS_CC); if (type == IS_UNICODE) { RETVAL_U_STRINGL(UG(utf8_conv), replaced, len, ZSTR_AUTOFREE); @@ -1245,7 +1276,7 @@ } /* }}} */ -/* {{{ proto string htmlspecialchars(string string [, int quote_style][, string charset]) U +/* {{{ proto string htmlspecialchars(string string [, int quote_style[, string charset[, bool double_encode]]]) Convert special characters to HTML entities */ PHP_FUNCTION(htmlspecialchars) { @@ -1380,7 +1411,7 @@ /* }}} */ -/* {{{ proto string htmlentities(string string [, int quote_style][, string charset]) U +/* {{{ proto string htmlentities(string string [, int quote_style[, string charset[, bool double_encode]]]) Convert all applicable characters to HTML entities */ PHP_FUNCTION(htmlentities) {
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php