dmitry Tue Jul 10 15:12:15 2007 UTC
Modified files:
/php-src/ext/standard html.c
Log:
MFB htmlentities() / htmlspecialchars() "don't double encode" flag support
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/html.c?r1=1.122&r2=1.123&diff_format=u
Index: php-src/ext/standard/html.c
diff -u php-src/ext/standard/html.c:1.122 php-src/ext/standard/html.c:1.123
--- php-src/ext/standard/html.c:1.122 Thu Jan 18 16:21:00 2007
+++ php-src/ext/standard/html.c Tue Jul 10 15:12:15 2007
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: html.c,v 1.122 2007/01/18 16:21:00 tony2001 Exp $ */
+/* $Id: html.c,v 1.123 2007/07/10 15:12:15 dmitry Exp $ */
/*
* HTML entity resources:
@@ -1092,9 +1092,8 @@
/* {{{ php_escape_html_entities
*/
-PHPAPI char *php_escape_html_entities(char *orig, int oldlen, int *newlen, int
all, int quote_style, char *hint_charset TSRMLS_DC)
+PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int
*newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode
TSRMLS_DC)
{
- unsigned char *old = (unsigned char *)orig;
int i, j, maxlen, len;
char *replaced;
enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC);
@@ -1155,8 +1154,34 @@
int is_basic = 0;
if (this_char == '&') {
- memcpy(replaced + len, "&amp;", sizeof("&amp;")
- 1);
- len += sizeof("&amp;") - 1;
+ if (double_encode) {
+encode_amp:
+ memcpy(replaced + len, "&amp;",
sizeof("&amp;") - 1);
+ len += sizeof("&amp;") - 1;
+ } else {
+ char *e = memchr(old + i, ';', oldlen -
i);
+ char *s = (char*)old + i;
+
+ if (!e || (e - s) > 10) { /* minor
optimization to avoid "entities" over 10 chars in length */
+ goto encode_amp;
+ } else {
+ if (*s == '#') { /* numeric
entities */
+ s++;
+ while (s < e) {
+ if
(!isdigit(*s++)) {
+ goto
encode_amp;
+ }
+ }
+ } else { /* text entities */
+ while (s < e) {
+ if
(!isalnum(*s++)) {
+ goto
encode_amp;
+ }
+ }
+ }
+ replaced[len++] = '&';
+ }
+ }
is_basic = 1;
} else {
for (j = 0; basic_entities[j].charcode != 0;
j++) {
@@ -1194,6 +1219,11 @@
}
/* }}} */
+PHPAPI char *php_escape_html_entities(char *old, int oldlen, int *newlen, int
all, int quote_style, char *hint_charset TSRMLS_DC)
+{
+ return php_escape_html_entities_ex((unsigned char*)old, oldlen, newlen,
all, quote_style, hint_charset, 1 TSRMLS_CC);
+}
+
/* {{{ php_html_entities
*/
static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
@@ -1207,8 +1237,9 @@
long quote_style = ENT_COMPAT;
zend_uchar type;
char *replaced;
+ zend_bool double_encode = 1;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|ls", &str,
&str_len, &type, &quote_style, &hint_charset, &hint_charset_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|ls!b", &str,
&str_len, &type, &quote_style, &hint_charset, &hint_charset_len,
&double_encode) == FAILURE) {
return;
}
@@ -1219,7 +1250,7 @@
hint_charset = "utf-8";
}
- replaced = php_escape_html_entities(str.s, str_len, &len, all,
quote_style, hint_charset TSRMLS_CC);
+ replaced = php_escape_html_entities_ex((unsigned char*)str.s, str_len,
&len, all, quote_style, hint_charset, double_encode TSRMLS_CC);
if (type == IS_UNICODE) {
RETVAL_U_STRINGL(UG(utf8_conv), replaced, len, ZSTR_AUTOFREE);
@@ -1245,7 +1276,7 @@
}
/* }}} */
-/* {{{ proto string htmlspecialchars(string string [, int quote_style][,
string charset]) U
+/* {{{ proto string htmlspecialchars(string string [, int quote_style[, string
charset[, bool double_encode]]])
Convert special characters to HTML entities */
PHP_FUNCTION(htmlspecialchars)
{
@@ -1380,7 +1411,7 @@
/* }}} */
-/* {{{ proto string htmlentities(string string [, int quote_style][, string
charset]) U
+/* {{{ proto string htmlentities(string string [, int quote_style[, string
charset[, bool double_encode]]])
Convert all applicable characters to HTML entities */
PHP_FUNCTION(htmlentities)
{
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php