dmitry          Tue Jul 10 15:12:15 2007 UTC

  Modified files:              
    /php-src/ext/standard       html.c 
  Log:
  MFB htmlentities() / htmlspecialchars() "don't double encode" flag support
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/html.c?r1=1.122&r2=1.123&diff_format=u
Index: php-src/ext/standard/html.c
diff -u php-src/ext/standard/html.c:1.122 php-src/ext/standard/html.c:1.123
--- php-src/ext/standard/html.c:1.122   Thu Jan 18 16:21:00 2007
+++ php-src/ext/standard/html.c Tue Jul 10 15:12:15 2007
@@ -18,7 +18,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: html.c,v 1.122 2007/01/18 16:21:00 tony2001 Exp $ */
+/* $Id: html.c,v 1.123 2007/07/10 15:12:15 dmitry Exp $ */
 
 /*
  * HTML entity resources:
@@ -1092,9 +1092,8 @@
 
 /* {{{ php_escape_html_entities
  */
-PHPAPI char *php_escape_html_entities(char *orig, int oldlen, int *newlen, int 
all, int quote_style, char *hint_charset TSRMLS_DC)
+PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int 
*newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode 
TSRMLS_DC)
 {
-       unsigned char *old = (unsigned char *)orig;
        int i, j, maxlen, len;
        char *replaced;
        enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC);
@@ -1155,8 +1154,34 @@
                        int is_basic = 0;
 
                        if (this_char == '&') {
-                               memcpy(replaced + len, "&amp;", sizeof("&amp;") 
- 1);
-                               len += sizeof("&amp;") - 1;
+                               if (double_encode) {
+encode_amp:
+                                       memcpy(replaced + len, "&amp;", 
sizeof("&amp;") - 1);
+                                       len += sizeof("&amp;") - 1;
+                               } else {
+                                       char *e = memchr(old + i, ';', oldlen - 
i);
+                                       char *s = (char*)old + i;
+
+                                       if (!e || (e - s) > 10) { /* minor 
optimization to avoid "entities" over 10 chars in length */
+                                               goto encode_amp;
+                                       } else {
+                                               if (*s == '#') { /* numeric 
entities */
+                                                       s++;
+                                                       while (s < e) {
+                                                               if 
(!isdigit(*s++)) {
+                                                                       goto 
encode_amp;
+                                                               }
+                                                       }
+                                               } else { /* text entities */
+                                                       while (s < e) {
+                                                               if 
(!isalnum(*s++)) {
+                                                                       goto 
encode_amp;
+                                                               }
+                                                       }
+                                               }
+                                               replaced[len++] = '&';
+                                       }
+                               }
                                is_basic = 1;
                        } else {
                                for (j = 0; basic_entities[j].charcode != 0; 
j++) {
@@ -1194,6 +1219,11 @@
 }
 /* }}} */
 
+PHPAPI char *php_escape_html_entities(char *old, int oldlen, int *newlen, int 
all, int quote_style, char *hint_charset TSRMLS_DC)
+{
+       return php_escape_html_entities_ex((unsigned char*)old, oldlen, newlen, 
all, quote_style, hint_charset, 1 TSRMLS_CC);
+}
+
 /* {{{ php_html_entities
  */
 static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
@@ -1207,8 +1237,9 @@
        long quote_style = ENT_COMPAT;
        zend_uchar type;
        char *replaced;
+       zend_bool double_encode = 1;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|ls", &str, 
&str_len, &type, &quote_style, &hint_charset, &hint_charset_len) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|ls!b", &str, 
&str_len, &type, &quote_style, &hint_charset, &hint_charset_len, 
&double_encode) == FAILURE) {
                return;
        }
 
@@ -1219,7 +1250,7 @@
                hint_charset = "utf-8";
        }
 
-       replaced = php_escape_html_entities(str.s, str_len, &len, all, 
quote_style, hint_charset TSRMLS_CC);
+       replaced = php_escape_html_entities_ex((unsigned char*)str.s, str_len, 
&len, all, quote_style, hint_charset, double_encode TSRMLS_CC);
 
        if (type == IS_UNICODE) {
                RETVAL_U_STRINGL(UG(utf8_conv), replaced, len, ZSTR_AUTOFREE);
@@ -1245,7 +1276,7 @@
 }
 /* }}} */
 
-/* {{{ proto string htmlspecialchars(string string [, int quote_style][, 
string charset]) U
+/* {{{ proto string htmlspecialchars(string string [, int quote_style[, string 
charset[, bool double_encode]]])
    Convert special characters to HTML entities */
 PHP_FUNCTION(htmlspecialchars)
 {
@@ -1380,7 +1411,7 @@
 /* }}} */
 
 
-/* {{{ proto string htmlentities(string string [, int quote_style][, string 
charset]) U
+/* {{{ proto string htmlentities(string string [, int quote_style[, string 
charset[, bool double_encode]]])
    Convert all applicable characters to HTML entities */
 PHP_FUNCTION(htmlentities)
 {

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to