Hi Rui! It would rock if you could use a sparse checkout for your commits :)
Thanks, On Mon, Jul 18, 2011 at 10:21 AM, Rui Hirokawa <hirok...@php.net> wrote: > hirokawa Mon, 18 Jul 2011 08:21:48 +0000 > > Revision: http://svn.php.net/viewvc?view=revision&revision=313365 > > Log: > added numeric entities encode/decode in hex format. > > Changed paths: > U php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/mbfl/mbfilter.c > U php/php-src/branches/PHP_5_4/ext/mbstring/mbstring.c > > Modified: php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/mbfl/mbfilter.c > =================================================================== > --- php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/mbfl/mbfilter.c > 2011-07-18 07:55:03 UTC (rev 313364) > +++ php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/mbfl/mbfilter.c > 2011-07-18 08:21:48 UTC (rev 313365) > @@ -2746,7 +2746,9 @@ > } > break; > case 2: > - if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */ > + if (c == 0x78) { /* 'x' */ > + pc->status = 4; > + } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */ > pc->cache = c - 0x30; > pc->status = 3; > pc->digit = 1; > @@ -2810,6 +2812,89 @@ > (*pc->decoder->filter_function)(c, pc->decoder); > } > break; > + case 4: > + if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */ > + pc->cache = c - 0x30; > + pc->status = 5; > + pc->digit = 1; > + } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F' */ > + pc->cache = c - 0x41 + 10; > + pc->status = 5; > + pc->digit = 1; > + } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f' */ > + pc->cache = c - 0x61 + 10; > + pc->status = 5; > + pc->digit = 1; > + } else { > + pc->status = 0; > + (*pc->decoder->filter_function)(0x26, pc->decoder); > /* '&' */ > + (*pc->decoder->filter_function)(0x23, pc->decoder); > /* '#' */ > + (*pc->decoder->filter_function)(0x78, pc->decoder); > /* 'x' */ > + (*pc->decoder->filter_function)(c, pc->decoder); > + } > + break; > + case 5: > + s = 0; > + f = 0; > + if ((c >= 0x30 && c <= 0x39) || > + (c >= 0x41 && c <= 0x46) || > + (c >= 0x61 && c <= 0x66)) { /* '0' - '9' or 'a' - > 'f' */ > + if (pc->digit 
> 9) { > + pc->status = 0; > + s = pc->cache; > + f = 1; > + } else { > + if (c >= 0x30 && c <= 0x39) { > + s = pc->cache*16 + (c - 0x30); > + } else if (c >= 0x41 && c <= 0x46) { > + s = pc->cache*16 + (c - 0x41 + 10); > + } else { > + s = pc->cache*16 + (c - 0x61 + 10); > + } > + pc->cache = s; > + pc->digit++; > + } > + } else { > + pc->status = 0; > + s = pc->cache; > + f = 1; > + n = 0; > + size = pc->mapsize; > + while (n < size) { > + mapelm = &(pc->convmap[n*4]); > + d = s - mapelm[2]; > + if (d >= mapelm[0] && d <= mapelm[1]) { > + f = 0; > + (*pc->decoder->filter_function)(d, > pc->decoder); > + if (c != 0x3b) { /* ';' */ > + > (*pc->decoder->filter_function)(c, pc->decoder); > + } > + break; > + } > + n++; > + } > + } > + if (f) { > + (*pc->decoder->filter_function)(0x26, pc->decoder); > /* '&' */ > + (*pc->decoder->filter_function)(0x23, pc->decoder); > /* '#' */ > + (*pc->decoder->filter_function)(0x78, pc->decoder); > /* 'x' */ > + r = 1; > + n = pc->digit; > + while (n > 0) { > + r *= 16; > + n--; > + } > + s %= r; > + r /= 16; > + while (r > 0) { > + d = s/r; > + s %= r; > + r /= 16; > + > (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); > + } > + (*pc->decoder->filter_function)(c, pc->decoder); > + } > + break; > default: > if (c == 0x26) { /* '&' */ > pc->status = 1; > @@ -2822,6 +2907,53 @@ > return c; > } > > +static int > +collector_encode_hex_htmlnumericentity(int c, void *data) > +{ > + struct collector_htmlnumericentity_data *pc = (struct > collector_htmlnumericentity_data *)data; > + int f, n, s, r, d, size, *mapelm; > + > + size = pc->mapsize; > + f = 0; > + n = 0; > + while (n < size) { > + mapelm = &(pc->convmap[n*4]); > + if (c >= mapelm[0] && c <= mapelm[1]) { > + s = (c + mapelm[2]) & mapelm[3]; > + if (s >= 0) { > + (*pc->decoder->filter_function)(0x26, > pc->decoder); /* '&' */ > + (*pc->decoder->filter_function)(0x23, > pc->decoder); /* '#' */ > + (*pc->decoder->filter_function)(0x78, > pc->decoder); /* 'x' */ > 
+ r = 0x1000000; > + s %= r; > + while (r > 0) { > + d = s/r; > + if (d || f) { > + f = 1; > + s %= r; > + > (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); > + } > + r /= 16; > + } > + if (!f) { > + f = 1; > + > (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder); > + } > + (*pc->decoder->filter_function)(0x3b, > pc->decoder); /* ';' */ > + } > + } > + if (f) { > + break; > + } > + n++; > + } > + if (!f) { > + (*pc->decoder->filter_function)(c, pc->decoder); > + } > + > + return c; > +} > + > mbfl_string * > mbfl_html_numeric_entity( > mbfl_string *string, > @@ -2850,15 +2982,20 @@ > string->no_encoding, > mbfl_memory_device_output, 0, &device); > /* wchar filter */ > - if (type == 0) { > + if (type == 0) { /* decimal output */ > encoder = mbfl_convert_filter_new( > string->no_encoding, > mbfl_no_encoding_wchar, > collector_encode_htmlnumericentity, 0, &pc); > - } else { > + } else if (type == 2) { /* hex output */ > encoder = mbfl_convert_filter_new( > string->no_encoding, > mbfl_no_encoding_wchar, > + collector_encode_hex_htmlnumericentity, 0, &pc); > + } else { /* type == 1: decimal/hex input */ > + encoder = mbfl_convert_filter_new( > + string->no_encoding, > + mbfl_no_encoding_wchar, > collector_decode_htmlnumericentity, 0, &pc); > } > if (pc.decoder == NULL || encoder == NULL) { > > Modified: php/php-src/branches/PHP_5_4/ext/mbstring/mbstring.c > =================================================================== > --- php/php-src/branches/PHP_5_4/ext/mbstring/mbstring.c 2011-07-18 > 07:55:03 UTC (rev 313364) > +++ php/php-src/branches/PHP_5_4/ext/mbstring/mbstring.c 2011-07-18 > 08:21:48 UTC (rev 313365) > @@ -412,6 +412,7 @@ > ZEND_ARG_INFO(0, string) > ZEND_ARG_INFO(0, convmap) > ZEND_ARG_INFO(0, encoding) > + ZEND_ARG_INFO(0, is_hex) > ZEND_END_ARG_INFO() > > ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2) > @@ -3682,10 +3683,11 @@ > HashTable *target_hash; > size_t argc = ZEND_NUM_ARGS(); > int i, 
*convmap, *mapelm, mapsize=0; > + zend_bool is_hex = 0; > mbfl_string string, result, *ret; > enum mbfl_no_encoding no_encoding; > > - if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, > &zconvmap, &encoding, &encoding_len) == FAILURE) { > + if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, > &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) { > return; > } > > @@ -3696,7 +3698,7 @@ > string.len = str_len; > > /* encoding */ > - if (argc == 3) { > + if ((argc == 3 || argc == 4) && encoding_len > 0) { > no_encoding = mbfl_name2no_encoding(encoding); > if (no_encoding == mbfl_no_encoding_invalid) { > php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown > encoding \"%s\"", encoding); > @@ -3706,6 +3708,12 @@ > } > } > > + if (argc == 4) { > + if (type == 0 && is_hex) { > + type = 2; /* output in hex format */ > + } > + } > + > /* conversion map */ > convmap = NULL; > if (Z_TYPE_P(zconvmap) == IS_ARRAY) { > @@ -3743,7 +3751,7 @@ > } > /* }}} */ > > -/* {{{ proto string mb_encode_numericentity(string string, array convmap [, > string encoding]) > +/* {{{ proto string mb_encode_numericentity(string string, array convmap [, > string encoding [, bool is_hex]]) > Converts specified characters to HTML numeric entities */ > PHP_FUNCTION(mb_encode_numericentity) > { > > > -- > PHP CVS Mailing List (http://www.php.net/) > To unsubscribe, visit: http://www.php.net/unsub.php > -- Pierre @pierrejoye | http://blog.thepimp.net | http://www.libgd.org