gwynne Mon, 29 Aug 2011 14:56:19 +0000 Revision: http://svn.php.net/viewvc?view=revision&revision=315707
Log:
Add unescaped Unicode encoding to json_encode(). Closes bug #53946. Patch by Irker and Gwynne.

Bug: https://bugs.php.net/53946 (Assigned) add json_encode option for not escaping unnecessary character

Changed paths:
    U   php/php-src/branches/PHP_5_4/ext/json/json.c
    U   php/php-src/branches/PHP_5_4/ext/json/php_json.h
    U   php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c
    U   php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h
    U   php/php-src/trunk/ext/json/json.c
    U   php/php-src/trunk/ext/json/php_json.h
    A   php/php-src/trunk/ext/json/tests/bug53946.phpt
    U   php/php-src/trunk/ext/json/utf8_to_utf16.c
    U   php/php-src/trunk/ext/json/utf8_to_utf16.h
Modified: php/php-src/branches/PHP_5_4/ext/json/json.c =================================================================== --- php/php-src/branches/PHP_5_4/ext/json/json.c 2011-08-29 14:32:46 UTC (rev 315706) +++ php/php-src/branches/PHP_5_4/ext/json/json.c 2011-08-29 14:56:19 UTC (rev 315707) @@ -95,6 +95,7 @@ REGISTER_LONG_CONSTANT("JSON_NUMERIC_CHECK", PHP_JSON_NUMERIC_CHECK, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_UNESCAPED_SLASHES", PHP_JSON_UNESCAPED_SLASHES, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_PRETTY_PRINT", PHP_JSON_PRETTY_PRINT, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_ERROR_NONE", PHP_JSON_ERROR_NONE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_ERROR_DEPTH", PHP_JSON_ERROR_DEPTH, CONST_CS | CONST_PERSISTENT); @@ -346,7 +347,7 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSRMLS_DC) /* {{{ */ { - int pos = 0; + int pos = 0, ulen = 0; unsigned short us; unsigned short *utf16; @@ -378,15 +379,14 @@ } } - - utf16 = (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0); - - len = utf8_to_utf16(utf16, s, len); - if (len <= 0) { + + utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? NULL : (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0); + ulen = utf8_to_utf16(utf16, s, len); + if (ulen <= 0) { if (utf16) { efree(utf16); } - if (len < 0) { + if (ulen < 0) { JSON_G(error_code) = PHP_JSON_ERROR_UTF8; if (!PG(display_errors)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid UTF-8 sequence in argument"); @@ -397,12 +397,15 @@ } return; } + if (!(options & PHP_JSON_UNESCAPED_UNICODE)) { + len = ulen; + } smart_str_appendc(buf, '"'); while (pos < len) { - us = utf16[pos++]; + us = (options & PHP_JSON_UNESCAPED_UNICODE) ? 
s[pos++] : utf16[pos++]; switch (us) { @@ -479,7 +482,7 @@ break; default: - if (us >= ' ' && (us & 127) == us) { + if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) { smart_str_appendc(buf, (unsigned char) us); } else { smart_str_appendl(buf, "\\u", 2); @@ -498,7 +501,9 @@ } smart_str_appendc(buf, '"'); - efree(utf16); + if (utf16) { + efree(utf16); + } } /* }}} */ Modified: php/php-src/branches/PHP_5_4/ext/json/php_json.h =================================================================== --- php/php-src/branches/PHP_5_4/ext/json/php_json.h 2011-08-29 14:32:46 UTC (rev 315706) +++ php/php-src/branches/PHP_5_4/ext/json/php_json.h 2011-08-29 14:56:19 UTC (rev 315707) @@ -62,6 +62,7 @@ #define PHP_JSON_NUMERIC_CHECK (1<<5) #define PHP_JSON_UNESCAPED_SLASHES (1<<6) #define PHP_JSON_PRETTY_PRINT (1<<7) +#define PHP_JSON_UNESCAPED_UNICODE (1<<8) /* Internal flags */ #define PHP_JSON_OUTPUT_ARRAY 0 Modified: php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c =================================================================== --- php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c 2011-08-29 14:32:46 UTC (rev 315706) +++ php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c 2011-08-29 14:56:19 UTC (rev 315707) @@ -30,7 +30,7 @@ #include "utf8_decode.h" int -utf8_to_utf16(unsigned short w[], char p[], int length) +utf8_to_utf16(unsigned short *w, char p[], int length) { int c; int the_index = 0; @@ -43,14 +43,17 @@ return (c == UTF8_END) ? 
the_index : UTF8_ERROR; } if (c < 0x10000) { - w[the_index] = (unsigned short)c; + if (w) { + w[the_index] = (unsigned short)c; + } the_index += 1; } else { c -= 0x10000; - w[the_index] = (unsigned short)(0xD800 | (c >> 10)); - the_index += 1; - w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF)); - the_index += 1; + if (w) { + w[the_index] = (unsigned short)(0xD800 | (c >> 10)); + w[the_index + 1] = (unsigned short)(0xDC00 | (c & 0x3FF)); + } + the_index += 2; } } } Modified: php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h =================================================================== --- php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h 2011-08-29 14:32:46 UTC (rev 315706) +++ php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h 2011-08-29 14:56:19 UTC (rev 315707) @@ -1,3 +1,3 @@ /* utf8_to_utf16.h */ -extern int utf8_to_utf16(unsigned short w[], char p[], int length); +extern int utf8_to_utf16(unsigned short *w, char p[], int length); Modified: php/php-src/trunk/ext/json/json.c =================================================================== --- php/php-src/trunk/ext/json/json.c 2011-08-29 14:32:46 UTC (rev 315706) +++ php/php-src/trunk/ext/json/json.c 2011-08-29 14:56:19 UTC (rev 315707) @@ -95,6 +95,7 @@ REGISTER_LONG_CONSTANT("JSON_NUMERIC_CHECK", PHP_JSON_NUMERIC_CHECK, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_UNESCAPED_SLASHES", PHP_JSON_UNESCAPED_SLASHES, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_PRETTY_PRINT", PHP_JSON_PRETTY_PRINT, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_ERROR_NONE", PHP_JSON_ERROR_NONE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_ERROR_DEPTH", PHP_JSON_ERROR_DEPTH, CONST_CS | CONST_PERSISTENT); @@ -346,7 +347,7 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSRMLS_DC) /* {{{ */ { - int pos = 0; + int pos = 
0, ulen = 0; unsigned short us; unsigned short *utf16; @@ -378,15 +379,14 @@ } } - - utf16 = (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0); - - len = utf8_to_utf16(utf16, s, len); - if (len <= 0) { + + utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? NULL : (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0); + ulen = utf8_to_utf16(utf16, s, len); + if (ulen <= 0) { if (utf16) { efree(utf16); } - if (len < 0) { + if (ulen < 0) { JSON_G(error_code) = PHP_JSON_ERROR_UTF8; if (!PG(display_errors)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid UTF-8 sequence in argument"); @@ -397,12 +397,15 @@ } return; } + if (!(options & PHP_JSON_UNESCAPED_UNICODE)) { + len = ulen; + } smart_str_appendc(buf, '"'); while (pos < len) { - us = utf16[pos++]; + us = (options & PHP_JSON_UNESCAPED_UNICODE) ? s[pos++] : utf16[pos++]; switch (us) { @@ -479,7 +482,7 @@ break; default: - if (us >= ' ' && (us & 127) == us) { + if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) { smart_str_appendc(buf, (unsigned char) us); } else { smart_str_appendl(buf, "\\u", 2); @@ -498,7 +501,9 @@ } smart_str_appendc(buf, '"'); - efree(utf16); + if (utf16) { + efree(utf16); + } } /* }}} */ Modified: php/php-src/trunk/ext/json/php_json.h =================================================================== --- php/php-src/trunk/ext/json/php_json.h 2011-08-29 14:32:46 UTC (rev 315706) +++ php/php-src/trunk/ext/json/php_json.h 2011-08-29 14:56:19 UTC (rev 315707) @@ -62,6 +62,7 @@ #define PHP_JSON_NUMERIC_CHECK (1<<5) #define PHP_JSON_UNESCAPED_SLASHES (1<<6) #define PHP_JSON_PRETTY_PRINT (1<<7) +#define PHP_JSON_UNESCAPED_UNICODE (1<<8) /* Internal flags */ #define PHP_JSON_OUTPUT_ARRAY 0 Added: php/php-src/trunk/ext/json/tests/bug53946.phpt =================================================================== --- php/php-src/trunk/ext/json/tests/bug53946.phpt (rev 0) +++ php/php-src/trunk/ext/json/tests/bug53946.phpt 2011-08-29 14:56:19 UTC (rev 
315707) @@ -0,0 +1,16 @@ +--TEST-- +bug #53946 (json_encode() with JSON_UNESCAPED_UNICODE) +--SKIPIF-- +<?php if (!extension_loaded("json")) print "skip"; ?> +--FILE-- +<?php +var_dump(json_encode("latin 1234 -/ russian мама мыла раму specialchars \x02 \x08 \n U+1D11E >𝄞<")); +var_dump(json_encode("latin 1234 -/ russian мама мыла раму specialchars \x02 \x08 \n U+1D11E >𝄞<", JSON_UNESCAPED_UNICODE)); +var_dump(json_encode("ab\xE0")); +var_dump(json_encode("ab\xE0", JSON_UNESCAPED_UNICODE)); +?> +--EXPECT-- +string(156) ""latin 1234 -\/ russian \u043c\u0430\u043c\u0430 \u043c\u044b\u043b\u0430 \u0440\u0430\u043c\u0443 specialchars \u0002 \b \n U+1D11E >\ud834\udd1e<"" +string(100) ""latin 1234 -\/ russian мама мыла раму specialchars \u0002 \b \n U+1D11E >𝄞<"" +string(4) "null" +string(4) "null" Modified: php/php-src/trunk/ext/json/utf8_to_utf16.c =================================================================== --- php/php-src/trunk/ext/json/utf8_to_utf16.c 2011-08-29 14:32:46 UTC (rev 315706) +++ php/php-src/trunk/ext/json/utf8_to_utf16.c 2011-08-29 14:56:19 UTC (rev 315707) @@ -30,7 +30,7 @@ #include "utf8_decode.h" int -utf8_to_utf16(unsigned short w[], char p[], int length) +utf8_to_utf16(unsigned short *w, char p[], int length) { int c; int the_index = 0; @@ -43,14 +43,17 @@ return (c == UTF8_END) ? 
the_index : UTF8_ERROR; } if (c < 0x10000) { - w[the_index] = (unsigned short)c; + if (w) { + w[the_index] = (unsigned short)c; + } the_index += 1; } else { c -= 0x10000; - w[the_index] = (unsigned short)(0xD800 | (c >> 10)); - the_index += 1; - w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF)); - the_index += 1; + if (w) { + w[the_index] = (unsigned short)(0xD800 | (c >> 10)); + w[the_index + 1] = (unsigned short)(0xDC00 | (c & 0x3FF)); + } + the_index += 2; } } } Modified: php/php-src/trunk/ext/json/utf8_to_utf16.h =================================================================== --- php/php-src/trunk/ext/json/utf8_to_utf16.h 2011-08-29 14:32:46 UTC (rev 315706) +++ php/php-src/trunk/ext/json/utf8_to_utf16.h 2011-08-29 14:56:19 UTC (rev 315707) @@ -1,3 +1,3 @@ /* utf8_to_utf16.h */ -extern int utf8_to_utf16(unsigned short w[], char p[], int length); +extern int utf8_to_utf16(unsigned short *w, char p[], int length);
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php