Hi, please excuse the late reply.
I reviewed the patch and adjusted its style to match the coding standards. Attached is the revised version diff'ed against HEAD. Please verify it, and from now on please be sure to read the CODING_STANDARDS file included in the source package before submitting a patch. BTW, your code doesn't seem to handle input whose escaped result might be longer than 256 bytes. IMO an erealloc() call is missing somewhere. As for the rest, I see no obvious problems. Moriyoshi "Adrian Gartland" <[EMAIL PROTECTED]> wrote: > New patch applied against the current "php4-latest.tar.gz", > same location: > http://support.oregan.net/php/php_htmlspecialchars_iso_2022-jp.patch > > On 11 Nov 02, "Moriyoshi Koizumi" <[EMAIL PROTECTED]> wrote: > > Could you make a patch diff'ed against the latest version of html.c in HEAD > > branch? determine_charset() issue which you pointed out seems to have been > > fixed already. > > > > Moriyoshi > > > > "Adrian Gartland" <[EMAIL PROTECTED]> wrote: > > > > > http://support.oregan.net/php/php_htmlspecialchars_iso_2022-jp.patch > > > > > > On 11 Nov 02, "Jan Schneider" <[EMAIL PROTECTED]> wrote: > > > > Zitat von Adrian Gartland <[EMAIL PROTECTED]>: > > > > > > > > > Attached is a patch which allows iso-2022-jp (jis) encoded text to be > > > > > passed through htmlspecialchars when the character set is > > > > > set to ISO-2022-JP. > > > > > > > > > > It should also fix a tiny bug I found in "determine_charset" > > > > > code where "len" hadn't been set and then doing its > > > > > charset map walk. > > > > > > > > Your attachment didn't go through the mailing list filters. Please post a > > > > link where the patch can be downloaded. > > > > > > > > Jan. 
> > > > > > > > -- > > > > http://www.horde.org - The Horde Project > > > > http://www.ammma.de - discover your knowledge > > > > http://www.tip4all.de - Deine private Tippgemeinschaft > > > > > > > > -- > > > > PHP Development Mailing List <http://www.php.net/> > > > > To unsubscribe, visit: http://www.php.net/unsub.php > > > > > > > > > > > > > > > > > > > > > -- > > PHP Development Mailing List <http://www.php.net/> > > To unsubscribe, visit: http://www.php.net/unsub.php > > > > > > > > -- > Adrian Gartland - Senior Systems Engineer - TV Portal Team > Oregan Networks UK Ltd Tel: +44 (0) 20 8846 0990 > The White Building, 52-54 Glentham Road Fax: +44 (0) 20 8646 0999 > Barnes, London. SW13 9JJ, United Kingdom WWW: http://www.oregan.net/
--- html.c Mon Nov 18 04:11:27 2002 +++ html.c.next Tue Nov 19 05:51:43 2002 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: html.c,v 1.65 2002/11/16 08:30:31 sebastian Exp $ */ +/* $Id: html.c,v 1.63 2002/11/11 13:31:08 moriyoshi Exp $ */ #include "php.h" #if PHP_WIN32 @@ -43,7 +43,7 @@ #endif enum entity_charset { cs_terminator, cs_8859_1, cs_cp1252, - cs_8859_15, cs_utf_8, cs_big5, cs_gb2312, + cs_8859_15, cs_2022_jp, cs_utf_8, cs_big5, +cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp}; typedef const char *entity_table_t; @@ -288,6 +288,7 @@ } charset_map[] = { { "ISO-8859-1", cs_8859_1 }, { "ISO8859-1", cs_8859_1 }, + { "ISO-2022-JP", cs_2022_jp }, { "ISO-8859-15", cs_8859_15 }, { "ISO8859-15", cs_8859_15 }, { "utf-8", cs_utf_8 }, @@ -728,8 +729,138 @@ } /* }}} */ +/* {{{ next_iso2022_segment + * updates whatever psIn is pointing to the end of the multi-byte run + * esc$bxxxxxesc(byyyyy ; psIn = yyyy + */ +static const char *next_iso2022_segment(const unsigned char **psIn, int iInLen, const +char *pcEscapeSafeEnd) +{ + const char *sIn = *psIn; + const char *pcNextEsc; + static const char cEsc = 033; + int iSegmentLength; + int iRemaining = iInLen; + + pcNextEsc = sIn; + if (sIn > pcEscapeSafeEnd) { + /* Buffer overrun if we try and spot the escape chars */ + *psIn = sIn + iInLen; + return sIn; + } else { + while(1) { + pcNextEsc++; /* step past the current escape */ + + /* search for the closing escape sequence */ + while (cEsc != *pcNextEsc && iRemaining) { + iRemaining--; + pcNextEsc++; + } + + if (cEsc != *pcNextEsc) { + pcNextEsc = NULL; + } + + + if (NULL == pcNextEsc || pcNextEsc > pcEscapeSafeEnd) { + *psIn = sIn + iInLen; + return sIn; + } else { + if ('(' == pcNextEsc[1]) { + /*End of multi-byte run. 
*/ + + iSegmentLength = (pcNextEsc - sIn) + 3; + *psIn = sIn + iSegmentLength; + return sIn; + } + } + } + } +} +/* }}} */ + +/* {{{ next_iso2022_segment + * updates whatever psIn is pointing to the end of the multi-byte run + * esc$bxxxxxesc(byyyyy ; psIn = yyyy + */ +static const char *next_ascii_segment(const unsigned char **psIn, int iInLen) +{ + const char *sIn = *psIn; + const char *pcNextEsc; + static const char cEsc = 033; + int iRemaining = iInLen; + + pcNextEsc = sIn; + + while (1) { + while (cEsc != *pcNextEsc && iRemaining) { + iRemaining--; + pcNextEsc++; + } + + if (cEsc != *pcNextEsc) { + *psIn = sIn + iInLen; + return sIn; + } else { + *psIn = pcNextEsc; + return sIn; + } + } +} +/* }}} */ +/* {{{ escape_html_entities_ISO2022 + * single byte + * esc(B -> ASCII + * esc(J -> JIS Roman + * + * double byte + * esc$@ -> JIS C 6226-1978 + * esc$B -> JIS X 0208-1983 + */ +static char *escape_html_entities_ISO2022(const unsigned char *old, int oldlen, int +*newlen, int all, int quote_style, const char *hint_charset TSRMLS_DC) +{ + char *new; + const char *pcStringEnd; + const char *pcEscapeSafeEnd; + int maxlen, len; + static const char cEsc = 033; + maxlen = 2 * oldlen; + if (maxlen < 128) { + maxlen = 128; + } + + new = emalloc(maxlen); + len=0; + + pcStringEnd = old + oldlen; + pcEscapeSafeEnd = pcStringEnd - 3; + + /* break up into encoded and non encoded segments */ + while (oldlen) { + if (cEsc == *old) { + const unsigned char *sSegment = next_iso2022_segment(&old, +oldlen, pcEscapeSafeEnd); + int iSegmentLength = old - sSegment; + memcpy(new+len, sSegment, iSegmentLength); + len += iSegmentLength; + oldlen -= iSegmentLength; + } else { + const unsigned char *sSegment = next_ascii_segment(&old, +oldlen); + int iSegmentLength = old - sSegment; + int iNewLen; + char *sSpecialChared = php_escape_html_entities((char *) +sSegment, iSegmentLength, &iNewLen, all, quote_style, NULL TSRMLS_CC); + + memcpy(new+len, sSpecialChared, iNewLen); + len += iNewLen; + 
oldlen -= iSegmentLength; + efree(sSpecialChared); + } + } + + *newlen = len; + return new; +} +/* }}} */ /* {{{ php_escape_html_entities */ @@ -739,6 +870,10 @@ char *replaced; enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC); int matches_map; + + if (cs_2022_jp == charset) { + return escape_html_entities_ISO2022(old, oldlen, newlen, all, +quote_style, hint_charset TSRMLS_CC); + } maxlen = 2 * oldlen; if (maxlen < 128)
-- PHP Development Mailing List <http://www.php.net/> To unsubscribe, visit: http://www.php.net/unsub.php