Hi,

Excuse me for the late reply.

I reviewed the patch and adjusted its style to conform to the coding standards.
Attached is the revised version, diff'ed against HEAD. Please verify it.
Also, please be sure to read CODING_STANDARDS, included in the source
package, before submitting patches from now on.

BTW, your code doesn't seem to handle an input string that might produce a
result longer than 256 bytes. IMO a call to erealloc() is missing somewhere.
As for the rest of the patch, I see no obvious problems.

Moriyoshi


"Adrian Gartland" <[EMAIL PROTECTED]> wrote:

> New patch applied against the current "php4-latest.tar.gz",
> same location:
> http://support.oregan.net/php/php_htmlspecialchars_iso_2022-jp.patch
> 
> On 11 Nov 02, "Moriyoshi Koizumi" <[EMAIL PROTECTED]> wrote:
> > Could you make a patch diff'ed against the latest version of html.c in HEAD 
> > branch? determine_charset() issue which you pointed out seems to have been 
> > fixed already.
> > 
> > Moriyoshi
> > 
> > "Adrian Gartland" <[EMAIL PROTECTED]> wrote:
> > 
> > > http://support.oregan.net/php/php_htmlspecialchars_iso_2022-jp.patch
> > > 
> > > On 11 Nov 02, "Jan Schneider" <[EMAIL PROTECTED]> wrote:
> > > > Zitat von Adrian Gartland <[EMAIL PROTECTED]>:
> > > > 
> > > > > Attached is a patch which allows iso-2022-jp (jis) encoded text to be
> > > > > passed through htmlspecialchars when the character set is
> > > > > set to ISO-2022-JP.
> > > > > 
> > > > > It should also fix a tiny bug I found in the "determine_charset"
> > > > > code, where "len" had not been set before the function did its
> > > > > charset map walk.
> > > > 
> > > > Your attachment didn't go through the mailing list filters. Please post a
> > > > link where the patch can be downloaded.
> > > > 
> > > > Jan.
> > > > 
> > > > --
> > > > http://www.horde.org - The Horde Project
> > > > http://www.ammma.de - discover your knowledge
> > > > http://www.tip4all.de - Deine private Tippgemeinschaft
> > > > 
> > > > -- 
> > > > PHP Development Mailing List <http://www.php.net/>
> > > > To unsubscribe, visit: http://www.php.net/unsub.php
> > > > 
> > > > 
> > > > 
> > > 
> > 
> > 
> > -- 
> > PHP Development Mailing List <http://www.php.net/>
> > To unsubscribe, visit: http://www.php.net/unsub.php
> > 
> > 
> > 
> 
> -- 
> Adrian Gartland - Senior Systems Engineer - TV Portal Team
> Oregan Networks UK Ltd                         Tel: +44  (0) 20 8846 0990
> The White Building, 52-54 Glentham Road        Fax: +44  (0) 20 8646 0999
> Barnes, London. SW13 9JJ, United Kingdom       WWW: http://www.oregan.net/
--- html.c      Mon Nov 18 04:11:27 2002
+++ html.c.next Tue Nov 19 05:51:43 2002
@@ -18,7 +18,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: html.c,v 1.65 2002/11/16 08:30:31 sebastian Exp $ */
+/* $Id: html.c,v 1.63 2002/11/11 13:31:08 moriyoshi Exp $ */
 
 #include "php.h"
 #if PHP_WIN32
@@ -43,7 +43,7 @@
 #endif
 
 enum entity_charset { cs_terminator, cs_8859_1, cs_cp1252,
-                                         cs_8859_15, cs_utf_8, cs_big5, cs_gb2312, 
+                                         cs_8859_15, cs_2022_jp, cs_utf_8, cs_big5, 
+cs_gb2312, 
                                          cs_big5hkscs, cs_sjis, cs_eucjp};
 typedef const char *entity_table_t;
 
@@ -288,6 +288,7 @@
 } charset_map[] = {
        { "ISO-8859-1",         cs_8859_1 },
        { "ISO8859-1",          cs_8859_1 },
+       { "ISO-2022-JP",        cs_2022_jp },
        { "ISO-8859-15",        cs_8859_15 },
        { "ISO8859-15",         cs_8859_15 },
        { "utf-8",                      cs_utf_8 },
@@ -728,8 +729,138 @@
 }
 /* }}} */
 
+/* {{{ next_iso2022_segment
+ * advances whatever psIn is pointing to past the end of the multi-byte run
+ * esc$bxxxxxesc(byyyyy ; psIn = yyyyy
+ */
+static const char *next_iso2022_segment(const unsigned char **psIn, int iInLen, const 
+char *pcEscapeSafeEnd)
+{
+       const char *sIn = *psIn;
+       const char *pcNextEsc;
+       static const char cEsc = 033;
+       int iSegmentLength;
+       int iRemaining = iInLen;
+
+       pcNextEsc = sIn;
+       if (sIn > pcEscapeSafeEnd) {
+               /* Buffer overrun if we try and spot the escape chars */
+               *psIn = sIn + iInLen;
+               return sIn;
+       } else {
+               while(1) {
+                       pcNextEsc++; /* step past the current escape */
+                       
+                       /* search for the closing escape sequence */
+                       while (cEsc != *pcNextEsc && iRemaining) {
+                               iRemaining--;
+                               pcNextEsc++;
+                       }
+                       
+                       if (cEsc != *pcNextEsc) {
+                               pcNextEsc = NULL;
+                       }
+                       
+                       
+                       if (NULL == pcNextEsc || pcNextEsc > pcEscapeSafeEnd) {
+                               *psIn = sIn + iInLen;
+                               return sIn;
+                       } else {
+                               if ('(' == pcNextEsc[1]) {
+                                       /*End of multi-byte run. */
+                               
+                                       iSegmentLength = (pcNextEsc - sIn) + 3;
+                                       *psIn = sIn + iSegmentLength;
+                                       return sIn;
+                               }
+                       }
+               }
+       }
+}
+/* }}} */
+
+/* {{{ next_ascii_segment
+ * advances whatever psIn is pointing to up to the next escape character,
+ * or to the end of the input if no escape character remains
+ */
+static const char *next_ascii_segment(const unsigned char **psIn, int iInLen)
+{
+       const char *sIn = *psIn;
+       const char *pcNextEsc;
+       static const char cEsc = 033;
+       int iRemaining = iInLen;
+
+       pcNextEsc = sIn;
+
+       while (1) {
+               while (cEsc != *pcNextEsc && iRemaining) {
+                       iRemaining--;
+                       pcNextEsc++;
+               }
+                
+               if (cEsc != *pcNextEsc) {
+                       *psIn = sIn + iInLen;
+                       return sIn;
+               } else {
+                       *psIn = pcNextEsc;
+                       return sIn;
+               }
+       }
+}
+/* }}} */
 
+/* {{{ escape_html_entities_ISO2022
+ * single byte 
+ * esc(B -> ASCII
+ * esc(J -> JIS Roman 
+ *
+ * double byte
+ * esc$@ -> JIS C 6226-1978
+ * esc$B -> JIS X 0208-1983
+ */
+static char *escape_html_entities_ISO2022(const unsigned char *old, int oldlen, int 
+*newlen, int all, int quote_style, const char *hint_charset TSRMLS_DC)
+{
+       char *new;
+       const char *pcStringEnd;
+       const char *pcEscapeSafeEnd;
+       int maxlen, len;
+       static const char cEsc = 033;
 
+       maxlen = 2 * oldlen;
+       if (maxlen < 128) {
+               maxlen = 128;
+       }       
+
+       new = emalloc(maxlen);
+       len=0;
+       
+       pcStringEnd = old + oldlen;
+       pcEscapeSafeEnd = pcStringEnd - 3;
+       
+       /* break up into encoded and non encoded segments */        
+       while (oldlen) {
+               if (cEsc == *old) {
+                       const unsigned char *sSegment = next_iso2022_segment(&old, 
+oldlen, pcEscapeSafeEnd);
+                       int iSegmentLength = old - sSegment;
+                       memcpy(new+len, sSegment, iSegmentLength);
+                       len += iSegmentLength;
+                       oldlen -= iSegmentLength;
+               } else {
+                       const unsigned char *sSegment = next_ascii_segment(&old, 
+oldlen);
+                       int iSegmentLength = old - sSegment;
+                       int iNewLen;
+                       char *sSpecialChared = php_escape_html_entities((char *) 
+sSegment, iSegmentLength, &iNewLen, all, quote_style, NULL TSRMLS_CC);
+            
+                       memcpy(new+len, sSpecialChared, iNewLen);
+                       len += iNewLen;
+                       oldlen -= iSegmentLength;
+                       efree(sSpecialChared);
+               }
+       }
+       
+       *newlen = len;
+       return new;
+}
+/* }}} */
 
 /* {{{ php_escape_html_entities
  */
@@ -739,6 +870,10 @@
        char *replaced;
        enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC);
        int matches_map;
+       
+       if (cs_2022_jp == charset) {
+               return escape_html_entities_ISO2022(old, oldlen, newlen, all, 
+quote_style, hint_charset TSRMLS_CC);
+       }
 
        maxlen = 2 * oldlen;
        if (maxlen < 128)
-- 
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to