moriyoshi Tue Dec 23 21:36:22 2003 EDT Modified files: /php-src/ext/iconv iconv.c Log: - Supply some comments. Hope those help when you have to debug this dead beef. - Fix iconv_mime_decode() to correctly handle "malformed" headers :) - Fix an iconv_mime_decode() bug where the result is mangled when ICONV_MIME_DECODE_CONTINUE_ON_ERROR is enabled and the function encounters an unconvertible encoded chunk. Index: php-src/ext/iconv/iconv.c diff -u php-src/ext/iconv/iconv.c:1.112 php-src/ext/iconv/iconv.c:1.113 --- php-src/ext/iconv/iconv.c:1.112 Tue Dec 23 12:10:54 2003 +++ php-src/ext/iconv/iconv.c Tue Dec 23 21:36:21 2003 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: iconv.c,v 1.112 2003/12/23 17:10:54 moriyoshi Exp $ */ +/* $Id: iconv.c,v 1.113 2003/12/24 02:36:21 moriyoshi Exp $ */ #ifdef HAVE_CONFIG_H #include "config.h" @@ -1271,9 +1271,9 @@ int eos = 0; switch (scan_stat) { - case 0: + case 0: /* expecting any character */ switch (*p1) { - case '\r': + case '\r': /* part of an EOL sequence? 
*/ scan_stat = 7; break; @@ -1281,17 +1281,17 @@ scan_stat = 8; break; - case '=': + case '=': /* first letter of an encoded chunk */ encoded_word = p1; scan_stat = 1; break; - case ' ': case '\t': + case ' ': case '\t': /* a chunk of whitespaces */ spaces = p1; scan_stat = 11; break; - default: + default: /* first letter of a non-encoded word */ _php_iconv_appendc(pretval, *p1, cd_pl); encoded_word = NULL; if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { @@ -1301,7 +1301,7 @@ } break; - case 1: + case 1: /* expecting a delimiter */ if (*p1 != '?') { err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { @@ -1319,13 +1319,13 @@ scan_stat = 2; break; - case 2: /* charset name */ + case 2: /* expecting a charset name */ switch (*p1) { - case '?': + case '?': /* normal delimiter: encoding scheme follows */ scan_stat = 3; break; - case '*': + case '*': /* new style delimiter: locale id follows */ scan_stat = 10; break; } @@ -1396,7 +1396,7 @@ } break; - case 3: + case 3: /* expecting a encoding scheme specifier */ switch (*p1) { case 'B': enc_scheme = PHP_ICONV_ENC_SCHEME_BASE64; @@ -1428,9 +1428,10 @@ } break; - case 4: + case 4: /* expecting a delimiter */ if (*p1 != '?') { if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { + /* pass the entire chunk through the converter */ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; @@ -1451,14 +1452,14 @@ scan_stat = 5; break; - case 5: + case 5: /* expecting an encoded portion */ if (*p1 == '?') { encoded_text_len = (size_t)(p1 - encoded_text); scan_stat = 6; } break; - case 7: + case 7: /* expecting a "\n" character */ if (*p1 == '\n') { scan_stat = 8; } else { @@ -1469,7 +1470,8 @@ } break; - case 8: + case 8: /* checking whether the following line is part of a + folded header */ if (*p1 != ' ' && *p1 != '\t') { --p1; str_left = 1; /* quit_loop */ @@ -1482,9 
+1484,10 @@ scan_stat = 11; break; - case 6: + case 6: /* expecting a End-Of-Chunk character "=" */ if (*p1 != '=') { if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { + /* pass the entire chunk through the converter */ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; @@ -1508,11 +1511,20 @@ break; } - case 9: + case 9: /* choice point, seeing what to do next.*/ switch (*p1) { default: + /* Handle non-RFC-compliant formats + * + * RFC2047 requires the character that comes right + * after an encoded word (chunk) to be a whitespace, + * while there are lots of broken implementations that + * generate such malformed headers that don't fulfill + * that requirement. + */ if (!eos) { if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { + /* pass the entire chunk through the converter */ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; @@ -1544,6 +1556,7 @@ if (decoded_text == NULL) { if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { + /* pass the entire chunk through the converter */ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; @@ -1566,29 +1579,24 @@ if (err != PHP_ICONV_ERR_SUCCESS) { if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { + /* pass the entire chunk through the converter */ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; } encoded_word = NULL; - if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { - scan_stat = 12; - } else { - scan_stat = 0; - } - break; } else { goto out; } } - if (eos) { + if (eos) { /* reached end-of-string. done. */ scan_stat = 0; break; } switch (*p1) { - case '\r': + case '\r': /* part of an EOL sequence? 
*/ scan_stat = 7; break; @@ -1596,16 +1604,16 @@ scan_stat = 8; break; - case '=': + case '=': /* first letter of an encoded chunk */ scan_stat = 1; break; - case ' ': case '\t': + case ' ': case '\t': /* medial whitespaces */ spaces = p1; scan_stat = 11; break; - default: + default: /* first letter of a non-encoded word */ _php_iconv_appendc(pretval, *p1, cd_pl); scan_stat = 12; break; @@ -1614,15 +1622,15 @@ } break; - case 10: /* language spec */ + case 10: /* expects a language specifier. dismiss it for now */ if (*p1 == '?') { scan_stat = 3; } break; - case 11: + case 11: /* expecting a chunk of whitespaces */ switch (*p1) { - case '\r': + case '\r': /* part of an EOL sequence? */ scan_stat = 7; break; @@ -1630,7 +1638,7 @@ scan_stat = 8; break; - case '=': + case '=': /* first letter of an encoded chunk */ if (spaces != NULL && encoded_word == NULL) { _php_iconv_appendl(pretval, spaces, (size_t)(p1 - spaces), cd_pl); spaces = NULL; @@ -1642,7 +1650,7 @@ case ' ': case '\t': break; - default: /* beginning of a word delimited by white spaces */ + default: /* first letter of a non-encoded word */ if (spaces != NULL) { _php_iconv_appendl(pretval, spaces, (size_t)(p1 - spaces), cd_pl); spaces = NULL; @@ -1658,9 +1666,9 @@ } break; - case 12: + case 12: /* expecting a non-encoded word */ switch (*p1) { - case '\r': + case '\r': /* part of an EOL sequence? */ scan_stat = 7; break; @@ -1673,6 +1681,14 @@ scan_stat = 11; break; + case '=': /* first letter of an encoded chunk */ + if (!(mode & PHP_ICONV_MIME_DECODE_STRICT)) { + encoded_word = p1; + scan_stat = 1; + break; + } + /* break is omitted intentionally */ + default: _php_iconv_appendc(pretval, *p1, cd_pl); break; @@ -1680,7 +1696,6 @@ break; } } - switch (scan_stat) { case 0: case 8: case 11: case 12: break;
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php