Author: olegk
Date: Sun Feb 5 19:28:21 2012
New Revision: 1240783
URL: http://svn.apache.org/viewvc?rev=1240783&view=rev
Log:
MIME4J-211: Add an optional fallback charset argument to
DecoderUtil.decodeEncodedWords
Contributed by TzeKai Lee <chikei at gmail.com>
Modified:
james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
Modified:
james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
URL:
http://svn.apache.org/viewvc/james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java?rev=1240783&r1=1240782&r2=1240783&view=diff
==============================================================================
---
james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
(original)
+++
james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
Sun Feb 5 19:28:21 2012
@@ -142,6 +142,38 @@ public class DecoderUtil {
* @throws IllegalArgumentException only if the DecodeMonitor strategy
throws it (Strict parsing)
*/
public static String decodeEncodedWords(String body, DecodeMonitor
monitor) throws IllegalArgumentException {
+ return decodeEncodedWords(body, monitor, null);
+ }
+
+ /**
+ * Decodes a string containing encoded words as defined by RFC 2047.
Encoded
+ * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
+ * or 'q' for quoted-printable and 'B' or 'b' for base64. The fallback charset
+ * is used when the charset named in an encoded word has no Java equivalent.
+ *
+ * @param body the string to decode
+ * @param fallback the Charset to use when an encoded word's charset is unknown; if null, such words are left encoded.
+ * @return the decoded string.
+ * @throws IllegalArgumentException only if the DecodeMonitor strategy
throws it (Strict parsing)
+ */
+ public static String decodeEncodedWords(String body, Charset fallback)
throws IllegalArgumentException {
+ return decodeEncodedWords(body, null, fallback);
+ }
+
+ /**
+ * Decodes a string containing encoded words as defined by RFC 2047.
Encoded
+ * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
+ * or 'q' for quoted-printable and 'B' or 'b' for base64. The fallback charset
+ * is used when the charset named in an encoded word has no Java equivalent.
+ *
+ * @param body the string to decode
+ * @param monitor the DecodeMonitor to be used.
+ * @param fallback the Charset to use when an encoded word's charset is unknown; if null, such words are left encoded.
+ * @return the decoded string.
+ * @throws IllegalArgumentException only if the DecodeMonitor strategy
throws it (Strict parsing)
+ */
+ public static String decodeEncodedWords(String body, DecodeMonitor
monitor, Charset fallback)
+ throws IllegalArgumentException {
int tailIndex = 0;
boolean lastMatchValid = false;
@@ -154,7 +186,7 @@ public class DecoderUtil {
String encodedText = matcher.group(4);
String decoded = null;
- decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText,
monitor);
+ decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText,
monitor, fallback);
if (decoded == null) {
sb.append(matcher.group(0));
} else {
@@ -178,12 +210,16 @@ public class DecoderUtil {
// return null on error
private static String tryDecodeEncodedWord(final String mimeCharset,
- final String encoding, final String encodedText, final
DecodeMonitor monitor) {
+ final String encoding, final String encodedText, final
DecodeMonitor monitor, final Charset fallback) {
Charset charset = CharsetUtil.lookup(mimeCharset);
if (charset == null) {
- monitor(monitor, mimeCharset, encoding, encodedText, "leaving word
encoded",
- "Mime charser '", mimeCharset, "' doesn't have a
corresponding Java charset");
- return null;
+ if(fallback == null) {
+ monitor(monitor, mimeCharset, encoding, encodedText, "leaving
word encoded",
+ "Mime charser '", mimeCharset, "' doesn't have a
corresponding Java charset");
+ return null;
+ } else {
+ charset = fallback;
+ }
}
if (encodedText.length() == 0) {
Modified:
james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
URL:
http://svn.apache.org/viewvc/james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java?rev=1240783&r1=1240782&r2=1240783&view=diff
==============================================================================
---
james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
(original)
+++
james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
Sun Feb 5 19:28:21 2012
@@ -20,6 +20,7 @@
package org.apache.james.mime4j.codec;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
import junit.framework.TestCase;
@@ -67,6 +68,18 @@ public class DecoderUtilTest extends Tes
+
"\u30B8\u30CD\u30B9\u306E\u6C7A\u5B9A\u7248\u3067\u3059\uFF01", dec);
}
+ public void testDecodeJapaneseEncodedWordsWithFallback(){
+ String enc =
"=?random?B?GyRCTCQbKEobJEI+NRsoShskQkJ6GyhKGyRCOS0bKEo=?= "
+ + "=?garbage?B?GyRCOXAbKEobJEIiKBsoShskQiU1GyhKGyRCJSQbKEo=?= "
+ + "=?charset?B?GyRCJUkbKEobJEIlUxsoShskQiU4GyhKGyRCJU0bKEo=?= "
+ + "=?name?B?GyRCJTkbKEobJEIkThsoShskQjdoGyhKGyRCRGobKEo=?= "
+ + "=?trash?B?GyRCSEcbKEobJEIkRxsoShskQiQ5GyhKGyRCISobKEo=?=";
+
+ String dec = DecoderUtil.decodeEncodedWords(enc,
Charset.forName("ISO-2022-JP"));
+
assertEquals("\u672A\u627F\u8AFE\u5E83\u544A\u203B\u30B5\u30A4\u30C9\u30D3"
+ +
"\u30B8\u30CD\u30B9\u306E\u6C7A\u5B9A\u7248\u3067\u3059\uFF01", dec);
+ }
+
public void testInvalidEncodedWordsAreIgnored() {
assertEquals("=?iso8859-1?Q?=",
DecoderUtil.decodeEncodedWords("=?iso8859-1?Q?="));
assertEquals("=?iso8859-1?b?=",
DecoderUtil.decodeEncodedWords("=?iso8859-1?b?="));