Author: niklas
Date: Fri Dec 17 10:02:31 2010
New Revision: 1050333
URL: http://svn.apache.org/viewvc?rev=1050333&view=rev
Log:
Merged in changes to DecoderUtil and DecoderUtilTest from revision 809204
(MIME4J-138).
Modified:
james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java (contents, props changed)
james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java (contents, props changed)
Modified:
james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
URL: http://svn.apache.org/viewvc/james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java?rev=1050333&r1=1050332&r2=1050333&view=diff
==============================================================================
--- james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java (original)
+++ james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java Fri Dec 17 10:02:31 2010
@@ -1,252 +1,253 @@
-/****************************************************************
- * Licensed to the Apache Software Foundation (ASF) under one *
- * or more contributor license agreements. See the NOTICE file *
- * distributed with this work for additional information *
- * regarding copyright ownership. The ASF licenses this file *
- * to you under the Apache License, Version 2.0 (the *
- * "License"); you may not use this file except in compliance *
- * with the License. You may obtain a copy of the License at *
- * *
- * http://www.apache.org/licenses/LICENSE-2.0 *
- * *
- * Unless required by applicable law or agreed to in writing, *
- * software distributed under the License is distributed on an *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
- * KIND, either express or implied. See the License for the *
- * specific language governing permissions and limitations *
- * under the License. *
- ****************************************************************/
-
-package org.apache.james.mime4j.codec;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.james.mime4j.util.CharsetUtil;
-
-/**
- * Static methods for decoding strings, byte arrays and encoded words.
- */
-public class DecoderUtil {
- private static Log log = LogFactory.getLog(DecoderUtil.class);
-
- /**
- * Decodes a string containing quoted-printable encoded data.
- *
- * @param s the string to decode.
- * @return the decoded bytes.
- */
- public static byte[] decodeBaseQuotedPrintable(String s) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
-
- try {
- byte[] bytes = s.getBytes("US-ASCII");
-
- QuotedPrintableInputStream is = new
QuotedPrintableInputStream(
- new
ByteArrayInputStream(bytes));
-
- int b = 0;
- while ((b = is.read()) != -1) {
- baos.write(b);
- }
- } catch (IOException e) {
- /*
- * This should never happen!
- */
- log.error(e);
- }
-
- return baos.toByteArray();
- }
-
- /**
- * Decodes a string containing base64 encoded data.
- *
- * @param s the string to decode.
- * @return the decoded bytes.
- */
- public static byte[] decodeBase64(String s) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
-
- try {
- byte[] bytes = s.getBytes("US-ASCII");
-
- Base64InputStream is = new Base64InputStream(
- new
ByteArrayInputStream(bytes));
-
- int b = 0;
- while ((b = is.read()) != -1) {
- baos.write(b);
- }
- } catch (IOException e) {
- /*
- * This should never happen!
- */
- log.error(e);
- }
-
- return baos.toByteArray();
- }
-
- /**
- * Decodes an encoded word encoded with the 'B' encoding (described
in
- * RFC 2047) found in a header field body.
- *
- * @param encodedWord the encoded word to decode.
- * @param charset the Java charset to use.
- * @return the decoded string.
- * @throws UnsupportedEncodingException if the given Java charset
isn't
- * supported.
- */
- public static String decodeB(String encodedWord, String charset)
- throws UnsupportedEncodingException {
-
- return new String(decodeBase64(encodedWord), charset);
- }
-
- /**
- * Decodes an encoded word encoded with the 'Q' encoding (described
in
- * RFC 2047) found in a header field body.
- *
- * @param encodedWord the encoded word to decode.
- * @param charset the Java charset to use.
- * @return the decoded string.
- * @throws UnsupportedEncodingException if the given Java charset
isn't
- * supported.
- */
- public static String decodeQ(String encodedWord, String charset)
- throws UnsupportedEncodingException {
-
- /*
- * Replace _ with =20
- */
- StringBuilder sb = new StringBuilder(128);
- for (int i = 0; i< encodedWord.length(); i++) {
- char c = encodedWord.charAt(i);
- if (c == '_') {
- sb.append("=20");
- } else {
- sb.append(c);
- }
- }
-
- return new String(decodeBaseQuotedPrintable(sb.toString()),
charset);
- }
-
- /**
- * Decodes a string containing encoded words as defined by RFC 2047.
- * Encoded words in have the form
- * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
- * quoted-printable and 'B' or 'b' for Base64.
- *
- * @param body the string to decode.
- * @return the decoded string.
- */
- public static String decodeEncodedWords(String body) {
- int previousEnd = 0;
- boolean previousWasEncoded = false;
-
- StringBuilder sb = new StringBuilder();
-
- while (true) {
- int begin = body.indexOf("=?", previousEnd);
- int end = begin == -1 ? -1 : body.indexOf("?=", begin + 2);
- if (end == -1) {
- if (previousEnd == 0)
- return body;
-
- sb.append(body.substring(previousEnd));
- return sb.toString();
- }
- end += 2;
-
- String sep = body.substring(previousEnd, begin);
-
- String decoded = decodeEncodedWord(body, begin, end);
- if (decoded == null) {
- sb.append(sep);
- sb.append(body.substring(begin, end));
- } else {
- if (!previousWasEncoded ||
!CharsetUtil.isWhitespace(sep)) {
- sb.append(sep);
- }
- sb.append(decoded);
- }
-
- previousEnd = end;
- previousWasEncoded = decoded != null;
- }
- }
-
- // return null on error
- private static String decodeEncodedWord(String body, int begin, int
end) {
- int qm1 = body.indexOf('?', begin + 2);
- if (qm1 == end - 2)
- return null;
-
- int qm2 = body.indexOf('?', qm1 + 1);
- if (qm2 == end - 2)
- return null;
-
- String mimeCharset = body.substring(begin + 2, qm1);
- String encoding = body.substring(qm1 + 1, qm2);
- String encodedText = body.substring(qm2 + 1, end - 2);
-
- String charset = CharsetUtil.toJavaCharset(mimeCharset);
- if (charset == null) {
- if (log.isWarnEnabled()) {
- log.warn("MIME charset '" + mimeCharset + "' in encoded
word '"
- + body.substring(begin, end) + "' doesn't have a
"
- + "corresponding Java charset");
- }
- return null;
- } else if (!CharsetUtil.isDecodingSupported(charset)) {
- if (log.isWarnEnabled()) {
- log.warn("Current JDK doesn't support decoding of
charset '"
- + charset + "' (MIME charset '" + mimeCharset
- + "' in encoded word '" + body.substring(begin,
end)
- + "')");
- }
- return null;
- }
-
- if (encodedText.length() == 0) {
- if (log.isWarnEnabled()) {
- log.warn("Missing encoded text in encoded word: '"
- + body.substring(begin, end) + "'");
- }
- return null;
- }
-
- try {
- if (encoding.equalsIgnoreCase("Q")) {
- return DecoderUtil.decodeQ(encodedText, charset);
- } else if (encoding.equalsIgnoreCase("B")) {
- return DecoderUtil.decodeB(encodedText, charset);
- } else {
- if (log.isWarnEnabled()) {
- log.warn("Warning: Unknown encoding in encoded word
'"
- + body.substring(begin, end) + "'");
- }
- return null;
- }
- } catch (UnsupportedEncodingException e) {
- // should not happen because of isDecodingSupported check
above
- if (log.isWarnEnabled()) {
- log.warn("Unsupported encoding in encoded word '"
- + body.substring(begin, end) + "'", e);
- }
- return null;
- } catch (RuntimeException e) {
- if (log.isWarnEnabled()) {
- log.warn("Could not decode encoded word '"
- + body.substring(begin, end) + "'", e);
- }
- return null;
- }
- }
-}
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mime4j.codec;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.james.mime4j.util.CharsetUtil;
+
+/**
+ * Static methods for decoding strings, byte arrays and encoded words.
+ */
+public class DecoderUtil {
+ private static Log log = LogFactory.getLog(DecoderUtil.class);
+
+ private static final Pattern PATTERN_ENCODED_WORD = Pattern.compile(
+ "(.*?)=\\?([^\\?]+?)\\?(\\w)\\?([^\\?]+?)\\?=", Pattern.DOTALL);
+
+ /**
+ * Decodes a string containing quoted-printable encoded data.
+ *
+ * @param s the string to decode.
+ * @return the decoded bytes.
+ */
+ public static byte[] decodeQuotedPrintable(String s) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+ try {
+ byte[] bytes = s.getBytes("US-ASCII");
+
+ QuotedPrintableInputStream is = new QuotedPrintableInputStream(
+ new ByteArrayInputStream(bytes));
+
+ int b = 0;
+ while ((b = is.read()) != -1) {
+ baos.write(b);
+ }
+ } catch (IOException e) {
+ // This should never happen!
+ log.error(e);
+ throw new IllegalStateException(e);
+ }
+
+ return baos.toByteArray();
+ }
+
+ /**
+ * Decodes a string containing base64 encoded data.
+ *
+ * @param s the string to decode.
+ * @return the decoded bytes.
+ */
+ public static byte[] decodeBase64(String s) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+ try {
+ byte[] bytes = s.getBytes("US-ASCII");
+
+ Base64InputStream is = new Base64InputStream(
+ new ByteArrayInputStream(bytes));
+
+ int b = 0;
+ while ((b = is.read()) != -1) {
+ baos.write(b);
+ }
+ } catch (IOException e) {
+ // This should never happen!
+ log.error(e);
+ throw new IllegalStateException(e);
+ }
+
+ return baos.toByteArray();
+ }
+
+ /**
+ * Decodes an encoded text encoded with the 'B' encoding (described in
+ * RFC 2047) found in a header field body.
+ *
+ * @param encodedText the encoded text to decode.
+ * @param charset the Java charset to use.
+ * @return the decoded string.
+ * @throws UnsupportedEncodingException if the given Java charset isn't
+ * supported.
+ */
+ public static String decodeB(String encodedText, String charset)
+ throws UnsupportedEncodingException {
+ byte[] decodedBytes = decodeBase64(encodedText);
+ return new String(decodedBytes, charset);
+ }
+
+ /**
+ * Decodes an encoded text encoded with the 'Q' encoding (described in
+ * RFC 2047) found in a header field body.
+ *
+ * @param encodedText the encoded text to decode.
+ * @param charset the Java charset to use.
+ * @return the decoded string.
+ * @throws UnsupportedEncodingException if the given Java charset isn't
+ * supported.
+ */
+ public static String decodeQ(String encodedText, String charset)
+ throws UnsupportedEncodingException {
+ encodedText = replaceUnderscores(encodedText);
+
+ byte[] decodedBytes = decodeQuotedPrintable(encodedText);
+ return new String(decodedBytes, charset);
+ }
+
+ /**
+ * Decodes a string containing encoded words as defined by RFC 2047. Encoded
+ * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
+ * or 'q' for quoted-printable and 'B' or 'b' for base64.
+ *
+ * @param body the string to decode.
+ * @return the decoded string.
+ */
+ public static String decodeEncodedWords(String body) {
+ int tailIndex = 0;
+ boolean lastMatchValid = false;
+
+ StringBuilder sb = new StringBuilder();
+
+ for (Matcher matcher = PATTERN_ENCODED_WORD.matcher(body); matcher.find();) {
+ String separator = matcher.group(1);
+ String mimeCharset = matcher.group(2);
+ String encoding = matcher.group(3);
+ String encodedText = matcher.group(4);
+
+ String decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText);
+ if (decoded == null) {
+ sb.append(matcher.group(0));
+ } else {
+ if (!lastMatchValid || !CharsetUtil.isWhitespace(separator)) {
+ sb.append(separator);
+ }
+ sb.append(decoded);
+ }
+
+ tailIndex = matcher.end();
+ lastMatchValid = decoded != null;
+ }
+
+ if (tailIndex == 0) {
+ return body;
+ } else {
+ sb.append(body.substring(tailIndex));
+ return sb.toString();
+ }
+ }
+
+ // return null on error
+ private static String tryDecodeEncodedWord(final String mimeCharset,
+ final String encoding, final String encodedText) {
+ String charset = CharsetUtil.toJavaCharset(mimeCharset);
+ if (charset == null) {
+ if (log.isWarnEnabled()) {
+ log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
+ + recombine(mimeCharset, encoding, encodedText) + "' doesn't have a "
+ + "corresponding Java charset");
+ }
+ return null;
+ } else if (!CharsetUtil.isDecodingSupported(charset)) {
+ if (log.isWarnEnabled()) {
+ log.warn("Current JDK doesn't support decoding of charset '"
+ + charset + "' (MIME charset '" + mimeCharset
+ + "' in encoded word '" + recombine(mimeCharset, encoding, encodedText)
+ + "')");
+ }
+ return null;
+ }
+
+ if (encodedText.length() == 0) {
+ if (log.isWarnEnabled()) {
+ log.warn("Missing encoded text in encoded word: '"
+ + recombine(mimeCharset, encoding, encodedText) + "'");
+ }
+ return null;
+ }
+
+ try {
+ if (encoding.equalsIgnoreCase("Q")) {
+ return DecoderUtil.decodeQ(encodedText, charset);
+ } else if (encoding.equalsIgnoreCase("B")) {
+ return DecoderUtil.decodeB(encodedText, charset);
+ } else {
+ if (log.isWarnEnabled()) {
+ log.warn("Warning: Unknown encoding in encoded word '"
+ + recombine(mimeCharset, encoding, encodedText) + "'");
+ }
+ return null;
+ }
+ } catch (UnsupportedEncodingException e) {
+ // should not happen because of isDecodingSupported check above
+ if (log.isWarnEnabled()) {
+ log.warn("Unsupported encoding in encoded word '"
+ + recombine(mimeCharset, encoding, encodedText) + "'", e);
+ }
+ return null;
+ } catch (RuntimeException e) {
+ if (log.isWarnEnabled()) {
+ log.warn("Could not decode encoded word '"
+ + recombine(mimeCharset, encoding, encodedText) + "'", e);
+ }
+ return null;
+ }
+ }
+
+ private static String recombine(final String mimeCharset,
+ final String encoding, final String encodedText) {
+ return "=?" + mimeCharset + "?" + encoding + "?" + encodedText + "?=";
+ }
+
+ // Replace _ with =20
+ private static String replaceUnderscores(String str) {
+ // probably faster than String#replace(CharSequence, CharSequence)
+
+ StringBuilder sb = new StringBuilder(128);
+
+ for (int i = 0; i< str.length(); i++) {
+ char c = str.charAt(i);
+ if (c == '_') {
+ sb.append("=20");
+ } else {
+ sb.append(c);
+ }
+ }
+
+ return sb.toString();
+ }
+}
Propchange:
james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Fri Dec 17 10:02:31 2010
@@ -0,0 +1 @@
+/james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java:809204
Modified:
james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
URL: http://svn.apache.org/viewvc/james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java?rev=1050333&r1=1050332&r2=1050333&view=diff
==============================================================================
--- james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java (original)
+++ james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java Fri Dec 17 10:02:31 2010
@@ -1,105 +1,127 @@
-/****************************************************************
- * Licensed to the Apache Software Foundation (ASF) under one *
- * or more contributor license agreements. See the NOTICE file *
- * distributed with this work for additional information *
- * regarding copyright ownership. The ASF licenses this file *
- * to you under the Apache License, Version 2.0 (the *
- * "License"); you may not use this file except in compliance *
- * with the License. You may obtain a copy of the License at *
- * *
- * http://www.apache.org/licenses/LICENSE-2.0 *
- * *
- * Unless required by applicable law or agreed to in writing, *
- * software distributed under the License is distributed on an *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
- * KIND, either express or implied. See the License for the *
- * specific language governing permissions and limitations *
- * under the License. *
- ****************************************************************/
-
-package org.apache.james.mime4j.codec;
-
-import java.io.UnsupportedEncodingException;
-
-import junit.framework.TestCase;
-
-import org.apache.log4j.BasicConfigurator;
-
-public class DecoderUtilTest extends TestCase {
-
- @Override
- public void setUp() {
- BasicConfigurator.resetConfiguration();
- BasicConfigurator.configure();
- }
- /*
- public void testDecodeEncodedWords() {
- String s =
"=?ISO-2022-JP?B?GyRCTCQbKEobJEI+NRsoShskQkJ6GyhKGyRCOS0bKEo=?="
- +
"=?ISO-2022-JP?B?GyRCOXAbKEobJEIiKBsoShskQiU1GyhKGyRCJSQbKEo=?="
- +
"=?ISO-2022-JP?B?GyRCJUkbKEobJEIlUxsoShskQiU4GyhKGyRCJU0bKEo=?="
- +
"=?ISO-2022-JP?B?GyRCJTkbKEobJEIkThsoShskQjdoGyhKGyRCRGobKEo=?="
- +
"=?ISO-2022-JP?B?GyRCSEcbKEobJEIkRxsoShskQiQ5GyhKGyRCISobKEo=?=";
-
- s = DecoderUtil.decodeEncodedWords(s);
- System.out.println(s);
- }*/
-
- public void testDecodeB() throws UnsupportedEncodingException {
- String s = DecoderUtil.decodeB("VGhpcyBpcyB0aGUgcGxhaW4gd"
- + "GV4dCBtZXNzYWdlIQ==", "ISO8859-1");
- assertEquals("This is the plain text message!", s);
- }
-
-
- public void testDecodeQ() throws UnsupportedEncodingException {
- String s = DecoderUtil.decodeQ("=e1_=e2=09=E3_=E4_",
- "ISO8859-1");
- assertEquals("\u00e1 \u00e2\t\u00e3 \u00e4 ", s);
- }
-
- public void testDecodeEncodedWords() {
- assertEquals("", DecoderUtil.decodeEncodedWords(""));
- assertEquals("Yada yada", DecoderUtil.decodeEncodedWords("Yada
yada"));
- assertEquals(" \u00e1\u00e2\u00e3\t\u00e4",
-
DecoderUtil.decodeEncodedWords("=?iso-8859-1?Q?_=20=e1=e2=E3=09=E4?="));
- assertEquals("Word 1 ' \u00e2\u00e3\t\u00e4'. Word 2 '
\u00e2\u00e3\t\u00e4'",
- DecoderUtil.decodeEncodedWords("Word 1
'=?iso-8859-1?Q?_=20=e2=E3=09=E4?="
- + "'. Word 2
'=?iso-8859-1?q?_=20=e2=E3=09=E4?='"));
- assertEquals("=?iso-8859-YADA?Q?_=20=t1=e2=E3=09=E4?=",
-
DecoderUtil.decodeEncodedWords("=?iso-8859-YADA?Q?_=20=t1=e2=E3=09=E4?="));
- assertEquals("A short text",
-
DecoderUtil.decodeEncodedWords("=?US-ASCII?B?QSBzaG9ydCB0ZXh0?="));
- assertEquals("A short text again!",
-
DecoderUtil.decodeEncodedWords("=?US-ASCII?b?QSBzaG9ydCB0ZXh0IGFnYWluIQ==?="));
-
- // invalid encoded words should be returned unchanged
- assertEquals("=?iso8859-1?Q?=",
DecoderUtil.decodeEncodedWords("=?iso8859-1?Q?="));
- assertEquals("=?iso8859-1?b?=",
DecoderUtil.decodeEncodedWords("=?iso8859-1?b?="));
- assertEquals("=?ISO-8859-1?Q?",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?"));
- assertEquals("=?ISO-8859-1?R?abc?=",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?R?abc?="));
-
- // encoded-text requires at least one character according to rfc
2047
- assertEquals("=?ISO-8859-1?Q??=",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q??="));
- assertEquals("=?ISO-8859-1?B??=",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?B??="));
-
- // white space between encoded words should be removed
(MIME4J-104)
- assertEquals("a",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?="));
- assertEquals("a b",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= b"));
- assertEquals("ab",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="));
- assertEquals("ab",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="));
- assertEquals("ab",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?=\r\n
=?ISO-8859-1?Q?b?="));
- assertEquals("a b",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a_b?="));
- assertEquals("a b",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?="));
-
- // non white space between encoded words should be retained
- assertEquals("a b c",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?="));
-
- // text before and after encoded words should be retained
- assertEquals(" a b c ", DecoderUtil.decodeEncodedWords("
=?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?= "));
- assertEquals("! a b c !", DecoderUtil.decodeEncodedWords("!
=?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?= !"));
-
- // Bug detected on June 7, 2005. Decoding the following string
caused
- // OutOfMemoryError.
- assertEquals("=3?!!\\=?\"!g6P\"!Xp:\"!",
DecoderUtil.decodeEncodedWords("=3?!!\\=?\"!g6P\"!Xp:\"!"));
- }
-}
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mime4j.codec;
+
+import java.io.UnsupportedEncodingException;
+
+import junit.framework.TestCase;
+
+import org.apache.log4j.BasicConfigurator;
+
+public class DecoderUtilTest extends TestCase {
+
+ @Override
+ public void setUp() {
+ BasicConfigurator.resetConfiguration();
+ BasicConfigurator.configure();
+ }
+
+ public void testDecodeB() throws UnsupportedEncodingException {
+ String s = DecoderUtil.decodeB("VGhpcyBpcyB0aGUgcGxhaW4gd"
+ + "GV4dCBtZXNzYWdlIQ==", "ISO8859-1");
+ assertEquals("This is the plain text message!", s);
+ }
+
+ public void testDecodeQ() throws UnsupportedEncodingException {
+ String s = DecoderUtil.decodeQ("=e1_=e2=09=E3_=E4_", "ISO8859-1");
+ assertEquals("\u00e1 \u00e2\t\u00e3 \u00e4 ", s);
+ }
+
+ public void testNonEncodedWordsAreIgnored() {
+ assertEquals("", DecoderUtil.decodeEncodedWords(""));
+ assertEquals("Yada yada", DecoderUtil.decodeEncodedWords("Yada
yada"));
+ }
+
+ public void testDecodeSomeEncodedWords() {
+ assertEquals(" \u00e1\u00e2\u00e3\t\u00e4",
+
DecoderUtil.decodeEncodedWords("=?iso-8859-1?Q?_=20=e1=e2=E3=09=E4?="));
+ assertEquals("Word 1 ' \u00e2\u00e3\t\u00e4'. Word 2 '
\u00e2\u00e3\t\u00e4'",
+ DecoderUtil.decodeEncodedWords("Word 1
'=?iso-8859-1?Q?_=20=e2=E3=09=E4?="
+ + "'. Word 2
'=?iso-8859-1?q?_=20=e2=E3=09=E4?='"));
+ assertEquals("=?iso-8859-YADA?Q?_=20=t1=e2=E3=09=E4?=",
+
DecoderUtil.decodeEncodedWords("=?iso-8859-YADA?Q?_=20=t1=e2=E3=09=E4?="));
+ assertEquals("A short text",
+
DecoderUtil.decodeEncodedWords("=?US-ASCII?B?QSBzaG9ydCB0ZXh0?="));
+ assertEquals("A short text again!",
+
DecoderUtil.decodeEncodedWords("=?US-ASCII?b?QSBzaG9ydCB0ZXh0IGFnYWluIQ==?="));
+ }
+
+ public void testDecodeJapaneseEncodedWords() {
+ String enc =
"=?ISO-2022-JP?B?GyRCTCQbKEobJEI+NRsoShskQkJ6GyhKGyRCOS0bKEo=?="
+ +
"=?ISO-2022-JP?B?GyRCOXAbKEobJEIiKBsoShskQiU1GyhKGyRCJSQbKEo=?="
+ +
"=?ISO-2022-JP?B?GyRCJUkbKEobJEIlUxsoShskQiU4GyhKGyRCJU0bKEo=?="
+ +
"=?ISO-2022-JP?B?GyRCJTkbKEobJEIkThsoShskQjdoGyhKGyRCRGobKEo=?="
+ +
"=?ISO-2022-JP?B?GyRCSEcbKEobJEIkRxsoShskQiQ5GyhKGyRCISobKEo=?=";
+
+ String dec = DecoderUtil.decodeEncodedWords(enc);
+
assertEquals("\u672A\u627F\u8AFE\u5E83\u544A\u203B\u30B5\u30A4\u30C9\u30D3"
+ +
"\u30B8\u30CD\u30B9\u306E\u6C7A\u5B9A\u7248\u3067\u3059\uFF01", dec);
+ }
+
+ public void testInvalidEncodedWordsAreIgnored() {
+ assertEquals("=?iso8859-1?Q?=",
DecoderUtil.decodeEncodedWords("=?iso8859-1?Q?="));
+ assertEquals("=?iso8859-1?b?=",
DecoderUtil.decodeEncodedWords("=?iso8859-1?b?="));
+ assertEquals("=?ISO-8859-1?Q?",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?"));
+ assertEquals("=?ISO-8859-1?R?abc?=",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?R?abc?="));
+ assertEquals("test =?ISO-8859-1?R?abc?=",
DecoderUtil.decodeEncodedWords("test =?ISO-8859-1?R?abc?="));
+ }
+
+ public void testEmptyEncodedTextIsIgnored() {
+ // encoded-text requires at least one character according to rfc 2047
+ assertEquals("=?ISO-8859-1?Q??=",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q??="));
+ assertEquals("=?ISO-8859-1?B??=",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?B??="));
+ }
+
+ // see MIME4J-104
+ public void testWhiteSpaceBetweenEncodedWordsGetsRemoved() {
+ assertEquals("a",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?="));
+ assertEquals("a b",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= b"));
+ assertEquals("ab",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="));
+ assertEquals("ab",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="));
+ assertEquals("ab",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?=\r\n
=?ISO-8859-1?Q?b?="));
+ assertEquals("a b",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a_b?="));
+ assertEquals("a b",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?="));
+ }
+
+ // see MIME4J-138
+ public void testEncodedTextMayStartWithAnEqualsSign() {
+ assertEquals(" foo",
DecoderUtil.decodeEncodedWords("=?utf-8?Q?=20foo?="));
+ assertEquals("Re: How to place a view at the bottom with a 100%
width",
+
DecoderUtil.decodeEncodedWords("=?utf-8?Q?Re:=20How=20to=20place=20a=20view=20at=20the=20bottom?=
"
+ + "=?utf-8?Q?=20with=20a=20100%=20width?="));
+ assertEquals("Test \u00fc and more",
+ DecoderUtil.decodeEncodedWords("Test =?ISO-8859-1?Q?=FC_?=
=?ISO-8859-1?Q?and_more?="));
+ }
+
+ public void testNonWhiteSpaceBetweenEncodedWordsIsRetained() {
+ assertEquals("a b c",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?="));
+ assertEquals("a\rb\nc",
DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?=\rb\n=?ISO-8859-1?Q?c?="));
+ }
+
+ public void testTextBeforeAndAfterEncodedWordIsRetained() {
+ assertEquals(" a b c ", DecoderUtil.decodeEncodedWords("
=?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?= "));
+ assertEquals("! a b c !", DecoderUtil.decodeEncodedWords("!
=?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?= !"));
+ }
+
+ public void testFunnyInputDoesNotRaiseOutOfMemoryError() {
+ // Bug detected on June 7, 2005. Decoding the following string caused OutOfMemoryError.
+ assertEquals("=3?!!\\=?\"!g6P\"!Xp:\"!",
DecoderUtil.decodeEncodedWords("=3?!!\\=?\"!g6P\"!Xp:\"!"));
+ }
+}
Propchange:
james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Fri Dec 17 10:02:31 2010
@@ -0,0 +1 @@
+/james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java:809204