This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-mime4j.git
commit 8122e4b901274b2e377f2642b4874951a8f6fea1 Author: Shamil Vakhitov <sha...@bgerp.org> AuthorDate: Wed Aug 18 20:25:15 2021 +0500 RawField.getBody() in UTF-8 --- .../org/apache/james/mime4j/stream/RawField.java | 3 ++- .../org/apache/james/mime4j/util/ContentUtil.java | 31 ++++++++++++++++++++++ .../james/mime4j/field/UnstructuredFieldTest.java | 14 ++++++++++ .../james/mime4j/message/HeaderImplTest.java | 9 +++++-- 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/james/mime4j/stream/RawField.java b/core/src/main/java/org/apache/james/mime4j/stream/RawField.java index 8bcaa773..e64c6a73 100644 --- a/core/src/main/java/org/apache/james/mime4j/stream/RawField.java +++ b/core/src/main/java/org/apache/james/mime4j/stream/RawField.java @@ -19,6 +19,7 @@ package org.apache.james.mime4j.stream; +import java.nio.charset.StandardCharsets; import java.util.Locale; import org.apache.james.mime4j.util.ByteSequence; @@ -83,7 +84,7 @@ public final class RawField implements Field { if (len > off + 1 && (CharsetUtil.isWhitespace((char) (raw.byteAt(off) & 0xff)))) { off++; } - return MimeUtil.unfold(ContentUtil.decode(raw, off, len - off)); + return MimeUtil.unfold(ContentUtil.decode(raw, off, len - off, StandardCharsets.UTF_8)); } return null; } diff --git a/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java b/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java index d6d2d96e..1e078120 100644 --- a/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java +++ b/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java @@ -223,6 +223,37 @@ public class ContentUtil { return new String(underlying); } + /** + * Decodes a sub-sequence of the specified sequence of bytes into a string + * using the US-ASCII charset with falling back to {@link #decode(Charset, ByteSequence, int, int)} + * on a first non US-ASCII character. + * + * @param byteSequence + * sequence of bytes to decode. + * @param offset + * offset into the byte sequence. + * @param length + * number of bytes. + * @param charset + * fallback charset. + * @return decoded string. + */ + public static String decode(ByteSequence byteSequence, int offset, int length, Charset charset) { + if (byteSequence == null) { + return null; + } + + StringBuilder buf = new StringBuilder(length); + for (int i = offset; i < offset + length; i++) { + char ch = (char) (byteSequence.byteAt(i) & 0xff); + if (!CharsetUtil.isASCII(ch)) { + return decode(charset, byteSequence, offset, length); + } + buf.append(ch); + } + return buf.toString(); + } + /** * Decodes a sub-sequence of the specified sequence of bytes into a string * using the specified charset. diff --git a/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java b/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java index 96986c9b..23f3b0a9 100644 --- a/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java +++ b/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java @@ -19,7 +19,11 @@ package org.apache.james.mime4j.field; +import java.nio.charset.StandardCharsets; + +import org.apache.james.mime4j.codec.DecodeMonitor; import org.apache.james.mime4j.dom.field.UnstructuredField; +import org.apache.james.mime4j.util.ByteArrayBuffer; import org.junit.Assert; import org.junit.Test; @@ -39,4 +43,14 @@ public class UnstructuredFieldTest { Assert.assertEquals("Testing value without a leading ' '", "yada", f.getValue()); } + @Test + public void testGetBodyUtf8() throws Exception { + UnstructuredField f; + + byte[] data = "Subject: Счет для ООО \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" от ООО \"Цифровые системы\"".getBytes(StandardCharsets.UTF_8); + + f = (UnstructuredField) DefaultFieldParser.parse(new ByteArrayBuffer(data, true), DecodeMonitor.SILENT); + Assert.assertEquals("Testing UTF8 value 1", "Счет для ООО \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" от ООО \"Цифровые системы\"", f.getValue()); + } + } diff --git a/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java b/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java index 0ed76662..5362ecc6 100644 --- a/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java +++ b/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java @@ -19,7 +19,10 @@ package org.apache.james.mime4j.message; +import java.nio.charset.StandardCharsets; + import org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.james.mime4j.codec.DecodeMonitor; import org.apache.james.mime4j.dom.Header; import org.apache.james.mime4j.field.DefaultFieldParser; import org.apache.james.mime4j.message.DefaultMessageWriter; @@ -55,7 +58,9 @@ public class HeaderImplTest { public void testWriteSpecialCharacters() throws Exception { String hello = SWISS_GERMAN_HELLO; Header header = new HeaderImpl(); - header.addField(DefaultFieldParser.parse("Hello: " + hello)); + byte[] utf8bytes = ("Hello: " + hello).getBytes(StandardCharsets.UTF_8); + ByteArrayBuffer raw = new ByteArrayBuffer(utf8bytes, true); + header.addField(DefaultFieldParser.parse(raw, DecodeMonitor.SILENT)); Field field = header.getField("Hello"); Assert.assertNotNull(field); @@ -70,7 +75,7 @@ public class HeaderImplTest { byte[] b = outstream.toByteArray(); ByteArrayBuffer buf = new ByteArrayBuffer(b.length); buf.append(b, 0, b.length); - String s = ContentUtil.decode(buf); + String s = ContentUtil.decode(StandardCharsets.UTF_8, buf); Assert.assertEquals("Hello: " + SWISS_GERMAN_HELLO + "\r\n\r\n", s); } --------------------------------------------------------------------- To unsubscribe, e-mail: server-dev-unsubscr...@james.apache.org For additional commands, e-mail: server-dev-h...@james.apache.org