Author: ggregory
Date: Mon Mar 19 20:21:26 2012
New Revision: 1302639
URL: http://svn.apache.org/viewvc?rev=1302639&view=rev
Log:
CODEC-121 will be for 2.0.
Modified:
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java
commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java
Modified:
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java
URL:
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java?rev=1302639&r1=1302638&r2=1302639&view=diff
==============================================================================
---
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java
(original)
+++
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java
Mon Mar 19 20:21:26 2012
@@ -42,10 +42,20 @@ import org.apache.commons.codec.binary.S
* to ensure the integrity of the data should the message pass through a
character- translating, and/or line-wrapping
* gateway.
* </p>
- *
+ *
+ * <p>
+ * Note:
+ * </p>
+ * <p>
+ * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
+ * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
+ * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
+ * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
+ * </p>
+ *
* @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME
(Multipurpose Internet Mail Extensions) Part One:
* Mechanisms for Specifying and Describing the Format of Internet
Message Bodies </a>
- *
+ *
* @author Apache Software Foundation
* @since 1.3
* @version $Id$
@@ -66,14 +76,6 @@ public class QuotedPrintableCodec implem
private static final byte TAB = 9;
private static final byte SPACE = 32;
-
- private static final byte CR = 13;
-
- private static final byte LF = 10;
-
- /** Safe line length for quoted printable encoded text. */
- private static final int SAFE_LENGTH = 73;
-
// Static initializer for printable chars collection
static {
// alpha characters
@@ -109,78 +111,26 @@ public class QuotedPrintableCodec implem
* Encodes byte into its quoted-printable representation.
*
* @param b
- * byte to encode
+ * byte to encode
* @param buffer
- * the buffer to write to
- * @return The number of bytes written to the <code>buffer</code>
+ * the buffer to write to
*/
- private static final int encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
+ private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
buffer.write(ESCAPE_CHAR);
char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
buffer.write(hex1);
buffer.write(hex2);
- return 3;
- }
-
- /**
- * Return the byte at position <code>index</code> of the byte array and
- * make sure it is unsigned.
- *
- * @param index
- * position in the array
- * @param bytes
- * the byte array
- * @return the unsigned octet at position <code>index</code> from the array
- */
- private static int getUnsignedOctet(final int index, final byte[] bytes) {
- int b = bytes[index];
- if (b < 0) {
- b = 256 + b;
- }
- return b;
- }
-
- /**
- * Write a byte to the buffer.
- *
- * @param b
- * byte to write
- * @param encode
- * indicates whether the octet shall be encoded
- * @param buffer
- * the buffer to write to
- * @return the number of bytes that have been written to the buffer
- */
- private static int encodeByte(final int b, final boolean encode,
- final ByteArrayOutputStream buffer) {
- if (encode) {
- return encodeQuotedPrintable(b, buffer);
- } else {
- buffer.write(b);
- return 1;
- }
- }
-
- /**
- * Checks whether the given byte is whitespace.
- *
- * @param b
- * byte to be checked
- * @return <code>true</code> if the byte is either a space or tab character
- */
- private static boolean isWhitespace(final int b) {
- return b == SPACE || b == TAB;
}
/**
* Encodes an array of bytes into an array of quoted-printable 7-bit
characters. Unsafe characters are escaped.
- *
+ *
* <p>
- * This function fully implements the quoted-printable encoding
specification (rule #1 through rule #5)
- * as defined in RFC 1521 and is suitable for encoding binary data and
unformatted text.
+ * This function implements a subset of quoted-printable encoding
specification (rule #1 and rule #2) as defined in
+ * RFC 1521 and is suitable for encoding binary data and unformatted text.
* </p>
- *
+ *
* @param printable
* bitset of characters deemed quoted-printable
* @param bytes
@@ -195,59 +145,29 @@ public class QuotedPrintableCodec implem
printable = PRINTABLE_CHARS;
}
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
- int pos = 1;
- // encode up to buffer.length - 3, the last three octets will be treated
- // separately for simplification of note #3
- for (int i = 0; i < bytes.length - 3; i++) {
- int b = getUnsignedOctet(i, bytes);
- if (pos < SAFE_LENGTH) {
- // up to this length it is safe to add any byte, encoded or not
- pos += encodeByte(b, !printable.get(b), buffer);
+ for (byte c : bytes) {
+ int b = c;
+ if (b < 0) {
+ b = 256 + b;
+ }
+ if (printable.get(b)) {
+ buffer.write(b);
} else {
- // rule #3: whitespace at the end of a line *must* be encoded
- encodeByte(b, !printable.get(b) || isWhitespace(b), buffer);
-
- // rule #5: soft line break
- buffer.write(ESCAPE_CHAR);
- buffer.write(CR);
- buffer.write(LF);
- pos = 1;
+ encodeQuotedPrintable(b, buffer);
}
}
-
- // rule #3: whitespace at the end of a line *must* be encoded
- // if we would do a soft break line after this octet, encode whitespace
- int b = getUnsignedOctet(bytes.length - 3, bytes);
- boolean encode = !printable.get(b) || (isWhitespace(b) && pos > SAFE_LENGTH - 5);
- pos += encodeByte(b, encode, buffer);
-
- // note #3: '=' *must not* be the ultimate or penultimate character
- // simplification: if < 6 bytes left, do a soft line break as we may need
- // exactly 6 bytes space for the last 2 bytes
- if (pos > SAFE_LENGTH - 2) {
- buffer.write(ESCAPE_CHAR);
- buffer.write(CR);
- buffer.write(LF);
- }
- for (int i = bytes.length - 2; i < bytes.length; i++) {
- b = getUnsignedOctet(i, bytes);
- // rule #3: trailing whitespace shall be encoded
- encode = !printable.get(b) || (i > bytes.length - 2 && isWhitespace(b));
- encodeByte(b, encode, buffer);
- }
-
return buffer.toByteArray();
}
/**
- * Decodes an array quoted-printable characters into an array of original
bytes. Escaped characters are
- * converted back to their original representation.
- *
+ * Decodes an array quoted-printable characters into an array of original
bytes. Escaped characters are converted
+ * back to their original representation.
+ *
* <p>
- * This function fully implements the quoted-printable encoding
specification (rule #1 through rule #5) as
- * defined in RFC 1521.
+ * This function implements a subset of quoted-printable encoding
specification (rule #1 and rule #2) as defined in
+ * RFC 1521.
* </p>
- *
+ *
* @param bytes
* array of quoted-printable characters
* @return array of original bytes
@@ -260,21 +180,16 @@ public class QuotedPrintableCodec implem
}
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
for (int i = 0; i < bytes.length; i++) {
- final int b = bytes[i];
+ int b = bytes[i];
if (b == ESCAPE_CHAR) {
try {
- // if the next octet is a CR we have found a soft line break
- if (bytes[++i] == CR) {
- continue;
- }
- int u = Utils.digit16(bytes[i]);
+ int u = Utils.digit16(bytes[++i]);
int l = Utils.digit16(bytes[++i]);
buffer.write((char) ((u << 4) + l));
} catch (ArrayIndexOutOfBoundsException e) {
throw new DecoderException("Invalid quoted-printable encoding", e);
}
- } else if (b != CR && b != LF) {
- // every other octet is appended except for CR & LF
+ } else {
buffer.write(b);
}
}
@@ -285,8 +200,8 @@ public class QuotedPrintableCodec implem
* Encodes an array of bytes into an array of quoted-printable 7-bit
characters. Unsafe characters are escaped.
*
* <p>
- * This function fully implements the quoted-printable encoding
specification (rule #1 through rule #5)
- * as defined in RFC 1521 and is suitable for encoding binary data and
unformatted text.
+ * This function implements a subset of quoted-printable encoding
specification (rule #1 and rule #2) as defined in
+ * RFC 1521 and is suitable for encoding binary data and unformatted text.
* </p>
*
* @param bytes
@@ -302,8 +217,8 @@ public class QuotedPrintableCodec implem
* back to their original representation.
*
* <p>
- * This function fully implements the quoted-printable encoding
specification (rule #1 through rule #2)
- * as defined in RFC 1521.
+ * This function implements a subset of quoted-printable encoding
specification (rule #1 and rule #2) as defined in
+ * RFC 1521.
* </p>
*
* @param bytes
@@ -320,8 +235,8 @@ public class QuotedPrintableCodec implem
* Encodes a string into its quoted-printable form using the default
string charset. Unsafe characters are escaped.
*
* <p>
- * This function fully implements the quoted-printable encoding
specification (rule #1 through rule #2)
- * as defined in RFC 1521 and is suitable for encoding binary data.
+ * This function implements a subset of quoted-printable encoding
specification (rule #1 and rule #2) as defined in
+ * RFC 1521 and is suitable for encoding binary data.
* </p>
*
* @param pString
@@ -450,8 +365,8 @@ public class QuotedPrintableCodec implem
* Encodes a string into its quoted-printable form using the specified
charset. Unsafe characters are escaped.
*
* <p>
- * This function fully implements the quoted-printable encoding
specification (rule #1 through rule #2)
- * as defined in RFC 1521 and is suitable for encoding binary data and
unformatted text.
+ * This function implements a subset of quoted-printable encoding
specification (rule #1 and rule #2) as defined in
+ * RFC 1521 and is suitable for encoding binary data and unformatted text.
* </p>
*
* @param pString
Modified:
commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java
URL:
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java?rev=1302639&r1=1302638&r2=1302639&view=diff
==============================================================================
---
commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java
(original)
+++
commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java
Mon Mar 19 20:21:26 2012
@@ -24,6 +24,7 @@ import static org.junit.Assert.fail;
import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
+import org.junit.Ignore;
import org.junit.Test;
/**
@@ -263,92 +264,30 @@ public class QuotedPrintableCodecTest {
}
@Test
+ @Ignore
+ /**
+ * The QuotedPrintableCodec documentation states that this is not supported.
+ *
+ * @throws Exception
+ * @see <a href="https://issues.apache.org/jira/browse/CODEC-121">CODEC-121</a>
+ */
public void testSoftLineBreakDecode() throws Exception {
- String qpdata = "If you believe that truth=3Dbeauty, then
surely=20=\r\nmathematics " +
- "is the most beautiful branch of philosophy.";
- String expected = "If you believe that truth=beauty, then surely
mathematics " +
- "is the most beautiful branch of philosophy.";
-
- QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
- assertEquals(expected, qpcodec.decode(qpdata));
-
- String encoded = qpcodec.encode(expected);
- assertEquals(expected, qpcodec.decode(encoded));
+ String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics is the most beautiful branch of philosophy.";
+ String expected = "If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy.";
+ assertEquals(expected, new QuotedPrintableCodec().decode(qpdata));
}
@Test
+ @Ignore
+ /**
+ * The QuotedPrintableCodec documentation states that this is not supported.
+ *
+ * @throws Exception
+ * @see <a href="https://issues.apache.org/jira/browse/CODEC-121">CODEC-121</a>
+ */
public void testSoftLineBreakEncode() throws Exception {
- String qpdata = "If you believe that truth=3Dbeauty, then surely
mathematics is the most " +
- "b=\r\neautiful branch of philosophy.";
- String expected = "If you believe that truth=beauty, then surely
mathematics is the most " +
- "beautiful branch of philosophy.";
-
- QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
- assertEquals(qpdata, qpcodec.encode(expected));
-
- String decoded = qpcodec.decode(qpdata);
- assertEquals(qpdata, qpcodec.encode(decoded));
- }
-
- @Test
- public void testSkipNotEncodedCRLF() throws Exception {
- String qpdata = "CRLF in an\n encoded text should be=20=\r\n\rskipped
in the\r decoding.";
- String expected = "CRLF in an encoded text should be skipped in the
decoding.";
-
- QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
- assertEquals(expected, qpcodec.decode(qpdata));
-
- String encoded = qpcodec.encode(expected);
- assertEquals(expected, qpcodec.decode(encoded));
- }
-
- @Test
- public void testTrailingSpecial() throws Exception {
- final QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
-
- String plain ="This is a example of a quoted-printable text file. This
might contain sp=cial chars.";
- String expected = "This is a example of a quoted-printable text file.
This might contain sp=3D=\r\ncial chars.";
- assertEquals(expected, qpcodec.encode(plain));
-
- plain ="This is a example of a quoted-printable text file. This might
contain ta\tbs as well.";
- expected = "This is a example of a quoted-printable text file. This
might contain ta=09=\r\nbs as well.";
- assertEquals(expected, qpcodec.encode(plain));
+ String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics is the most beautiful branch of philosophy.";
+ String expected = "If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy.";
+ assertEquals(qpdata, new QuotedPrintableCodec().encode(expected));
}
-
- @Test
- public void testUltimateSoftBreak() throws Exception {
- final QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
-
- String plain ="This is a example of a quoted-printable text file.
There is no end to it\t";
- String expected = "This is a example of a quoted-printable text file.
There is no end to i=\r\nt=09";
-
- assertEquals(expected, qpcodec.encode(plain));
-
- plain ="This is a example of a quoted-printable text file. There is no
end to it ";
- expected = "This is a example of a quoted-printable text file. There
is no end to i=\r\nt=20";
-
- assertEquals(expected, qpcodec.encode(plain));
-
- // whitespace before soft break
- plain ="This is a example of a quoted-printable text file. There is no
end to ";
- expected = "This is a example of a quoted-printable text file. There
is no end to=20=\r\n =20";
-
- assertEquals(expected, qpcodec.encode(plain));
-
- // non-printable character before soft break
- plain ="This is a example of a quoted-printable text file. There is no
end to= ";
- expected = "This is a example of a quoted-printable text file. There
is no end to=3D=\r\n =20";
-
- assertEquals(expected, qpcodec.encode(plain));
- }
-
- @Test
- public void testFinalBytes() throws Exception {
- // whitespace, but does not need to be encoded
- String plain ="This is a example of a quoted=printable text file.
There is no tt";
- String expected = "This is a example of a quoted=3Dprintable text
file. There is no tt";
-
- assertEquals(expected, new QuotedPrintableCodec().encode(plain));
- }
-
}