Author: sebb
Date: Sat Mar 17 12:29:15 2012
New Revision: 1301928
URL: http://svn.apache.org/viewvc?rev=1301928&view=rev
Log:
CSV-67 UnicodeUnescapeReader should not be applied before parsing
Removed:
commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/UnicodeUnescapeReader.java
Modified:
commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java
Modified:
commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
URL:
http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java?rev=1301928&r1=1301927&r2=1301928&view=diff
==============================================================================
---
commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
(original)
+++
commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
Sat Mar 17 12:29:15 2012
@@ -38,7 +38,6 @@ public class CSVFormat implements Serial
private final char escape;
private final boolean leadingSpacesIgnored;
private final boolean trailingSpacesIgnored;
- private final boolean unicodeEscapesInterpreted;
private final boolean emptyLinesIgnored;
private final String lineSeparator; // for outputs
private final String[] header;
@@ -53,7 +52,7 @@ public class CSVFormat implements Serial
static final char DISABLED = '\ufffe';
/** Standard comma separated format as defined by <a
href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. */
- public static final CSVFormat DEFAULT = new CSVFormat(',', '"', DISABLED,
DISABLED, true, true, false, true, CRLF, null);
+ public static final CSVFormat DEFAULT = new CSVFormat(',', '"', DISABLED,
DISABLED, true, true, true, CRLF, null);
/**
* Excel file format (using a comma as the value delimiter).
@@ -66,10 +65,10 @@ public class CSVFormat implements Serial
*
* <pre>CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');</pre>
*/
- public static final CSVFormat EXCEL = new CSVFormat(',', '"', DISABLED,
DISABLED, false, false, false, false, CRLF, null);
+ public static final CSVFormat EXCEL = new CSVFormat(',', '"', DISABLED,
DISABLED, false, false, false, CRLF, null);
/** Tab-delimited format, with quote; leading and trailing spaces ignored.
*/
- public static final CSVFormat TDF = new CSVFormat('\t', '"', DISABLED,
DISABLED, true, true, false, true, CRLF, null);
+ public static final CSVFormat TDF = new CSVFormat('\t', '"', DISABLED,
DISABLED, true, true, true, CRLF, null);
/**
* Default MySQL format used by the <tt>SELECT INTO OUTFILE</tt> and
@@ -79,7 +78,7 @@ public class CSVFormat implements Serial
*
* @see <a
href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
*/
- public static final CSVFormat MYSQL = new CSVFormat('\t', DISABLED,
DISABLED, '\\', false, false, false, false, "\n", null);
+ public static final CSVFormat MYSQL = new CSVFormat('\t', DISABLED,
DISABLED, '\\', false, false, false, "\n", null);
/**
@@ -91,7 +90,6 @@ public class CSVFormat implements Serial
* @param escape the char used to escape special
characters in values
* @param leadingSpacesIgnored <tt>true</tt> when leading whitespaces
should be ignored
* @param trailingSpacesIgnored <tt>true</tt> when trailing
whitespaces should be ignored
- * @param unicodeEscapesInterpreted <tt>true</tt> when unicode escapes
should be interpreted
* @param emptyLinesIgnored <tt>true</tt> when the parser should
skip emtpy lines
* @param lineSeparator the line separator to use for output
* @param header the header
@@ -103,7 +101,6 @@ public class CSVFormat implements Serial
char escape,
boolean leadingSpacesIgnored,
boolean trailingSpacesIgnored,
- boolean unicodeEscapesInterpreted,
boolean emptyLinesIgnored,
String lineSeparator,
String[] header) {
@@ -113,7 +110,6 @@ public class CSVFormat implements Serial
this.escape = escape;
this.leadingSpacesIgnored = leadingSpacesIgnored;
this.trailingSpacesIgnored = trailingSpacesIgnored;
- this.unicodeEscapesInterpreted = unicodeEscapesInterpreted;
this.emptyLinesIgnored = emptyLinesIgnored;
this.lineSeparator = lineSeparator;
this.header = header;
@@ -176,7 +172,7 @@ public class CSVFormat implements Serial
throw new IllegalArgumentException("The delimiter cannot be a line
break");
}
- return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted,
emptyLinesIgnored, lineSeparator, header);
+ return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator,
header);
}
/**
@@ -200,7 +196,7 @@ public class CSVFormat implements Serial
throw new IllegalArgumentException("The encapsulator cannot be a
line break");
}
- return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted,
emptyLinesIgnored, lineSeparator, header);
+ return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator,
header);
}
boolean isEncapsulating() {
@@ -228,7 +224,7 @@ public class CSVFormat implements Serial
throw new IllegalArgumentException("The comment start character
cannot be a line break");
}
- return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted,
emptyLinesIgnored, lineSeparator, header);
+ return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator,
header);
}
/**
@@ -261,7 +257,7 @@ public class CSVFormat implements Serial
throw new IllegalArgumentException("The escape character cannot be
a line break");
}
- return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted,
emptyLinesIgnored, lineSeparator, header);
+ return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator,
header);
}
boolean isEscaping() {
@@ -285,7 +281,7 @@ public class CSVFormat implements Serial
* @return A copy of this format with the specified left trimming behavior.
*/
public CSVFormat withLeadingSpacesIgnored(boolean leadingSpacesIgnored) {
- return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted,
emptyLinesIgnored, lineSeparator, header);
+ return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator,
header);
}
/**
@@ -305,7 +301,7 @@ public class CSVFormat implements Serial
* @return A copy of this format with the specified right trimming
behavior.
*/
public CSVFormat withTrailingSpacesIgnored(boolean trailingSpacesIgnored) {
- return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted,
emptyLinesIgnored, lineSeparator, header);
+ return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator,
header);
}
/**
@@ -316,28 +312,7 @@ public class CSVFormat implements Serial
* @return A copy of this format with the specified trimming behavior.
*/
public CSVFormat withSurroundingSpacesIgnored(boolean
surroundingSpacesIgnored) {
- return new CSVFormat(delimiter, encapsulator, commentStart, escape,
surroundingSpacesIgnored, surroundingSpacesIgnored, unicodeEscapesInterpreted,
emptyLinesIgnored, lineSeparator, header);
- }
-
- /**
- * Tells if unicode escape sequences (e.g. {@literal \u1234}) are turned
into their corresponding character
- * when parsing input.
- *
- * @return <tt>true</tt> if unicode escape sequences are interpreted,
<tt>false</tt> if they are left as is.
- */
- public boolean isUnicodeEscapesInterpreted() {
- return unicodeEscapesInterpreted;
- }
-
- /**
- * Returns a copy of this format with the specified unicode escaping
behavior.
- *
- * @param unicodeEscapesInterpreted the escaping behavior, <tt>true</tt>
to interpret unicode escape sequences,
- * <tt>false</tt> to leave the escape
sequences as is.
- * @return A copy of this format with the specified unicode escaping
behavior.
- */
- public CSVFormat withUnicodeEscapesInterpreted(boolean
unicodeEscapesInterpreted) {
- return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted,
emptyLinesIgnored, lineSeparator, header);
+ return new CSVFormat(delimiter, encapsulator, commentStart, escape,
surroundingSpacesIgnored, surroundingSpacesIgnored, emptyLinesIgnored,
lineSeparator, header);
}
/**
@@ -357,7 +332,7 @@ public class CSVFormat implements Serial
* @return A copy of this format with the specified empty line skipping
behavior.
*/
public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) {
- return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted,
emptyLinesIgnored, lineSeparator, header);
+ return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator,
header);
}
/**
@@ -377,7 +352,7 @@ public class CSVFormat implements Serial
* @return A copy of this format using the specified output line separator
*/
public CSVFormat withLineSeparator(String lineSeparator) {
- return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted,
emptyLinesIgnored, lineSeparator, header);
+ return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator,
header);
}
String[] getHeader() {
@@ -399,7 +374,7 @@ public class CSVFormat implements Serial
* @return A copy of this format using the specified header
*/
public CSVFormat withHeader(String... header) {
- return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted,
emptyLinesIgnored, lineSeparator, header);
+ return new CSVFormat(delimiter, encapsulator, commentStart, escape,
leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator,
header);
}
/**
Modified:
commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
URL:
http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java?rev=1301928&r1=1301927&r2=1301928&view=diff
==============================================================================
---
commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
(original)
+++
commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
Sat Mar 17 12:29:15 2012
@@ -92,10 +92,6 @@ public class CSVParser implements Iterab
public CSVParser(Reader input, CSVFormat format) throws IOException {
format.validate();
- if (format.isUnicodeEscapesInterpreted()) {
- input = new UnicodeUnescapeReader(input);
- }
-
this.lexer = new CSVLexer(format, new ExtendedBufferedReader(input));
initializeHeader(format);
Modified:
commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
URL:
http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java?rev=1301928&r1=1301927&r2=1301928&view=diff
==============================================================================
---
commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
(original)
+++
commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
Sat Mar 17 12:29:15 2012
@@ -30,7 +30,7 @@ public class CSVFormatTest {
@Test
public void testImmutalibity() {
- CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true,
true, "\r\n", null);
+ CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true,
"\r\n", null);
format.withDelimiter('?');
format.withEncapsulator('?');
@@ -40,7 +40,6 @@ public class CSVFormatTest {
format.withLeadingSpacesIgnored(false);
format.withTrailingSpacesIgnored(false);
format.withEmptyLinesIgnored(false);
- format.withUnicodeEscapesInterpreted(false);
assertEquals('!', format.getDelimiter());
assertEquals('!', format.getEncapsulator());
@@ -51,12 +50,11 @@ public class CSVFormatTest {
assertTrue(format.isLeadingSpacesIgnored());
assertTrue(format.isTrailingSpacesIgnored());
assertTrue(format.isEmptyLinesIgnored());
- assertTrue(format.isUnicodeEscapesInterpreted());
}
@Test
public void testMutators() {
- CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true,
true, "\r\n", null);
+ CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true,
"\r\n", null);
assertEquals('?', format.withDelimiter('?').getDelimiter());
assertEquals('?', format.withEncapsulator('?').getEncapsulator());
@@ -69,7 +67,6 @@ public class CSVFormatTest {
assertFalse(format.withSurroundingSpacesIgnored(false).isLeadingSpacesIgnored());
assertFalse(format.withSurroundingSpacesIgnored(false).isTrailingSpacesIgnored());
assertFalse(format.withEmptyLinesIgnored(false).isEmptyLinesIgnored());
-
assertFalse(format.withUnicodeEscapesInterpreted(false).isUnicodeEscapesInterpreted());
}
@Test
@@ -172,7 +169,6 @@ public class CSVFormatTest {
assertEquals("comment start", CSVFormat.DEFAULT.getCommentStart(),
format.getCommentStart());
assertEquals("line separator", CSVFormat.DEFAULT.getLineSeparator(),
format.getLineSeparator());
assertEquals("escape", CSVFormat.DEFAULT.getEscape(),
format.getEscape());
- assertEquals("unicode escape",
CSVFormat.DEFAULT.isUnicodeEscapesInterpreted(),
format.isUnicodeEscapesInterpreted());
assertEquals("trim left", CSVFormat.DEFAULT.isLeadingSpacesIgnored(),
format.isLeadingSpacesIgnored());
assertEquals("trim right",
CSVFormat.DEFAULT.isTrailingSpacesIgnored(), format.isTrailingSpacesIgnored());
assertEquals("empty lines", CSVFormat.DEFAULT.isEmptyLinesIgnored(),
format.isEmptyLinesIgnored());
Modified:
commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java
URL:
http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java?rev=1301928&r1=1301927&r2=1301928&view=diff
==============================================================================
---
commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java
(original)
+++
commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java
Sat Mar 17 12:29:15 2012
@@ -283,7 +283,7 @@ public class CSVParserTest {
};
- CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/',
false, false, true, true, "\r\n", null);
+ CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/',
false, false, true, "\r\n", null);
CSVParser parser = new CSVParser(code, format);
List<CSVRecord> records = parser.getRecords();
@@ -312,7 +312,7 @@ public class CSVParserTest {
};
- CSVFormat format = new CSVFormat(',', CSVFormat.DISABLED,
CSVFormat.DISABLED, '/', false, false, true, true, "\r\n", null);
+ CSVFormat format = new CSVFormat(',', CSVFormat.DISABLED,
CSVFormat.DISABLED, '/', false, false, true, "\r\n", null);
CSVParser parser = new CSVParser(code, format);
List<CSVRecord> records = parser.getRecords();
@@ -357,30 +357,6 @@ public class CSVParserTest {
}
@Test
- public void testUnicodeEscape() throws Exception {
- String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
- CSVParser parser = new CSVParser(code,
CSVFormat.DEFAULT.withUnicodeEscapesInterpreted(true));
- final Iterator<CSVRecord> iterator = parser.iterator();
- CSVRecord record = iterator.next();
- assertEquals(2, record.size());
- assertEquals("abc", record.get(0));
- assertEquals("public", record.get(1));
- assertFalse("Should not have any more records", iterator.hasNext());
- }
-
- @Test
- public void testUnicodeEscapeMySQL() throws Exception {
- String code = "abc\t\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
- CSVParser parser = new CSVParser(code,
CSVFormat.MYSQL.withUnicodeEscapesInterpreted(true));
- final Iterator<CSVRecord> iterator = parser.iterator();
- CSVRecord record = iterator.next();
- assertEquals(2, record.size());
- assertEquals("abc", record.get(0));
- assertEquals("public", record.get(1));
- assertFalse("Should not have any more records", iterator.hasNext());
- }
-
- @Test
public void testCarriageReturnLineFeedEndings() throws IOException {
String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
CSVParser parser = new CSVParser(new StringReader(code));