On 13 October 2012 13:14, Gary Gregory <[email protected]> wrote: > Thank you for the feedback Sebb. I'll do another pass later today.
See also my subsequent posting on the dev list. I think that might resolve the performance issue without needing to revert all the changes to CSVFormat. > Gary > > On Oct 13, 2012, at 6:28, sebb <[email protected]> wrote: > >> On 13 October 2012 07:27, <[email protected]> wrote: >>> Author: ggregory >>> Date: Sat Oct 13 06:27:52 2012 >>> New Revision: 1397783 >>> >>> URL: http://svn.apache.org/viewvc?rev=1397783&view=rev >>> Log: >>> Remove DISABLED character hack. >> >> -1 (for now) >> >> Are you sure this change does not affect performance? >> The Lexer code now has to do some boxing and unboxing. >> Unboxing is not expensive, but boxing is potentially so. >> >> Also, the code now allows for a null delimiter - is that really sensible? >> >> Also CSVFormat.getDelimiter() is inconsistent - it returns char >> whereas all the others return Character. >> >> I think the Lexer should stick to using char, particularly for the >> delimiter which cannot be null. >> >>> Modified: >>> >>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >>> >>> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >>> >>> Modified: >>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >>> URL: >>> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java?rev=1397783&r1=1397782&r2=1397783&view=diff >>> ============================================================================== >>> --- >>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >>> (original) >>> +++ >>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >>> Sat Oct 13 06:27:52 2012 >>> @@ -18,6 +18,7 @@ >>> package org.apache.commons.csv; >>> >>> import static org.apache.commons.csv.Constants.COMMA; >>> +import static org.apache.commons.csv.Constants.CR; >>> import static org.apache.commons.csv.Constants.CRLF; >>> import static org.apache.commons.csv.Constants.DOUBLE_QUOTE; >>> import static org.apache.commons.csv.Constants.ESCAPE; >>> @@ -38,30 +39,19 @@ public class CSVFormat implements Serial >>> >>> private static final long serialVersionUID = 1L; >>> >>> - private final char delimiter; >>> - private final char encapsulator; >>> - private final char commentStart; >>> - private final char escape; >>> + private final Character delimiter; >>> + private final Character encapsulator; >>> + private final Character commentStart; >>> + private final Character escape; >>> private final boolean ignoreSurroundingSpaces; // Should >>> leading/trailing spaces be ignored around values? >>> private final boolean ignoreEmptyLines; >>> private final String lineSeparator; // for outputs >>> private final String[] header; >>> >>> - private final boolean isEscaping; >>> - private final boolean isCommentingEnabled; >>> - private final boolean isEncapsulating; >>> - >>> - /** >>> - * Constant char to be used for disabling comments, escapes and >>> encapsulation. The value -2 is used because it >>> - * won't be confused with an EOF signal (-1), and because the Unicode >>> value {@code FFFE} would be encoded as two chars >>> - * (using surrogates) and thus there should never be a collision with >>> a real text char. >>> - */ >>> - static final char DISABLED = '\ufffe'; >>> - >>> /** >>> * Starting format with no settings defined; used for creating other >>> formats from scratch. >>> */ >>> - static final CSVFormat PRISTINE = new CSVFormat(DISABLED, DISABLED, >>> DISABLED, DISABLED, false, false, null, null); >>> + static final CSVFormat PRISTINE = new CSVFormat(null, null, null, >>> null, false, false, null, null); >>> >>> /** >>> * Standard comma separated format, as for {@link #RFC4180} but >>> allowing blank lines. >>> @@ -73,8 +63,8 @@ public class CSVFormat implements Serial >>> * </ul> >>> */ >>> public static final CSVFormat DEFAULT = >>> - PRISTINE. >>> - withDelimiter(COMMA) >>> + PRISTINE >>> + .withDelimiter(COMMA) >>> .withEncapsulator(DOUBLE_QUOTE) >>> .withIgnoreEmptyLines(true) >>> .withLineSeparator(CRLF); >>> @@ -89,8 +79,8 @@ public class CSVFormat implements Serial >>> * </ul> >>> */ >>> public static final CSVFormat RFC4180 = >>> - PRISTINE. >>> - withDelimiter(COMMA) >>> + PRISTINE >>> + .withDelimiter(COMMA) >>> .withEncapsulator(DOUBLE_QUOTE) >>> .withLineSeparator(CRLF); >>> >>> @@ -127,7 +117,7 @@ public class CSVFormat implements Serial >>> * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> >>> * http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a> >>> */ >>> - public static final CSVFormat MYSQL = >>> + public static final CSVFormat MYSQL = >>> PRISTINE >>> .withDelimiter(TAB) >>> .withEscape(ESCAPE) >>> @@ -153,7 +143,7 @@ public class CSVFormat implements Serial >>> * @param header >>> * the header >>> */ >>> - CSVFormat(final char delimiter, final char encapsulator, final char >>> commentStart, final char escape, final boolean surroundingSpacesIgnored, >>> + CSVFormat(final Character delimiter, final Character encapsulator, >>> final Character commentStart, final Character escape, final boolean >>> surroundingSpacesIgnored, >>> final boolean emptyLinesIgnored, final String lineSeparator, >>> final String[] header) { >>> this.delimiter = delimiter; >>> this.encapsulator = encapsulator; >>> @@ -163,9 +153,6 @@ public class CSVFormat implements Serial >>> this.ignoreEmptyLines = emptyLinesIgnored; >>> this.lineSeparator = lineSeparator; >>> this.header = header; >>> - this.isEncapsulating = encapsulator != DISABLED; >>> - this.isCommentingEnabled = commentStart != DISABLED; >>> - this.isEscaping = escape != DISABLED; >>> } >>> >>> /** >>> @@ -176,8 +163,8 @@ public class CSVFormat implements Serial >>> * >>> * @return true if <code>c</code> is a line break character >>> */ >>> - private static boolean isLineBreak(final char c) { >>> - return c == '\n' || c == '\r'; >>> + private static boolean isLineBreak(final Character c) { >>> + return c != null && (c == LF || c == CR); >>> } >>> >>> /** >>> @@ -199,12 +186,12 @@ public class CSVFormat implements Serial >>> commentStart + "\")"); >>> } >>> >>> - if (encapsulator != DISABLED && encapsulator == commentStart) { >>> + if (encapsulator != null && encapsulator == commentStart) { >>> throw new IllegalArgumentException( >>> "The comment start character and the encapsulator >>> cannot be the same (\"" + commentStart + "\")"); >>> } >>> >>> - if (escape != DISABLED && escape == commentStart) { >>> + if (escape != null && escape == commentStart) { >>> throw new IllegalArgumentException("The comment start and the >>> escape character cannot be the same (\"" + >>> commentStart + "\")"); >>> } >>> @@ -229,6 +216,19 @@ public class CSVFormat implements Serial >>> * thrown if the specified character is a line break >>> */ >>> public CSVFormat withDelimiter(final char delimiter) { >>> + return withDelimiter(Character.valueOf(delimiter)); >>> + } >>> + >>> + /** >>> + * Returns a copy of this format using the specified delimiter >>> character. >>> + * >>> + * @param delimiter >>> + * the delimiter character >>> + * @return A copy of this format using the specified delimiter >>> character >>> + * @throws IllegalArgumentException >>> + * thrown if the specified character is a line break >>> + */ >>> + public CSVFormat withDelimiter(final Character delimiter) { >>> if (isLineBreak(delimiter)) { >>> throw new IllegalArgumentException("The delimiter cannot be a >>> line break"); >>> } >>> @@ -241,7 +241,7 @@ public class CSVFormat implements Serial >>> * >>> * @return the encapsulator character >>> */ >>> - public char getEncapsulator() { >>> + public Character getEncapsulator() { >>> return encapsulator; >>> } >>> >>> @@ -255,6 +255,19 @@ public class CSVFormat implements Serial >>> * thrown if the specified character is a line break >>> */ >>> public CSVFormat withEncapsulator(final char encapsulator) { >>> + return withEncapsulator(Character.valueOf(encapsulator)); >>> + } >>> + >>> + /** >>> + * Returns a copy of this format using the specified encapsulator >>> character. >>> + * >>> + * @param encapsulator >>> + * the encapsulator character >>> + * @return A copy of this format using the specified encapsulator >>> character >>> + * @throws IllegalArgumentException >>> + * thrown if the specified character is a line break >>> + */ >>> + public CSVFormat withEncapsulator(final Character encapsulator) { >>> if (isLineBreak(encapsulator)) { >>> throw new IllegalArgumentException("The encapsulator cannot be >>> a line break"); >>> } >>> @@ -268,7 +281,7 @@ public class CSVFormat implements Serial >>> * @return {@code true} if an encapsulator is defined >>> */ >>> public boolean isEncapsulating() { >>> - return isEncapsulating; >>> + return encapsulator != null; >>> } >>> >>> /** >>> @@ -276,7 +289,7 @@ public class CSVFormat implements Serial >>> * >>> * @return the comment start marker. >>> */ >>> - public char getCommentStart() { >>> + public Character getCommentStart() { >>> return commentStart; >>> } >>> >>> @@ -292,6 +305,21 @@ public class CSVFormat implements Serial >>> * thrown if the specified character is a line break >>> */ >>> public CSVFormat withCommentStart(final char commentStart) { >>> + return withCommentStart(Character.valueOf(commentStart)); >>> + } >>> + >>> + /** >>> + * Returns a copy of this format using the specified character as the >>> comment start marker. >>> + * >>> + * Note that the comment introducer character is only recognised at >>> the start of a line. >>> + * >>> + * @param commentStart >>> + * the comment start marker >>> + * @return A copy of this format using the specified character as the >>> comment start marker >>> + * @throws IllegalArgumentException >>> + * thrown if the specified character is a line break >>> + */ >>> + public CSVFormat withCommentStart(final Character commentStart) { >>> if (isLineBreak(commentStart)) { >>> throw new IllegalArgumentException("The comment start character >>> cannot be a line break"); >>> } >>> @@ -307,7 +335,7 @@ public class CSVFormat implements Serial >>> * @return <tt>true</tt> is comments are supported, <tt>false</tt> >>> otherwise >>> */ >>> public boolean isCommentingEnabled() { >>> - return isCommentingEnabled; >>> + return commentStart != null; >>> } >>> >>> /** >>> @@ -315,7 +343,7 @@ public class CSVFormat implements Serial >>> * >>> * @return the escape character >>> */ >>> - public char getEscape() { >>> + public Character getEscape() { >>> return escape; >>> } >>> >>> @@ -329,6 +357,19 @@ public class CSVFormat implements Serial >>> * thrown if the specified character is a line break >>> */ >>> public CSVFormat withEscape(final char escape) { >>> + return withEscape(Character.valueOf(escape)); >>> + } >>> + >>> + /** >>> + * Returns a copy of this format using the specified escape character. >>> + * >>> + * @param escape >>> + * the escape character >>> + * @return A copy of this format using the specified escape character >>> + * @throws IllegalArgumentException >>> + * thrown if the specified character is a line break >>> + */ >>> + public CSVFormat withEscape(final Character escape) { >>> if (isLineBreak(escape)) { >>> throw new IllegalArgumentException("The escape character cannot >>> be a line break"); >>> } >>> @@ -342,7 +383,7 @@ public class CSVFormat implements Serial >>> * @return {@code true} if escapes are processed >>> */ >>> public boolean isEscaping() { >>> - return isEscaping; >>> + return escape != null; >>> } >>> >>> /** >>> >>> Modified: >>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >>> URL: >>> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java?rev=1397783&r1=1397782&r2=1397783&view=diff >>> ============================================================================== >>> --- >>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >>> (original) >>> +++ >>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >>> Sat Oct 13 06:27:52 2012 >>> @@ -32,14 +32,10 @@ import java.io.IOException; >>> */ >>> abstract class Lexer { >>> >>> - private final boolean isEncapsulating; >>> - private final boolean isEscaping; >>> - private final boolean isCommentEnabled; >>> - >>> - private final char delimiter; >>> - private final char escape; >>> - private final char encapsulator; >>> - private final char commmentStart; >>> + private final Character delimiter; >>> + private final Character escape; >>> + private final Character encapsulator; >>> + private final Character commmentStart; >>> >>> final boolean surroundingSpacesIgnored; >>> final boolean emptyLinesIgnored; >>> @@ -52,9 +48,6 @@ abstract class Lexer { >>> Lexer(final CSVFormat format, final ExtendedBufferedReader in) { >>> this.format = format; >>> this.in = in; >>> - this.isEncapsulating = format.isEncapsulating(); >>> - this.isEscaping = format.isEscaping(); >>> - this.isCommentEnabled = format.isCommentingEnabled(); >>> this.delimiter = format.getDelimiter(); >>> this.escape = format.getEscape(); >>> this.encapsulator = format.getEncapsulator(); >>> @@ -144,14 +137,14 @@ abstract class Lexer { >>> } >>> >>> boolean isEscape(final int c) { >>> - return isEscaping && c == escape; >>> + return escape != null && c == escape; >>> } >>> >>> boolean isEncapsulator(final int c) { >>> - return isEncapsulating && c == encapsulator; >>> + return encapsulator != null && c == encapsulator; >>> } >>> >>> boolean isCommentStart(final int c) { >>> - return isCommentEnabled && c == commmentStart; >>> + return commmentStart != null && c == commmentStart; >>> } >>> } >>> >>> Modified: >>> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >>> URL: >>> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java?rev=1397783&r1=1397782&r2=1397783&view=diff >>> ============================================================================== >>> --- >>> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >>> (original) >>> +++ >>> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >>> Sat Oct 13 06:27:52 2012 >>> @@ -46,9 +46,9 @@ public class CSVFormatTest { >>> format.withIgnoreEmptyLines(false); >>> >>> assertEquals('!', format.getDelimiter()); >>> - assertEquals('!', format.getEncapsulator()); >>> - assertEquals('!', format.getCommentStart()); >>> - assertEquals('!', format.getEscape()); >>> + assertEquals('!', format.getEncapsulator().charValue()); >>> + assertEquals('!', format.getCommentStart().charValue()); >>> + assertEquals('!', format.getEscape().charValue()); >>> assertEquals(CRLF, format.getLineSeparator()); >>> >>> assertTrue(format.getIgnoreSurroundingSpaces()); >>> @@ -60,10 +60,10 @@ public class CSVFormatTest { >>> final CSVFormat format = new CSVFormat('!', '!', '!', '!', true, >>> true, CRLF, null); >>> >>> assertEquals('?', format.withDelimiter('?').getDelimiter()); >>> - assertEquals('?', format.withEncapsulator('?').getEncapsulator()); >>> - assertEquals('?', format.withCommentStart('?').getCommentStart()); >>> + assertEquals('?', >>> format.withEncapsulator('?').getEncapsulator().charValue()); >>> + assertEquals('?', >>> format.withCommentStart('?').getCommentStart().charValue()); >>> assertEquals("?", format.withLineSeparator("?").getLineSeparator()); >>> - assertEquals('?', format.withEscape('?').getEscape()); >>> + assertEquals('?', format.withEscape('?').getEscape().charValue()); >>> >>> >>> assertFalse(format.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces()); >>> >>> assertFalse(format.withIgnoreEmptyLines(false).getIgnoreEmptyLines()); >>> @@ -131,7 +131,7 @@ public class CSVFormatTest { >>> // expected >>> } >>> >>> - >>> format.withEncapsulator(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate(); >>> + format.withEncapsulator(null).withCommentStart(null).validate(); >>> >>> try { >>> format.withEscape('!').withCommentStart('!').validate(); >>> @@ -140,7 +140,7 @@ public class CSVFormatTest { >>> // expected >>> } >>> >>> - >>> format.withEscape(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate(); >>> + format.withEscape(null).withCommentStart(null).validate(); >>> >>> >>> try { >>> >>> >> >> --------------------------------------------------------------------- >> To unsubscribe, e-mail: [email protected] >> For additional commands, e-mail: [email protected] >> > > --------------------------------------------------------------------- > To unsubscribe, e-mail: [email protected] > For additional commands, e-mail: [email protected] > --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
