This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git
The following commit(s) were added to refs/heads/master by this push:
new c137e804 A different take on PR #303
c137e804 is described below
commit c137e80407af4eba4766024fe2e566b224ed0d7b
Author: Gary Gregory <[email protected]>
AuthorDate: Tue Mar 12 19:41:43 2024 -0400
A different take on PR #303
Add support for trailing text after the closing quote, and EOF without a
final closing quote, for Excel compatibility. Fix a unit test and add a
RAT exclude for the sample CSV file.
---
pom.xml | 4 +-
src/changes/changes.xml | 5 +-
.../java/org/apache/commons/csv/CSVFormat.java | 162 +++++++++++++++------
src/main/java/org/apache/commons/csv/Lexer.java | 18 ++-
.../java/org/apache/commons/csv/CSVParserTest.java | 18 +--
.../java/org/apache/commons/csv/LexerTest.java | 26 ++++
6 files changed, 170 insertions(+), 63 deletions(-)
diff --git a/pom.xml b/pom.xml
index 9e0ab765..4f815a30 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,7 +23,7 @@
<version>67</version>
</parent>
<artifactId>commons-csv</artifactId>
- <version>1.10.1-SNAPSHOT</version>
+ <version>1.11.0-SNAPSHOT</version>
<name>Apache Commons CSV</name>
<url>https://commons.apache.org/proper/commons-csv/</url>
<inceptionYear>2005</inceptionYear>
@@ -161,7 +161,7 @@
</distributionManagement>
<properties>
- <commons.release.version>1.10.1</commons.release.version>
+ <commons.release.version>1.11.0</commons.release.version>
<commons.release.desc>(Java 8 or above)</commons.release.desc>
<!-- The RC version used in the staging repository URL. -->
<commons.rc.version>RC1</commons.rc.version>
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 5d1a4ceb..a8ff2b2b 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -42,7 +42,9 @@
<body>
<release version="1.10.1" date="YYYY-MM-DD" description="Feature and bug
fix release (Java 8 or above)">
<!-- ADD -->
- <action issue="CSV-308" type="fix" dev="ggregory" due-to="Buddhi De
Silva, Gary Gregory">[Javadoc] Add example to CSVFormat#setHeaderComments()
#344.</action>
+ <action issue="CSV-308" type="add" dev="ggregory" due-to="Buddhi De
Silva, Gary Gregory">[Javadoc] Add example to CSVFormat#setHeaderComments()
#344.</action>
+ <action type="add" dev="ggregory" due-to="DamjanJovanovic, Gary
Gregory">Add and use CSVFormat#setTrailingData(boolean) in CSVFormat.EXCEL for
Excel compatibility #303.</action>
+ <action type="add" dev="ggregory" due-to="DamjanJovanovic, Gary
Gregory">Add and use CSVFormat#setLenientEof(boolean) in CSVFormat.EXCEL for
Excel compatibility #303.</action>
<!-- FIX -->
<action type="fix" issue="CSV-306" dev="ggregory" due-to="Sam Ng, Bruno
P. Kinoshita">Replace deprecated method in user guide, update external link
#324, #325.</action>
<action type="fix" dev="ggregory" due-to="Seth Falco, Bruno P.
Kinoshita">Document duplicate header behavior #309.</action>
@@ -53,6 +55,7 @@
<action type="fix" issue="CSV-311" dev="ggregory" due-to="Christian
Feuersaenger, Gary Gregory">OutOfMemory for very long rows despite using column
value of type Reader.</action>
<action type="fix" dev="ggregory" due-to="Gary Gregory">Use
try-with-resources to manage JDBC Clob in
CSVPrinter.printRecords(ResultSet).</action>
<action type="fix" dev="ggregory" due-to="Gary Gregory">JDBC Blob
columns are now output as Base64 instead of Object#toString(), which usually is
InputStream#toString().</action>
+ <action type="fix" dev="ggregory" due-to="DamjanJovanovic, Gary
Gregory">Support unusual Excel use cases: Add support for trailing data after
the closing quote, and EOF without a final closing quote #303.</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump
commons-io:commons-io: from 2.11.0 to 2.15.1.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory,
Dependabot">Bump commons-parent from 57 to 67.</action>
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java
b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 522b7785..754d2229 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -248,6 +248,10 @@ public final class CSVFormat implements Serializable {
private boolean skipHeaderRecord;
+ private boolean lenientEof;
+
+ private boolean trailingData;
+
private boolean trailingDelimiter;
private boolean trim;
@@ -267,6 +271,8 @@ public final class CSVFormat implements Serializable {
this.headers = csvFormat.headers;
this.skipHeaderRecord = csvFormat.skipHeaderRecord;
this.ignoreHeaderCase = csvFormat.ignoreHeaderCase;
+ this.lenientEof = csvFormat.lenientEof;
+ this.trailingData = csvFormat.trailingData;
this.trailingDelimiter = csvFormat.trailingDelimiter;
this.trim = csvFormat.trim;
this.autoFlush = csvFormat.autoFlush;
@@ -689,6 +695,18 @@ public final class CSVFormat implements Serializable {
return this;
}
+ /**
+ * Sets whether reading end-of-file is allowed even when input is
malformed, helps Excel compatibility.
+ *
+ * @param lenientEof whether reading end-of-file is allowed even when
input is malformed, helps Excel compatibility.
+ * @return This instance.
+ * @since 1.11.0
+ */
+ public Builder setLenientEof(final boolean lenientEof) {
+ this.lenientEof = lenientEof;
+ return this;
+ }
+
/**
* Sets the String to convert to and from {@code null}. No
substitution occurs if {@code null}.
*
@@ -785,6 +803,18 @@ public final class CSVFormat implements Serializable {
return this;
}
+ /**
+ * Sets whether reading trailing data is allowed in records, helps
Excel compatibility.
+ *
+ * @param trailingData whether reading trailing data is allowed in
records, helps Excel compatibility.
+ * @return This instance.
+ * @since 1.11.0
+ */
+ public Builder setTrailingData(final boolean trailingData) {
+ this.trailingData = trailingData;
+ return this;
+ }
+
/**
* Sets whether to add a trailing delimiter.
*
@@ -914,7 +944,7 @@ public final class CSVFormat implements Serializable {
* @see Predefined#Default
*/
public static final CSVFormat DEFAULT = new CSVFormat(COMMA,
DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null,
false, false, false,
- false, false, false, DuplicateHeaderMode.ALLOW_ALL);
+ false, false, false, DuplicateHeaderMode.ALLOW_ALL, false, false);
/**
* Excel file format (using a comma as the value delimiter). Note that the
actual value delimiter used by Excel is locale-dependent, it might be necessary
@@ -935,9 +965,11 @@ public final class CSVFormat implements Serializable {
* <li>{@code setDelimiter(',')}</li>
* <li>{@code setQuote('"')}</li>
* <li>{@code setRecordSeparator("\r\n")}</li>
+ * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
* <li>{@code setIgnoreEmptyLines(false)}</li>
* <li>{@code setAllowMissingColumnNames(true)}</li>
- * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
+ * <li>{@code setTrailingData(true)}</li>
+ * <li>{@code setLenientEof(true)}</li>
* </ul>
* <p>
* Note: This is currently like {@link #RFC4180} plus {@link
Builder#setAllowMissingColumnNames(boolean)
Builder#setAllowMissingColumnNames(true)} and
@@ -950,6 +982,8 @@ public final class CSVFormat implements Serializable {
public static final CSVFormat EXCEL = DEFAULT.builder()
.setIgnoreEmptyLines(false)
.setAllowMissingColumnNames(true)
+ .setTrailingData(true)
+ .setLenientEof(true)
.build();
// @formatter:on
@@ -1372,7 +1406,7 @@ public final class CSVFormat implements Serializable {
*/
public static CSVFormat newFormat(final char delimiter) {
return new CSVFormat(String.valueOf(delimiter), null, null, null,
null, false, false, null, null, null, null, false, false, false, false, false,
false,
- DuplicateHeaderMode.ALLOW_ALL);
+ DuplicateHeaderMode.ALLOW_ALL, false, false);
}
static String[] toStringArray(final Object[] values) {
@@ -1455,6 +1489,10 @@ public final class CSVFormat implements Serializable {
private final boolean skipHeaderRecord;
+ private final boolean lenientEof;
+
+ private final boolean trailingData;
+
private final boolean trailingDelimiter;
private final boolean trim;
@@ -1474,6 +1512,8 @@ public final class CSVFormat implements Serializable {
this.headers = builder.headers;
this.skipHeaderRecord = builder.skipHeaderRecord;
this.ignoreHeaderCase = builder.ignoreHeaderCase;
+ this.lenientEof = builder.lenientEof;
+ this.trailingData = builder.trailingData;
this.trailingDelimiter = builder.trailingDelimiter;
this.trim = builder.trim;
this.autoFlush = builder.autoFlush;
@@ -1494,22 +1534,24 @@ public final class CSVFormat implements Serializable {
* @param ignoreEmptyLines {@code true} when the parser should skip
empty lines.
* @param recordSeparator the line separator to use for output.
* @param nullString the line separator to use for output.
- * @param headerComments the comments to be printed by the
Printer before the actual CSV data.
- * @param header the header
- * @param skipHeaderRecord if {@code true} the header row will be
skipped
- * @param allowMissingColumnNames if {@code true} the missing column names
are allowed when parsing the header line
- * @param ignoreHeaderCase if {@code true} header names will be
accessed ignoring case when parsing input
- * @param trim if {@code true} next record value will
be trimmed
- * @param trailingDelimiter if {@code true} the trailing delimiter
wil be added before record separator (if set)
- * @param autoFlush if {@code true} the underlying stream
will be flushed before closing
- * @param duplicateHeaderMode the behavior when handling duplicate
headers
+ * @param headerComments the comments to be printed by the
Printer before the actual CSV data.
+ * @param header the header.
+ * @param skipHeaderRecord if {@code true} the header row will be
skipped.
+ * @param allowMissingColumnNames if {@code true} the missing column names
are allowed when parsing the header line.
+ * @param ignoreHeaderCase if {@code true} header names will be
accessed ignoring case when parsing input.
+ * @param trim if {@code true} next record value will
be trimmed.
+ * @param trailingDelimiter if {@code true} the trailing delimiter
will be added before record separator (if set).
+ * @param autoFlush if {@code true} the underlying stream
will be flushed before closing.
+ * @param duplicateHeaderMode the behavior when handling duplicate
headers.
+ * @param trailingData whether reading trailing data is allowed
in records, helps Excel compatibility.
+ * @param lenientEof whether reading end-of-file is allowed
even when input is malformed, helps Excel compatibility.
* @throws IllegalArgumentException if the delimiter is a line break
character.
*/
private CSVFormat(final String delimiter, final Character quoteChar, final
QuoteMode quoteMode, final Character commentStart, final Character escape,
final boolean ignoreSurroundingSpaces, final boolean
ignoreEmptyLines, final String recordSeparator, final String nullString,
final Object[] headerComments, final String[] header, final
boolean skipHeaderRecord, final boolean allowMissingColumnNames,
final boolean ignoreHeaderCase, final boolean trim, final boolean
trailingDelimiter, final boolean autoFlush,
- final DuplicateHeaderMode duplicateHeaderMode) {
+ final DuplicateHeaderMode duplicateHeaderMode, final boolean
trailingData, final boolean lenientEof) {
this.delimiter = delimiter;
this.quoteCharacter = quoteChar;
this.quoteMode = quoteMode;
@@ -1524,6 +1566,8 @@ public final class CSVFormat implements Serializable {
this.headers = clone(header);
this.skipHeaderRecord = skipHeaderRecord;
this.ignoreHeaderCase = ignoreHeaderCase;
+ this.lenientEof = lenientEof;
+ this.trailingData = trailingData;
this.trailingDelimiter = trailingDelimiter;
this.trim = trim;
this.autoFlush = autoFlush;
@@ -1571,18 +1615,23 @@ public final class CSVFormat implements Serializable {
if (this == obj) {
return true;
}
- if (obj == null || getClass() != obj.getClass()) {
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
return false;
}
final CSVFormat other = (CSVFormat) obj;
- return duplicateHeaderMode == other.duplicateHeaderMode &&
allowMissingColumnNames == other.allowMissingColumnNames &&
- autoFlush == other.autoFlush && Objects.equals(commentMarker,
other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
- Objects.equals(escapeCharacter, other.escapeCharacter) &&
Arrays.equals(headers, other.headers) &&
- Arrays.equals(headerComments, other.headerComments) &&
ignoreEmptyLines == other.ignoreEmptyLines &&
- ignoreHeaderCase == other.ignoreHeaderCase &&
ignoreSurroundingSpaces == other.ignoreSurroundingSpaces &&
- Objects.equals(nullString, other.nullString) &&
Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode ==
other.quoteMode &&
- Objects.equals(quotedNullString, other.quotedNullString) &&
Objects.equals(recordSeparator, other.recordSeparator) &&
- skipHeaderRecord == other.skipHeaderRecord &&
trailingDelimiter == other.trailingDelimiter && trim == other.trim;
+ return allowMissingColumnNames == other.allowMissingColumnNames &&
autoFlush == other.autoFlush &&
+ Objects.equals(commentMarker, other.commentMarker) &&
Objects.equals(delimiter, other.delimiter) &&
+ duplicateHeaderMode == other.duplicateHeaderMode &&
Objects.equals(escapeCharacter, other.escapeCharacter) &&
+ Arrays.equals(headerComments, other.headerComments) &&
Arrays.equals(headers, other.headers) &&
+ ignoreEmptyLines == other.ignoreEmptyLines && ignoreHeaderCase
== other.ignoreHeaderCase &&
+ ignoreSurroundingSpaces == other.ignoreSurroundingSpaces &&
lenientEof == other.lenientEof &&
+ Objects.equals(nullString, other.nullString) &&
Objects.equals(quoteCharacter, other.quoteCharacter) &&
+ quoteMode == other.quoteMode &&
Objects.equals(quotedNullString, other.quotedNullString) &&
+ Objects.equals(recordSeparator, other.recordSeparator) &&
skipHeaderRecord == other.skipHeaderRecord &&
+ trailingData == other.trailingData && trailingDelimiter ==
other.trailingDelimiter && trim == other.trim;
}
private void escape(final char c, final Appendable appendable) throws
IOException {
@@ -1808,6 +1857,16 @@ public final class CSVFormat implements Serializable {
return ignoreSurroundingSpaces;
}
+ /**
+ * Gets whether reading end-of-file is allowed even when input is
malformed, helps Excel compatibility.
+ *
+ * @return whether reading end-of-file is allowed even when input is
malformed, helps Excel compatibility.
+ * @since 1.11.0
+ */
+ public boolean getLenientEof() {
+ return lenientEof;
+ }
+
/**
* Gets the String to convert to and from {@code null}.
* <ul>
@@ -1857,6 +1916,16 @@ public final class CSVFormat implements Serializable {
return skipHeaderRecord;
}
+ /**
+ * Gets whether reading trailing data is allowed in records, helps Excel
compatibility.
+ *
+ * @return whether reading trailing data is allowed in records, helps
Excel compatibility.
+ * @since 1.11.0
+ */
+ public boolean getTrailingData() {
+ return trailingData;
+ }
+
/**
* Gets whether to add a trailing delimiter.
*
@@ -1881,11 +1950,12 @@ public final class CSVFormat implements Serializable {
public int hashCode() {
final int prime = 31;
int result = 1;
- result = prime * result + Arrays.hashCode(headers);
result = prime * result + Arrays.hashCode(headerComments);
- return prime * result + Objects.hash(duplicateHeaderMode,
allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
- ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces,
nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
- skipHeaderRecord, trailingDelimiter, trim);
+ result = prime * result + Arrays.hashCode(headers);
+ result = prime * result + Objects.hash(allowMissingColumnNames,
autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter,
+ ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces,
lenientEof, nullString, quoteCharacter, quoteMode, quotedNullString,
+ recordSeparator, skipHeaderRecord, trailingData,
trailingDelimiter, trim);
+ return result;
}
/**
@@ -2006,6 +2076,26 @@ public final class CSVFormat implements Serializable {
return new CSVPrinter(new OutputStreamWriter(new
FileOutputStream(out), charset), this);
}
+ private void print(final InputStream inputStream, final Appendable out,
final boolean newRecord) throws IOException {
+ // InputStream is never null here
+ // There is nothing to escape when quoting is used which is the
default.
+ if (!newRecord) {
+ append(getDelimiterString(), out);
+ }
+ final boolean quoteCharacterSet = isQuoteCharacterSet();
+ if (quoteCharacterSet) {
+ append(getQuoteCharacter().charValue(), out);
+ }
+ // Stream the input to the output without reading or holding the whole
value in memory.
+ // AppendableOutputStream cannot "close" an Appendable.
+ try (OutputStream outputStream = new Base64OutputStream(new
AppendableOutputStream<>(out))) {
+ IOUtils.copy(inputStream, outputStream);
+ }
+ if (quoteCharacterSet) {
+ append(getQuoteCharacter().charValue(), out);
+ }
+ }
+
/**
* Prints the {@code value} as the next value on the line to {@code out}.
The value will be escaped or encapsulated as needed. Useful when one wants to
* avoid creating CSVPrinters. Trims the value if {@link #getTrim()} is
true.
@@ -2081,26 +2171,6 @@ public final class CSVFormat implements Serializable {
return print(Files.newBufferedWriter(out, charset));
}
- private void print(final InputStream inputStream, final Appendable out,
final boolean newRecord) throws IOException {
- // InputStream is never null here
- // There is nothing to escape when quoting is used which is the
default.
- if (!newRecord) {
- append(getDelimiterString(), out);
- }
- final boolean quoteCharacterSet = isQuoteCharacterSet();
- if (quoteCharacterSet) {
- append(getQuoteCharacter().charValue(), out);
- }
- // Stream the input to the output without reading or holding the whole
value in memory.
- // AppendableOutputStream cannot "close" an Appendable.
- try (OutputStream outputStream = new Base64OutputStream(new
AppendableOutputStream<>(out))) {
- IOUtils.copy(inputStream, outputStream);
- }
- if (quoteCharacterSet) {
- append(getQuoteCharacter().charValue(), out);
- }
- }
-
private void print(final Reader reader, final Appendable out, final
boolean newRecord) throws IOException {
// Reader is never null here
if (!newRecord) {
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java
b/src/main/java/org/apache/commons/csv/Lexer.java
index ef379ec3..17113227 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -53,9 +53,10 @@ final class Lexer implements Closeable {
private final char escape;
private final char quoteChar;
private final char commentStart;
-
private final boolean ignoreSurroundingSpaces;
private final boolean ignoreEmptyLines;
+ private final boolean lenientEof;
+ private final boolean trailingData;
/** The input stream */
private final ExtendedBufferedReader reader;
@@ -71,6 +72,8 @@ final class Lexer implements Closeable {
this.commentStart = mapNullToDisabled(format.getCommentMarker());
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
+ this.lenientEof = format.getLenientEof();
+ this.trailingData = format.getTrailingData();
this.delimiterBuf = new char[delimiter.length - 1];
this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
}
@@ -364,14 +367,21 @@ final class Lexer implements Closeable {
token.type = EORECORD;
return token;
}
- if (!Character.isWhitespace((char)c)) {
+ if (trailingData) {
+ token.content.append((char) c);
+ } else if (!Character.isWhitespace((char) c)) {
// error invalid char between token and next
delimiter
- throw new IOException("Invalid char between
encapsulated token and delimiter at line: " +
- getCurrentLineNumber() + ", position: " +
getCharacterPosition());
+ throw new IOException(String.format("Invalid char
between encapsulated token and delimiter at line: %,d, position: %,d",
+ getCurrentLineNumber(),
getCharacterPosition()));
}
}
}
} else if (isEndOfFile(c)) {
+ if (lenientEof) {
+ token.type = Token.Type.EOF;
+ token.isReady = true; // There is data at EOF
+ return token;
+ }
// error condition (end of file before end of token)
throw new IOException("(startline " + startLineNumber +
") EOF reached before encapsulated token finished");
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java
b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 52173287..b3a51378 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -299,7 +299,6 @@ public class CSVParserTest {
}
@Test
- @Disabled("PR 295 does not work")
public void testCSV141Excel() throws Exception {
testCSV141Ok(CSVFormat.EXCEL);
}
@@ -357,16 +356,15 @@ public class CSVParserTest {
record = parser.nextRecord();
assertEquals("1414770318327", record.get(0));
assertEquals("android.widget.EditText", record.get(1));
- assertEquals("pass sem1", record.get(2));
- assertEquals(3, record.size());
- // row 4
+ assertEquals("pass sem1\n1414770318628\"", record.get(2));
+ assertEquals("android.widget.EditText", record.get(3));
+ assertEquals("pass sem1 _84*|*", record.get(4));
+ assertEquals("0", record.get(5));
+ assertEquals("pass sem1\n", record.get(6));
+ assertEquals(7, record.size());
+ // EOF
record = parser.nextRecord();
- assertEquals("1414770318628", record.get(0));
- assertEquals("android.widget.EditText", record.get(1));
- assertEquals("pass sem1 _84*|*", record.get(2));
- assertEquals("0", record.get(3));
- assertEquals("pass sem1", record.get(4));
- assertEquals(5, record.size());
+ assertNull(record);
}
}
diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java
b/src/test/java/org/apache/commons/csv/LexerTest.java
index f36eaa45..3bc55a00 100644
--- a/src/test/java/org/apache/commons/csv/LexerTest.java
+++ b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -196,6 +196,19 @@ public class LexerTest {
}
}
+ @Test
+ public void testEOFWithoutClosingQuote() throws Exception {
+ final String code = "a,\"b";
+ try (final Lexer parser = createLexer(code,
CSVFormat.Builder.create().setLenientEof(true).build())) {
+ assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
+ assertThat(parser.nextToken(new Token()), matches(EOF, "b"));
+ }
+ try (final Lexer parser = createLexer(code,
CSVFormat.Builder.create().setLenientEof(false).build())) {
+ assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
+ assertThrows(IOException.class, () -> parser.nextToken(new
Token()));
+ }
+ }
+
@Test // TODO is this correct? Do we expect <esc>BACKSPACE to be unescaped?
public void testEscapedBackspace() throws Exception {
try (final Lexer lexer = createLexer("character\\" + BACKSPACE +
"Escaped", formatWithEscaping)) {
@@ -423,6 +436,19 @@ public class LexerTest {
}
}
+ @Test
+ public void testTrailingTextAfterQuote() throws Exception {
+ final String code = "\"a\" b,\"a\" \" b,\"a\" b \"\"";
+ try (final Lexer parser = createLexer(code,
CSVFormat.Builder.create().setTrailingData(true).build())) {
+ assertThat(parser.nextToken(new Token()), matches(TOKEN, "a b"));
+ assertThat(parser.nextToken(new Token()), matches(TOKEN, "a \"
b"));
+ assertThat(parser.nextToken(new Token()), matches(EOF, "a b
\"\""));
+ }
+ try (final Lexer parser = createLexer(code,
CSVFormat.Builder.create().setTrailingData(false).build())) {
+ assertThrows(IOException.class, () -> parser.nextToken(new
Token()));
+ }
+ }
+
@Test
public void testTrimTrailingSpacesZeroLength() throws Exception {
final StringBuilder buffer = new StringBuilder("");