(commons-csv) branch master updated: A different take on PR #303

ggregory Tue, 12 Mar 2024 16:41:55 -0700

This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git



The following commit(s) were added to refs/heads/master by this push:
     new c137e804 A different take on PR #303
c137e804 is described below

commit c137e80407af4eba4766024fe2e566b224ed0d7b
Author: Gary Gregory <[email protected]>
AuthorDate: Tue Mar 12 19:41:43 2024 -0400

    A different take on PR #303
    
    Add support for trailing text after the closing quote, and EOF without a
    final closing quote, for Excel compatibility. Fix a unit test and add a
    RAT exclude for the sample CSV file.
---
 pom.xml                                            |   4 +-
 src/changes/changes.xml                            |   5 +-
 .../java/org/apache/commons/csv/CSVFormat.java     | 162 +++++++++++++++------
 src/main/java/org/apache/commons/csv/Lexer.java    |  18 ++-
 .../java/org/apache/commons/csv/CSVParserTest.java |  18 +--
 .../java/org/apache/commons/csv/LexerTest.java     |  26 ++++
 6 files changed, 170 insertions(+), 63 deletions(-)

diff --git a/pom.xml b/pom.xml
index 9e0ab765..4f815a30 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,7 +23,7 @@
     <version>67</version>
   </parent>
   <artifactId>commons-csv</artifactId>
-  <version>1.10.1-SNAPSHOT</version>
+  <version>1.11.0-SNAPSHOT</version>
   <name>Apache Commons CSV</name>
   <url>https://commons.apache.org/proper/commons-csv/</url>
   <inceptionYear>2005</inceptionYear>
@@ -161,7 +161,7 @@
   </distributionManagement>
 
   <properties>
-    <commons.release.version>1.10.1</commons.release.version>
+    <commons.release.version>1.11.0</commons.release.version>
     <commons.release.desc>(Java 8 or above)</commons.release.desc>
     <!-- The RC version used in the staging repository URL. -->
     <commons.rc.version>RC1</commons.rc.version>
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 5d1a4ceb..a8ff2b2b 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -42,7 +42,9 @@
   <body>
     <release version="1.10.1" date="YYYY-MM-DD" description="Feature and bug 
fix release (Java 8 or above)">
       <!-- ADD -->
-      <action issue="CSV-308" type="fix" dev="ggregory" due-to="Buddhi De 
Silva, Gary Gregory">[Javadoc] Add example to CSVFormat#setHeaderComments() 
#344.</action>
+      <action issue="CSV-308" type="add" dev="ggregory" due-to="Buddhi De 
Silva, Gary Gregory">[Javadoc] Add example to CSVFormat#setHeaderComments() 
#344.</action>
+      <action type="add" dev="ggregory" due-to="DamjanJovanovic, Gary 
Gregory">Add and use CSVFormat#setTrailingData(boolean) in CSVFormat.EXCEL for 
Excel compatibility #303.</action>
+      <action type="add" dev="ggregory" due-to="DamjanJovanovic, Gary 
Gregory">Add and use CSVFormat#setLenientEof(boolean) in CSVFormat.EXCEL for 
Excel compatibility #303.</action>
       <!-- FIX -->
       <action type="fix" issue="CSV-306" dev="ggregory" due-to="Sam Ng, Bruno 
P. Kinoshita">Replace deprecated method in user guide, update external link 
#324, #325.</action>
       <action type="fix" dev="ggregory" due-to="Seth Falco, Bruno P. 
Kinoshita">Document duplicate header behavior #309.</action>
@@ -53,6 +55,7 @@
       <action type="fix" issue="CSV-311" dev="ggregory" due-to="Christian 
Feuersaenger, Gary Gregory">OutOfMemory for very long rows despite using column 
value of type Reader.</action>
       <action type="fix" dev="ggregory" due-to="Gary Gregory">Use 
try-with-resources to manage JDBC Clob in 
CSVPrinter.printRecords(ResultSet).</action>
       <action type="fix" dev="ggregory" due-to="Gary Gregory">JDBC Blob 
columns are now output as Base64 instead of Object#toString(), which usually is 
InputStream#toString().</action>
+      <action type="fix" dev="ggregory" due-to="DamjanJovanovic, Gary 
Gregory">Support unusual Excel use cases: Add support for trailing data after 
the closing quote, and EOF without a final closing quote #303.</action>
       <!-- UPDATE -->
       <action type="update" dev="ggregory" due-to="Gary Gregory">Bump 
commons-io:commons-io: from 2.11.0 to 2.15.1.</action>
       <action type="update" dev="ggregory" due-to="Gary Gregory, 
Dependabot">Bump commons-parent from 57 to 67.</action>
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java 
b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 522b7785..754d2229 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -248,6 +248,10 @@ public final class CSVFormat implements Serializable {
 
         private boolean skipHeaderRecord;
 
+        private boolean lenientEof;
+
+        private boolean trailingData;
+
         private boolean trailingDelimiter;
 
         private boolean trim;
@@ -267,6 +271,8 @@ public final class CSVFormat implements Serializable {
             this.headers = csvFormat.headers;
             this.skipHeaderRecord = csvFormat.skipHeaderRecord;
             this.ignoreHeaderCase = csvFormat.ignoreHeaderCase;
+            this.lenientEof = csvFormat.lenientEof;
+            this.trailingData = csvFormat.trailingData;
             this.trailingDelimiter = csvFormat.trailingDelimiter;
             this.trim = csvFormat.trim;
             this.autoFlush = csvFormat.autoFlush;
@@ -689,6 +695,18 @@ public final class CSVFormat implements Serializable {
             return this;
         }
 
+        /**
+         * Sets whether reading end-of-file is allowed even when input is 
malformed, helps Excel compatibility.
+         *
+         * @param lenientEof whether reading end-of-file is allowed even when 
input is malformed, helps Excel compatibility.
+         * @return This instance.
+         * @since 1.11.0
+         */
+        public Builder setLenientEof(final boolean lenientEof) {
+            this.lenientEof = lenientEof;
+            return this;
+        }
+
         /**
          * Sets the String to convert to and from {@code null}. No 
substitution occurs if {@code null}.
          *
@@ -785,6 +803,18 @@ public final class CSVFormat implements Serializable {
             return this;
         }
 
+        /**
+         * Sets whether reading trailing data is allowed in records, helps 
Excel compatibility.
+         *
+         * @param trailingData whether reading trailing data is allowed in 
records, helps Excel compatibility.
+         * @return This instance.
+         * @since 1.11.0
+         */
+        public Builder setTrailingData(final boolean trailingData) {
+            this.trailingData = trailingData;
+            return this;
+        }
+
         /**
          * Sets whether to add a trailing delimiter.
          *
@@ -914,7 +944,7 @@ public final class CSVFormat implements Serializable {
      * @see Predefined#Default
      */
     public static final CSVFormat DEFAULT = new CSVFormat(COMMA, 
DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, 
false, false, false,
-            false, false, false, DuplicateHeaderMode.ALLOW_ALL);
+            false, false, false, DuplicateHeaderMode.ALLOW_ALL, false, false);
 
     /**
      * Excel file format (using a comma as the value delimiter). Note that the 
actual value delimiter used by Excel is locale-dependent, it might be necessary
@@ -935,9 +965,11 @@ public final class CSVFormat implements Serializable {
      * <li>{@code setDelimiter(',')}</li>
      * <li>{@code setQuote('"')}</li>
      * <li>{@code setRecordSeparator("\r\n")}</li>
+     * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
      * <li>{@code setIgnoreEmptyLines(false)}</li>
      * <li>{@code setAllowMissingColumnNames(true)}</li>
-     * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
+     * <li>{@code setTrailingData(true)}</li>
+     * <li>{@code setLenientEof(true)}</li>
      * </ul>
      * <p>
      * Note: This is currently like {@link #RFC4180} plus {@link 
Builder#setAllowMissingColumnNames(boolean) 
Builder#setAllowMissingColumnNames(true)} and
@@ -950,6 +982,8 @@ public final class CSVFormat implements Serializable {
     public static final CSVFormat EXCEL = DEFAULT.builder()
             .setIgnoreEmptyLines(false)
             .setAllowMissingColumnNames(true)
+            .setTrailingData(true)
+            .setLenientEof(true)
             .build();
     // @formatter:on
 
@@ -1372,7 +1406,7 @@ public final class CSVFormat implements Serializable {
      */
     public static CSVFormat newFormat(final char delimiter) {
         return new CSVFormat(String.valueOf(delimiter), null, null, null, 
null, false, false, null, null, null, null, false, false, false, false, false, 
false,
-                DuplicateHeaderMode.ALLOW_ALL);
+                DuplicateHeaderMode.ALLOW_ALL, false, false);
     }
 
     static String[] toStringArray(final Object[] values) {
@@ -1455,6 +1489,10 @@ public final class CSVFormat implements Serializable {
 
     private final boolean skipHeaderRecord;
 
+    private final boolean lenientEof;
+
+    private final boolean trailingData;
+
     private final boolean trailingDelimiter;
 
     private final boolean trim;
@@ -1474,6 +1512,8 @@ public final class CSVFormat implements Serializable {
         this.headers = builder.headers;
         this.skipHeaderRecord = builder.skipHeaderRecord;
         this.ignoreHeaderCase = builder.ignoreHeaderCase;
+        this.lenientEof = builder.lenientEof;
+        this.trailingData = builder.trailingData;
         this.trailingDelimiter = builder.trailingDelimiter;
         this.trim = builder.trim;
         this.autoFlush = builder.autoFlush;
@@ -1494,22 +1534,24 @@ public final class CSVFormat implements Serializable {
      * @param ignoreEmptyLines        {@code true} when the parser should skip 
empty lines.
      * @param recordSeparator         the line separator to use for output.
      * @param nullString              the line separator to use for output.
-     * @param headerComments          the comments to be printed by the 
Printer before the actual CSV data.
-     * @param header                  the header
-     * @param skipHeaderRecord        if {@code true} the header row will be 
skipped
-     * @param allowMissingColumnNames if {@code true} the missing column names 
are allowed when parsing the header line
-     * @param ignoreHeaderCase        if {@code true} header names will be 
accessed ignoring case when parsing input
-     * @param trim                    if {@code true} next record value will 
be trimmed
-     * @param trailingDelimiter       if {@code true} the trailing delimiter 
wil be added before record separator (if set)
-     * @param autoFlush               if {@code true} the underlying stream 
will be flushed before closing
-     * @param duplicateHeaderMode     the behavior when handling duplicate 
headers
+     * @param headerComments          the comments to be printed by the 
Printer before the actual CSV data.
+     * @param header                  the header.
+     * @param skipHeaderRecord        if {@code true} the header row will be 
skipped.
+     * @param allowMissingColumnNames if {@code true} the missing column names 
are allowed when parsing the header line.
+     * @param ignoreHeaderCase        if {@code true} header names will be 
accessed ignoring case when parsing input.
+     * @param trim                    if {@code true} next record value will 
be trimmed.
+     * @param trailingDelimiter       if {@code true} the trailing delimiter 
will be added before record separator (if set).
+     * @param autoFlush               if {@code true} the underlying stream 
will be flushed before closing.
+     * @param duplicateHeaderMode     the behavior when handling duplicate 
headers.
+     * @param trailingData            whether reading trailing data is allowed 
in records, helps Excel compatibility.
+     * @param lenientEof              whether reading end-of-file is allowed 
even when input is malformed, helps Excel compatibility.
      * @throws IllegalArgumentException if the delimiter is a line break 
character.
      */
     private CSVFormat(final String delimiter, final Character quoteChar, final 
QuoteMode quoteMode, final Character commentStart, final Character escape,
             final boolean ignoreSurroundingSpaces, final boolean 
ignoreEmptyLines, final String recordSeparator, final String nullString,
             final Object[] headerComments, final String[] header, final 
boolean skipHeaderRecord, final boolean allowMissingColumnNames,
             final boolean ignoreHeaderCase, final boolean trim, final boolean 
trailingDelimiter, final boolean autoFlush,
-            final DuplicateHeaderMode duplicateHeaderMode) {
+            final DuplicateHeaderMode duplicateHeaderMode, final boolean 
trailingData, final boolean lenientEof) {
         this.delimiter = delimiter;
         this.quoteCharacter = quoteChar;
         this.quoteMode = quoteMode;
@@ -1524,6 +1566,8 @@ public final class CSVFormat implements Serializable {
         this.headers = clone(header);
         this.skipHeaderRecord = skipHeaderRecord;
         this.ignoreHeaderCase = ignoreHeaderCase;
+        this.lenientEof = lenientEof;
+        this.trailingData = trailingData;
         this.trailingDelimiter = trailingDelimiter;
         this.trim = trim;
         this.autoFlush = autoFlush;
@@ -1571,18 +1615,23 @@ public final class CSVFormat implements Serializable {
         if (this == obj) {
             return true;
         }
-        if (obj == null || getClass() != obj.getClass()) {
+        if (obj == null) {
+            return false;
+        }
+        if (getClass() != obj.getClass()) {
             return false;
         }
         final CSVFormat other = (CSVFormat) obj;
-        return duplicateHeaderMode == other.duplicateHeaderMode && 
allowMissingColumnNames == other.allowMissingColumnNames &&
-                autoFlush == other.autoFlush && Objects.equals(commentMarker, 
other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
-                Objects.equals(escapeCharacter, other.escapeCharacter) && 
Arrays.equals(headers, other.headers) &&
-                Arrays.equals(headerComments, other.headerComments) && 
ignoreEmptyLines == other.ignoreEmptyLines &&
-                ignoreHeaderCase == other.ignoreHeaderCase && 
ignoreSurroundingSpaces == other.ignoreSurroundingSpaces &&
-                Objects.equals(nullString, other.nullString) && 
Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == 
other.quoteMode &&
-                Objects.equals(quotedNullString, other.quotedNullString) && 
Objects.equals(recordSeparator, other.recordSeparator) &&
-                skipHeaderRecord == other.skipHeaderRecord && 
trailingDelimiter == other.trailingDelimiter && trim == other.trim;
+        return allowMissingColumnNames == other.allowMissingColumnNames && 
autoFlush == other.autoFlush &&
+                Objects.equals(commentMarker, other.commentMarker) && 
Objects.equals(delimiter, other.delimiter) &&
+                duplicateHeaderMode == other.duplicateHeaderMode && 
Objects.equals(escapeCharacter, other.escapeCharacter) &&
+                Arrays.equals(headerComments, other.headerComments) && 
Arrays.equals(headers, other.headers) &&
+                ignoreEmptyLines == other.ignoreEmptyLines && ignoreHeaderCase 
== other.ignoreHeaderCase &&
+                ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && 
lenientEof == other.lenientEof &&
+                Objects.equals(nullString, other.nullString) && 
Objects.equals(quoteCharacter, other.quoteCharacter) &&
+                quoteMode == other.quoteMode && 
Objects.equals(quotedNullString, other.quotedNullString) &&
+                Objects.equals(recordSeparator, other.recordSeparator) && 
skipHeaderRecord == other.skipHeaderRecord &&
+                trailingData == other.trailingData && trailingDelimiter == 
other.trailingDelimiter && trim == other.trim;
     }
 
     private void escape(final char c, final Appendable appendable) throws 
IOException {
@@ -1808,6 +1857,16 @@ public final class CSVFormat implements Serializable {
         return ignoreSurroundingSpaces;
     }
 
+    /**
+     * Gets whether reading end-of-file is allowed even when input is 
malformed, helps Excel compatibility.
+     *
+     * @return whether reading end-of-file is allowed even when input is 
malformed, helps Excel compatibility.
+     * @since 1.11.0
+     */
+    public boolean getLenientEof() {
+        return lenientEof;
+    }
+
     /**
      * Gets the String to convert to and from {@code null}.
      * <ul>
@@ -1857,6 +1916,16 @@ public final class CSVFormat implements Serializable {
         return skipHeaderRecord;
     }
 
+    /**
+     * Gets whether reading trailing data is allowed in records, helps Excel 
compatibility.
+     *
+     * @return whether reading trailing data is allowed in records, helps 
Excel compatibility.
+     * @since 1.11.0
+     */
+    public boolean getTrailingData() {
+        return trailingData;
+    }
+
     /**
      * Gets whether to add a trailing delimiter.
      *
@@ -1881,11 +1950,12 @@ public final class CSVFormat implements Serializable {
     public int hashCode() {
         final int prime = 31;
         int result = 1;
-        result = prime * result + Arrays.hashCode(headers);
         result = prime * result + Arrays.hashCode(headerComments);
-        return prime * result + Objects.hash(duplicateHeaderMode, 
allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
-                ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, 
nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
-                skipHeaderRecord, trailingDelimiter, trim);
+        result = prime * result + Arrays.hashCode(headers);
+        result = prime * result + Objects.hash(allowMissingColumnNames, 
autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter,
+                ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, 
lenientEof, nullString, quoteCharacter, quoteMode, quotedNullString,
+                recordSeparator, skipHeaderRecord, trailingData, 
trailingDelimiter, trim);
+        return result;
     }
 
     /**
@@ -2006,6 +2076,26 @@ public final class CSVFormat implements Serializable {
         return new CSVPrinter(new OutputStreamWriter(new 
FileOutputStream(out), charset), this);
     }
 
+    private void print(final InputStream inputStream, final Appendable out, 
final boolean newRecord) throws IOException {
+        // InputStream is never null here
+        // There is nothing to escape when quoting is used which is the 
default.
+        if (!newRecord) {
+            append(getDelimiterString(), out);
+        }
+        final boolean quoteCharacterSet = isQuoteCharacterSet();
+        if (quoteCharacterSet) {
+            append(getQuoteCharacter().charValue(), out);
+        }
+        // Stream the input to the output without reading or holding the whole 
value in memory.
+        // AppendableOutputStream cannot "close" an Appendable.
+        try (OutputStream outputStream = new Base64OutputStream(new 
AppendableOutputStream<>(out))) {
+            IOUtils.copy(inputStream, outputStream);
+        }
+        if (quoteCharacterSet) {
+            append(getQuoteCharacter().charValue(), out);
+        }
+    }
+
     /**
      * Prints the {@code value} as the next value on the line to {@code out}. 
The value will be escaped or encapsulated as needed. Useful when one wants to
      * avoid creating CSVPrinters. Trims the value if {@link #getTrim()} is 
true.
@@ -2081,26 +2171,6 @@ public final class CSVFormat implements Serializable {
         return print(Files.newBufferedWriter(out, charset));
     }
 
-    private void print(final InputStream inputStream, final Appendable out, 
final boolean newRecord) throws IOException {
-        // InputStream is never null here
-        // There is nothing to escape when quoting is used which is the 
default.
-        if (!newRecord) {
-            append(getDelimiterString(), out);
-        }
-        final boolean quoteCharacterSet = isQuoteCharacterSet();
-        if (quoteCharacterSet) {
-            append(getQuoteCharacter().charValue(), out);
-        }
-        // Stream the input to the output without reading or holding the whole 
value in memory.
-        // AppendableOutputStream cannot "close" an Appendable.
-        try (OutputStream outputStream = new Base64OutputStream(new 
AppendableOutputStream<>(out))) {
-            IOUtils.copy(inputStream, outputStream);
-        }
-        if (quoteCharacterSet) {
-            append(getQuoteCharacter().charValue(), out);
-        }
-    }
-
     private void print(final Reader reader, final Appendable out, final 
boolean newRecord) throws IOException {
         // Reader is never null here
         if (!newRecord) {
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java 
b/src/main/java/org/apache/commons/csv/Lexer.java
index ef379ec3..17113227 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -53,9 +53,10 @@ final class Lexer implements Closeable {
     private final char escape;
     private final char quoteChar;
     private final char commentStart;
-
     private final boolean ignoreSurroundingSpaces;
     private final boolean ignoreEmptyLines;
+    private final boolean lenientEof;
+    private final boolean trailingData;
 
     /** The input stream */
     private final ExtendedBufferedReader reader;
@@ -71,6 +72,8 @@ final class Lexer implements Closeable {
         this.commentStart = mapNullToDisabled(format.getCommentMarker());
         this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
         this.ignoreEmptyLines = format.getIgnoreEmptyLines();
+        this.lenientEof = format.getLenientEof();
+        this.trailingData = format.getTrailingData();
         this.delimiterBuf = new char[delimiter.length - 1];
         this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
     }
@@ -364,14 +367,21 @@ final class Lexer implements Closeable {
                             token.type = EORECORD;
                             return token;
                         }
-                        if (!Character.isWhitespace((char)c)) {
+                        if (trailingData) {
+                            token.content.append((char) c);
+                        } else if (!Character.isWhitespace((char) c)) {
                             // error invalid char between token and next 
delimiter
-                            throw new IOException("Invalid char between 
encapsulated token and delimiter at line: " +
-                                    getCurrentLineNumber() + ", position: " + 
getCharacterPosition());
+                            throw new IOException(String.format("Invalid char 
between encapsulated token and delimiter at line: %,d, position: %,d",
+                                    getCurrentLineNumber(), 
getCharacterPosition()));
                         }
                     }
                 }
             } else if (isEndOfFile(c)) {
+                if (lenientEof) {
+                    token.type = Token.Type.EOF;
+                    token.isReady = true; // There is data at EOF
+                    return token;
+                }
                 // error condition (end of file before end of token)
                 throw new IOException("(startline " + startLineNumber +
                         ") EOF reached before encapsulated token finished");
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java 
b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 52173287..b3a51378 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -299,7 +299,6 @@ public class CSVParserTest {
     }
 
     @Test
-    @Disabled("PR 295 does not work")
     public void testCSV141Excel() throws Exception {
         testCSV141Ok(CSVFormat.EXCEL);
     }
@@ -357,16 +356,15 @@ public class CSVParserTest {
             record = parser.nextRecord();
             assertEquals("1414770318327", record.get(0));
             assertEquals("android.widget.EditText", record.get(1));
-            assertEquals("pass sem1", record.get(2));
-            assertEquals(3, record.size());
-            // row 4
+            assertEquals("pass sem1\n1414770318628\"", record.get(2));
+            assertEquals("android.widget.EditText", record.get(3));
+            assertEquals("pass sem1 _84*|*", record.get(4));
+            assertEquals("0", record.get(5));
+            assertEquals("pass sem1\n", record.get(6));
+            assertEquals(7, record.size());
+            // EOF
             record = parser.nextRecord();
-            assertEquals("1414770318628", record.get(0));
-            assertEquals("android.widget.EditText", record.get(1));
-            assertEquals("pass sem1 _84*|*", record.get(2));
-            assertEquals("0", record.get(3));
-            assertEquals("pass sem1", record.get(4));
-            assertEquals(5, record.size());
+            assertNull(record);
         }
     }
 
diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java 
b/src/test/java/org/apache/commons/csv/LexerTest.java
index f36eaa45..3bc55a00 100644
--- a/src/test/java/org/apache/commons/csv/LexerTest.java
+++ b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -196,6 +196,19 @@ public class LexerTest {
         }
     }
 
+    @Test
+    public void testEOFWithoutClosingQuote() throws Exception {
+        final String code = "a,\"b";
+        try (final Lexer parser = createLexer(code, 
CSVFormat.Builder.create().setLenientEof(true).build())) {
+            assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
+            assertThat(parser.nextToken(new Token()), matches(EOF, "b"));
+        }
+        try (final Lexer parser = createLexer(code, 
CSVFormat.Builder.create().setLenientEof(false).build())) {
+            assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
+            assertThrows(IOException.class, () -> parser.nextToken(new 
Token()));
+        }
+    }
+
     @Test // TODO is this correct? Do we expect <esc>BACKSPACE to be unescaped?
     public void testEscapedBackspace() throws Exception {
         try (final Lexer lexer = createLexer("character\\" + BACKSPACE + 
"Escaped", formatWithEscaping)) {
@@ -423,6 +436,19 @@ public class LexerTest {
         }
     }
 
+    @Test
+    public void testTrailingTextAfterQuote() throws Exception {
+        final String code = "\"a\" b,\"a\" \" b,\"a\" b \"\"";
+        try (final Lexer parser = createLexer(code, 
CSVFormat.Builder.create().setTrailingData(true).build())) {
+            assertThat(parser.nextToken(new Token()), matches(TOKEN, "a b"));
+            assertThat(parser.nextToken(new Token()), matches(TOKEN, "a \" 
b"));
+            assertThat(parser.nextToken(new Token()), matches(EOF, "a b 
\"\""));
+        }
+        try (final Lexer parser = createLexer(code, 
CSVFormat.Builder.create().setTrailingData(false).build())) {
+            assertThrows(IOException.class, () -> parser.nextToken(new 
Token()));
+        }
+    }
+
     @Test
     public void testTrimTrailingSpacesZeroLength() throws Exception {
         final StringBuilder buffer = new StringBuilder("");

(commons-csv) branch master updated: A different take on PR #303

Reply via email to