This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit 6c654988fe4576783c00ab3c6329e175231eda88
Author: Patrick Mezard <patr...@mezard.eu>
AuthorDate: Tue Jun 9 17:00:16 2020 +0200

    NUTCH-2790 indexer-csv: escape field leading quote character
    
    Before the change, the leading quote of a field value like '"value'
    would be left unescaped.
---
 .../java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java    | 3 +--
 .../org/apache/nutch/indexwriter/csv/TestCSVIndexWriter.java     | 9 +++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java b/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java
index 160d03d..99c0702 100644
--- a/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java
+++ b/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java
@@ -405,13 +405,12 @@ public class CSVIndexWriter implements IndexWriter {
     if (max > maxFieldLength) {
       max = maxFieldLength;
     }
-    while (nextQuoteChar > 0 && nextQuoteChar < max) {
+    while (nextQuoteChar >= 0 && nextQuoteChar < max) {
       csvout.write(value.substring(start, nextQuoteChar).getBytes(encoding));
       csvout.write(escapeCharacter.bytes);
       csvout.write(quoteCharacter.bytes);
       start = nextQuoteChar + 1;
       nextQuoteChar = quoteCharacter.find(value, start);
-      if (nextQuoteChar > max) break;
     }
     csvout.write(value.substring(start, max).getBytes(encoding));
   }
diff --git a/src/plugin/indexer-csv/src/test/org/apache/nutch/indexwriter/csv/TestCSVIndexWriter.java b/src/plugin/indexer-csv/src/test/org/apache/nutch/indexwriter/csv/TestCSVIndexWriter.java
index 761d042..5714cc2 100644
--- a/src/plugin/indexer-csv/src/test/org/apache/nutch/indexwriter/csv/TestCSVIndexWriter.java
+++ b/src/plugin/indexer-csv/src/test/org/apache/nutch/indexwriter/csv/TestCSVIndexWriter.java
@@ -159,6 +159,15 @@ public class TestCSVIndexWriter {
   }
 
   @Test
+  public void testCSVescapeLeadingQuotes() throws IOException {
+    String[] params = { CSVConstants.CSV_FIELDS, "test" };
+    String[] fields = { "test", "\"quote\"" };
+    String csv = getCSV(params, fields);
+    assertEquals("Leading quotes inside a quoted field must be escaped",
+        "\"\"\"quote\"\"\"", csv.trim());
+  }
+
+  @Test
   public void testCSVclipMaxLength() throws IOException {
     String[] params = { CSVConstants.CSV_FIELDS, "test",
         CSVConstants.CSV_MAXFIELDLENGTH, "8" };

Reply via email to