Revision: 52535
Author:   daniel
Date:     2009-06-29 08:53:13 +0000 (Mon, 29 Jun 2009)

Log Message:
-----------
allow bad csv lines to be skipped

Modified Paths:
--------------
    trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java
    trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java
    trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/TsvFeatureSetCursor.java

Modified: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java
===================================================================
--- trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java	2009-06-28 22:43:38 UTC (rev 52534)
+++ trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java	2009-06-29 08:53:13 UTC (rev 52535)
@@ -9,6 +9,7 @@
 import java.sql.Connection;
 import java.sql.ResultSet;
 import java.sql.SQLException;
+import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -22,10 +23,12 @@
 import de.brightbyte.db.DatabaseSchema;
 import de.brightbyte.db.SqlDialect;
 import de.brightbyte.db.SqlScriptRunner;
+import de.brightbyte.io.ChunkingCursor;
 import de.brightbyte.io.IOUtil;
 import de.brightbyte.io.LineCursor;
 import de.brightbyte.text.Chunker;
 import de.brightbyte.util.BeanUtils;
+import de.brightbyte.util.LoggingErrorHandler;
 import de.brightbyte.util.PersistenceException;
 import de.brightbyte.wikiword.DatasetIdentifier;
 import de.brightbyte.wikiword.StoreBackedApp;
@@ -278,6 +281,8 @@
                        
                        fsc = new TsvFeatureSetCursor(lines, chunker);
                        
+                       if (sourceDescriptor.getSkipBadRows()) ((TsvFeatureSetCursor)fsc).setParseErrorHandler( new LoggingErrorHandler<ChunkingCursor, ParseException, PersistenceException>(out));
+                       
                        if (fields!=null) {
                                 if (sourceDescriptor.getSkipHeader()) ((TsvFeatureSetCursor)fsc).readFields();
                                ((TsvFeatureSetCursor)fsc).setFields(fields);

Modified: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java
===================================================================
--- trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java	2009-06-28 22:43:38 UTC (rev 52534)
+++ trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java	2009-06-29 08:53:13 UTC (rev 52535)
@@ -106,7 +106,7 @@
        }
 
        public boolean getSkipHeader() {
-               return getTweak("skip-header", false);
+               return getTweak("csv-skip-header", false);
        }
 
        public String getSourceFileFormat() {
@@ -155,4 +155,8 @@
                return getTweak("source-table", null);
        }
 
+       public boolean getSkipBadRows() {
+               return getTweak("csv-skip-bad-rows", false);
+       }
+
 }

Modified: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/TsvFeatureSetCursor.java
===================================================================
--- trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/TsvFeatureSetCursor.java	2009-06-28 22:43:38 UTC (rev 52534)
+++ trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/TsvFeatureSetCursor.java	2009-06-29 08:53:13 UTC (rev 52535)
@@ -4,6 +4,7 @@
 import java.io.InputStream;
 import java.io.Reader;
 import java.io.UnsupportedEncodingException;
+import java.text.ParseException;
 import java.util.List;
 
 import de.brightbyte.data.cursor.DataCursor;
@@ -11,6 +12,8 @@
 import de.brightbyte.io.ChunkingCursor;
 import de.brightbyte.text.Chunker;
 import de.brightbyte.text.CsvLineChunker;
+import de.brightbyte.util.ErrorHandler;
+import de.brightbyte.util.LoggingErrorHandler;
 import de.brightbyte.util.PersistenceException;
 
 public class TsvFeatureSetCursor implements DataCursor<FeatureSet> {
@@ -46,6 +49,14 @@
                this.source = source;
        }
        
+       public void setParseErrorHandler(ErrorHandler<ChunkingCursor, ParseException, PersistenceException> errorHandler) {
+               if (source instanceof ChunkingCursor) {
+                       ((ChunkingCursor)source).setParseErrorHandler(errorHandler);
+               } else {
+                       throw new IllegalStateException("source is not a ChunkingCursor, can't set error handler");
+               }
+       }
+       
        public void setFields(String[] fields) {
                if (fields==null) throw new NullPointerException();
                this.fields = fields;



_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to