Author: siren
Date: Thu May 10 09:13:15 2007
New Revision: 536909

URL: http://svn.apache.org/viewvc?view=rev&rev=536909
Log:
NUTCH-456 Parse msexcel plugin speedup contributed by Heiko Dietze

Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=536909&r1=536908&r2=536909 ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Thu May 10 09:13:15 2007 @@ -10,7 +10,8 @@ 3. NUTCH-393 - Indexer should handle null documents returned by filters. (Eelco Lempsink via ab) - + 4. NUTCH-456 - Parse msexcel plugin speedup (Heiko Dietze via siren) + Release 0.9 - 2007-04-02 Modified: lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java?view=diff&rev=536909&r1=536908&r2=536909 ============================================================================== --- lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java (original) +++ lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java Thu May 10 09:13:15 2007 @@ -40,10 +40,10 @@ protected String extractText(InputStream input) throws Exception { - String resultText = ""; + StringBuilder resultText = new StringBuilder(); HSSFWorkbook wb = new HSSFWorkbook(input); if (wb == null) { - return resultText; + return resultText.toString(); } HSSFSheet sheet; @@ -69,25 +69,24 @@ for (int k=0; k<cNum; k++) { if ((cell = row.getCell((short) k)) != null) { /*if(HSSFDateUtil.isCellDateFormatted(cell) == true) { - resultText += cell.getDateCellValue().toString() + " "; + resultText.append(cell.getDateCellValue().toString()) } else */ if (cell.getCellType() == HSSFCell.CELL_TYPE_STRING) { - resultText += cell.getStringCellValue() + " "; + 
resultText.append(cell.getStringCellValue()).append(" "); } else if (cell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) { - Double d = new Double(cell.getNumericCellValue()); - resultText += d.toString() + " "; + double d = cell.getNumericCellValue(); + resultText.append(d).append(" "); } /* else if(cell.getCellType() == HSSFCell.CELL_TYPE_FORMULA){ - resultText += cell.getCellFormula() + " "; + resultText.append(cell.getCellFormula()); } */ } } } } - return resultText; + return resultText.toString(); } } - ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs