Author: jerome Date: Tue May 9 16:06:17 2006 New Revision: 405566 URL: http://svn.apache.org/viewcvs?rev=405566&view=rev Log: NUTCH-134 - The clusterer no longer needs to remove HTML tags from summaries
Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/LocalNutchInputComponent.java Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/LocalNutchInputComponent.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/LocalNutchInputComponent.java?rev=405566&r1=405565&r2=405566&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/LocalNutchInputComponent.java (original) +++ lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/LocalNutchInputComponent.java Tue May 9 16:06:17 2006 @@ -34,7 +34,6 @@ import com.dawidweiss.carrot.core.local.ProcessingException; import com.dawidweiss.carrot.core.local.RequestContext; import com.dawidweiss.carrot.core.local.clustering.*; -import com.dawidweiss.carrot.util.common.StringUtils; /** * A local input component that ignores the query passed from the @@ -103,7 +102,7 @@ // produce 'documents' for successor components. final RawDocumentsConsumer consumer = (RawDocumentsConsumer) next; for (int i=0;i<summaries.length;i++) { - consumer.addDocument(new NutchDocument(i, details[i], htmlToText(summaries[i]), defaultLanguage)); + consumer.addDocument(new NutchDocument(i, details[i], summaries[i], defaultLanguage)); } } @@ -121,14 +120,4 @@ return SUCCESSOR_CAPABILITIES; } - /** - * Converts a html chunk to plain text. - * - * This method is only required because Nutch's summaries are in HTML. - * I guess it would be possible to get rid of the code below by - * adding patches/ methods to Nutch that return plain text summaries. - */ - private final String htmlToText(String html) { - return StringUtils.removeMarkup(html); - } }