Author: jeastman
Date: Sat Oct 16 17:47:54 2010
New Revision: 1023340
URL: http://svn.apache.org/viewvc?rev=1023340&view=rev
Log:
CDH3 adds a _SUCCESS file to the vectors directory, and the RandomSeedGenerator
was trying to process it as a vector file. All tests run, including
build-reuters.sh on CDH3.
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java?rev=1023340&r1=1023339&r2=1023340&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
Sat Oct 16 17:47:54 2010
@@ -78,8 +78,8 @@ public final class RandomSeedGenerator {
int nextClusterId = 0;
for (FileStatus fileStatus : inputFiles) {
- if (fileStatus.isDir()) {
- continue; // select only the top level files
+ if (fileStatus.isDir() ||
fileStatus.getPath().getName().startsWith("_")) {
+ continue; // select only the top level files that do not begin with
"_" (Cloudera CHD3 adds _SUCCESS file)
}
SequenceFile.Reader reader = new SequenceFile.Reader(fs,
fileStatus.getPath(), conf);
Writable key =
reader.getKeyClass().asSubclass(Writable.class).newInstance();