Author: gsingers
Date: Fri Mar 25 14:28:12 2011
New Revision: 1085397
URL: http://svn.apache.org/viewvc?rev=1085397&view=rev
Log:
MAHOUT-548: add in some CSV support for creating vectors, as well as a few
other fixes for working with vectors
Added:
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
Modified:
mahout/trunk/utils/pom.xml
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
Modified: mahout/trunk/utils/pom.xml
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/pom.xml?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
--- mahout/trunk/utils/pom.xml (original)
+++ mahout/trunk/utils/pom.xml Fri Mar 25 14:28:12 2011
@@ -142,6 +142,11 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-commons-csv</artifactId>
+ <version>1.4.1</version>
+ </dependency>
<dependency>
<groupId>junit</groupId>
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
---
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
(original)
+++
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
Fri Mar 25 14:28:12 2011
@@ -77,16 +77,22 @@ public final class VectorDumper {
Option dictTypeOpt =
obuilder.withLongName("dictionaryType").withRequired(false).withArgument(
abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create()).withDescription(
"The dictionary file type
(text|sequencefile)").withShortName("dt").create();
- Option centroidJSonOpt =
obuilder.withLongName("json").withRequired(false).withDescription(
- "Output the centroid as JSON. Otherwise it substitutes in the
terms for vector cell entries")
+ Option jsonOpt =
obuilder.withLongName("json").withRequired(false).withDescription(
+ "Output the Vector as JSON. Otherwise it substitutes in the terms
for vector cell entries")
.withShortName("j").create();
+ Option csvOpt =
obuilder.withLongName("csv").withRequired(false).withDescription(
+ "Output the Vector as CSV. Otherwise it substitutes in the terms
for vector cell entries")
+ .withShortName("c").create();
+ Option namesAsCommentsOpt =
obuilder.withLongName("namesAsComments").withRequired(false).withDescription(
+ "If using CSV output, optionally add a comment line for each
NamedVector (if the vector is one) printing out the name")
+ .withShortName("n").create();
Option sizeOpt = obuilder.withLongName("sizeOnly").withRequired(false).
withDescription("Dump only the size of the
vector").withShortName("sz").create();
Option helpOpt = obuilder.withLongName("help").withDescription("Print out
help").withShortName("h")
.create();
Group group =
gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt).withOption(
-
dictTypeOpt).withOption(dictOpt).withOption(centroidJSonOpt).withOption(vectorAsKeyOpt).withOption(
+
dictTypeOpt).withOption(dictOpt).withOption(csvOpt).withOption(vectorAsKeyOpt).withOption(
printKeyOpt).withOption(sizeOpt).create();
try {
@@ -122,10 +128,12 @@ public final class VectorDumper {
throw new OptionException(dictTypeOpt);
}
}
- boolean useJSON = cmdLine.hasOption(centroidJSonOpt);
+ boolean useJSON = cmdLine.hasOption(jsonOpt);
+ boolean useCSV = cmdLine.hasOption(csvOpt);
+
boolean sizeOnly = cmdLine.hasOption(sizeOpt);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
-
+ boolean namesAsComments = cmdLine.hasOption(namesAsCommentsOpt);
Writable keyWritable =
reader.getKeyClass().asSubclass(Writable.class).newInstance();
Writable valueWritable =
reader.getValueClass().asSubclass(Writable.class).newInstance();
boolean transposeKeyValue = cmdLine.hasOption(vectorAsKeyOpt);
@@ -140,6 +148,16 @@ public final class VectorDumper {
try {
boolean printKey = cmdLine.hasOption(printKeyOpt);
long i = 0;
+ if (useCSV && dictionary != null){
+ writer.write("#");
+ for (int j = 0; j < dictionary.length; j++) {
+ writer.write(dictionary[j]);
+ if (j < dictionary.length - 1){
+ writer.write(',');
+ }
+ }
+ writer.write('\n');
+ }
while (reader.next(keyWritable, valueWritable)) {
if (printKey) {
Writable notTheVectorWritable = transposeKeyValue ?
valueWritable : keyWritable;
@@ -159,7 +177,14 @@ public final class VectorDumper {
writer.write(String.valueOf(vector.size()));
writer.write('\n');
} else {
- String fmtStr = useJSON ? vector.asFormatString() :
VectorHelper.vectorToString(vector, dictionary);
+ String fmtStr;
+ if (useJSON){
+ fmtStr = VectorHelper.vectorToJSONString(vector, dictionary);
+ } else if (useCSV){
+ fmtStr = VectorHelper.vectorToCSVString(vector,
namesAsComments);
+ } else {
+ fmtStr = vector.asFormatString();
+ }
writer.write(fmtStr);
writer.write('\n');
}
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
---
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
(original)
+++
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
Fri Mar 25 14:28:12 2011
@@ -40,14 +40,45 @@ import org.apache.mahout.math.map.OpenOb
public final class VectorHelper {
private static final Pattern TAB_PATTERN = Pattern.compile("\t");
+
private VectorHelper() { }
-
+
+ /**
+  * Renders the cells of a vector as comma-separated text and returns it as a String.
+  * Delegates to {@link #vectorToCSVString(Vector, boolean, Appendable)} using an in-memory buffer.
+  *
+  * @param vector the vector to render
+  * @param namesAsComments if true and the vector is a {@link NamedVector}, a "#name" comment line
+  *                        is emitted before the values
+  * @return the rendered text, terminated by a newline
+  * @throws RuntimeException wrapping any IOException reported while appending to the buffer
+  */
+ public static String vectorToCSVString(Vector vector, boolean namesAsComments) {
+   StringBuilder csv = new StringBuilder(2048);
+   try {
+     vectorToCSVString(vector, namesAsComments, csv);
+     return csv.toString();
+   } catch (IOException e) {
+     throw new RuntimeException(e);
+   }
+ }
+
+ /**
+  * Appends the cells of a vector to {@code bldr} as one comma-separated line.
+  * Every element returned by {@code vector.iterator()} is written via {@link String#valueOf(double)},
+  * values are separated by a single comma, and the line is terminated by a newline.
+  *
+  * @param vector the vector to render
+  * @param namesAsComments if true and the vector is a {@link NamedVector}, a line consisting of
+  *                        '#' followed by the vector's name is written first
+  * @param bldr the sink that receives the text
+  * @throws IOException if the sink fails while appending
+  */
+ public static void vectorToCSVString(Vector vector, boolean namesAsComments,
+     Appendable bldr) throws IOException {
+   if (namesAsComments && vector instanceof NamedVector) {
+     NamedVector named = (NamedVector) vector;
+     bldr.append("#").append(named.getName()).append('\n');
+   }
+   // A comma goes in front of every value except the first one.
+   boolean needsComma = false;
+   for (Iterator<Vector.Element> cells = vector.iterator(); cells.hasNext();) {
+     if (needsComma) {
+       bldr.append(",");
+     }
+     needsComma = true;
+     bldr.append(String.valueOf(cells.next().get()));
+   }
+   bldr.append('\n');
+ }
+
+
/**
* @return a String from a vector that fills in the values with the
appropriate value from a dictionary where
* each the ith entry is the term for the ith vector cell.
*/
- public static String vectorToString(Vector vector, String[] dictionary) {
+ public static String vectorToJSONString(Vector vector, String[] dictionary) {
StringBuilder bldr = new StringBuilder(2048);
if (vector instanceof NamedVector) {
@@ -67,12 +98,13 @@ public final class VectorHelper {
if (dictionary != null) {
bldr.append(dictionary[elt.index()]);
} else {
- bldr.append(elt.index());
+ bldr.append(String.valueOf(elt.index()));
}
- bldr.append(':').append(elt.get());
+ bldr.append(':').append(String.valueOf(elt.get()));
}
return bldr.append('}').toString();
}
+
/**
* Read in a dictionary file. Format is:
Added:
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java?rev=1085397&view=auto
==============================================================================
---
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
(added)
+++
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
Fri Mar 25 14:28:12 2011
@@ -0,0 +1,94 @@
+package org.apache.mahout.utils.vectors.csv;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVStrategy;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Iterator;
+
+
+/**
+ * Iterates over the rows of a CSV source and produces one {@link org.apache.mahout.math.Vector} per row.
+ * <p/>
+ * Every column of a row is parsed with {@link Double#parseDouble(String)} and stored in a
+ * {@link org.apache.mahout.math.DenseVector} whose cardinality is the number of columns of that row.
+ * Assumes DenseVector for now, but in the future may have the option of mapping columns to sparse format.
+ * <p/>
+ * The first row is read by the constructor and the read position is held by this object, not by the
+ * iterators: every {@link java.util.Iterator} handed out by {@link #iterator()} advances the same
+ * parser, so the input can be traversed only once.
+ * <p/>
+ * The Iterator returned throws {@link UnsupportedOperationException} for the
+ * {@link java.util.Iterator#remove()} method.
+ * <p/>
+ * The Iterator is not thread-safe.
+ **/
+public class CSVVectorIterable implements Iterable<Vector> {
+  /** Parser supplying the rows; shared by all iterators created from this instance. */
+  protected CSVParser parser;
+  /** The row that the next call to {@code next()} converts, or null when no row is left. */
+  protected String[] line;
+
+  /**
+   * Creates an iterable over the rows read from {@code reader}, using a {@link CSVParser} built
+   * without an explicit strategy. The first row is read immediately.
+   *
+   * @param reader the source of the CSV text
+   * @throws IOException if reading the first row fails
+   */
+  public CSVVectorIterable(Reader reader) throws IOException {
+    parser = new CSVParser(reader);
+    line = parser.getLine();
+  }
+
+  /**
+   * Creates an iterable over the rows read from {@code reader}, parsed according to {@code strategy}.
+   * The first row is read immediately.
+   *
+   * @param reader the source of the CSV text
+   * @param strategy the {@link CSVStrategy} handed to the parser
+   * @throws IOException if reading the first row fails
+   */
+  public CSVVectorIterable(Reader reader, CSVStrategy strategy) throws IOException {
+    parser = new CSVParser(reader, strategy);
+    line = parser.getLine();
+  }
+
+  /**
+   * @return an iterator that continues from the current read position of the shared parser
+   */
+  @Override
+  public Iterator<Vector> iterator() {
+    return new CSVIterator();
+  }
+
+  /** Converts the buffered row into a vector and then reads the following row. */
+  private final class CSVIterator implements Iterator<Vector> {
+
+    @Override
+    public boolean hasNext() {
+      return line != null;
+    }
+
+    /**
+     * @return a dense vector holding the parsed values of the current row
+     * @throws java.util.NoSuchElementException if no row is left
+     * @throws NumberFormatException if a column cannot be parsed as a double
+     * @throws IllegalStateException if reading the following row fails
+     */
+    @Override
+    public Vector next() {
+      if (!hasNext()) {
+        // Honour the Iterator contract instead of failing with a NullPointerException on 'line'.
+        throw new java.util.NoSuchElementException("No more CSV rows");
+      }
+      Vector result = new DenseVector(line.length);
+      for (int i = 0; i < line.length; i++) {
+        result.setQuick(i, Double.parseDouble(line[i]));
+      }
+      // move the line forward
+      try {
+        line = parser.getLine();
+      } catch (IOException e) {
+        throw new IllegalStateException("Unable to read the next CSV row", e);
+      }
+      return result;
+    }
+
+    /**
+     * Not supported
+     */
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+}
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
---
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
(original)
+++
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
Fri Mar 25 14:28:12 2011
@@ -25,8 +25,8 @@ import org.apache.mahout.math.Vector;
/**
* Write out the vectors to any {@link java.io.Writer} using {@link
org.apache.mahout.math.Vector#asFormatString()}.
*/
-public class JWriterVectorWriter implements VectorWriter {
- private final Writer writer;
+public class JWriterVectorWriter extends VectorWriter {
+ protected final Writer writer;
public JWriterVectorWriter(Writer writer) {
this.writer = writer;
@@ -45,14 +45,22 @@ public class JWriterVectorWriter impleme
if (result >= maxDocs) {
break;
}
- writer.write(vector.asFormatString());
- writer.write('\n');
-
+ formatVector(vector);
result++;
}
return result;
}
-
+
+ /**
+  * Writes one vector to the underlying writer: the text returned by
+  * {@link org.apache.mahout.math.Vector#asFormatString()} followed by a newline.
+  * Subclasses may override this to emit a different textual format.
+  *
+  * @param vector the vector to write
+  * @throws IOException if the underlying writer fails
+  */
+ protected void formatVector(Vector vector) throws IOException {
+   String formatted = vector.asFormatString();
+   writer.write(formatted);
+   writer.write('\n');
+ }
+
+ /**
+  * Writes a single vector by handing it to {@link #formatVector(Vector)}.
+  *
+  * @param vector the vector to write
+  * @throws IOException if the underlying writer fails
+  */
+ @Override
+ public void write(Vector vector) throws IOException {
+   this.formatVector(vector);
+ }
+
@Override
public void close() throws IOException {
writer.flush();
Added:
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java?rev=1085397&view=auto
==============================================================================
---
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
(added)
+++
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
Fri Mar 25 14:28:12 2011
@@ -0,0 +1,105 @@
+package org.apache.mahout.utils.vectors.io;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+
+/**
+ * Given a Sequence File containing vectors (actually, {@link org.apache.mahout.math.VectorWritable}),
+ * iterate over it.
+ * <p/>
+ * The reader and the key/value holders belong to this object, not to the iterators: every
+ * {@link java.util.Iterator} handed out by {@link #iterator()} advances the same reader, so the file
+ * can be traversed only once. This class never closes the reader.
+ **/
+public class SequenceFileVectorIterable implements Iterable<Vector> {
+  /** Reader positioned on the sequence file; shared by all iterators. */
+  protected SequenceFile.Reader reader;
+  /** Length reported by the file system for the file; kept for subclasses, not consulted by the iterator. */
+  protected long fileLen;
+  /** Holder that receives the key of each record. */
+  protected Writable keyWritable;
+  /** Holder that receives the value of each record. */
+  protected Writable valueWritable;
+  /** If true the vector is taken from the key of each record, otherwise from the value. */
+  protected boolean useKey;
+
+  /**
+   * Construct the Iterable
+   * @param fs The {@link org.apache.hadoop.fs.FileSystem} containing the {@link org.apache.hadoop.io.SequenceFile}
+   * @param file The {@link org.apache.hadoop.fs.Path} containing the file
+   * @param conf The {@link org.apache.hadoop.conf.Configuration} to use
+   * @param useKey If true, use the key as the {@link org.apache.mahout.math.VectorWritable}, otherwise use the value
+   * @throws IllegalAccessException if the key or value class cannot be instantiated reflectively
+   * @throws InstantiationException if the key or value class cannot be instantiated reflectively
+   * @throws IOException if the file cannot be opened or its content summary cannot be obtained
+   */
+  public SequenceFileVectorIterable(FileSystem fs, Path file, Configuration conf, boolean useKey)
+    throws IllegalAccessException, InstantiationException, IOException {
+    this.reader = new SequenceFile.Reader(fs, file, conf);
+    ContentSummary summary = fs.getContentSummary(file);
+    fileLen = summary.getLength();
+    this.useKey = useKey;
+    keyWritable = reader.getKeyClass().asSubclass(Writable.class).newInstance();
+    valueWritable = reader.getValueClass().asSubclass(Writable.class).newInstance();
+  }
+
+  /**
+   * The Iterator returned does not support remove()
+   * @return The {@link java.util.Iterator}
+   */
+  @Override
+  public Iterator<Vector> iterator() {
+    return new SFIterator();
+  }
+
+  /**
+   * Reads one record ahead so that {@code hasNext()} reports exactly what the reader can deliver,
+   * instead of guessing from the byte position of the reader.
+   */
+  private final class SFIterator implements Iterator<Vector> {
+    /** Vector read ahead by hasNext() and not yet handed out. */
+    private Vector lookahead;
+    /** True while {@link #lookahead} holds a record that next() has not returned yet. */
+    private boolean buffered;
+    /** True once the reader has reported that no record is left. */
+    private boolean exhausted;
+
+    /**
+     * @return true if another vector is available
+     * @throws IllegalStateException if reading the next record fails
+     */
+    @Override
+    public boolean hasNext() {
+      if (!buffered && !exhausted) {
+        try {
+          if (reader.next(keyWritable, valueWritable)) {
+            lookahead = ((VectorWritable) (useKey ? keyWritable : valueWritable)).get();
+            buffered = true;
+          } else {
+            exhausted = true;
+          }
+        } catch (IOException e) {
+          // Surface the failure; reporting "no more data" would silently truncate the iteration.
+          throw new IllegalStateException("Unable to read the next record", e);
+        }
+      }
+      return buffered;
+    }
+
+    /**
+     * @return the next vector of the file
+     * @throws java.util.NoSuchElementException if the file holds no further record
+     * @throws IllegalStateException if reading the next record fails
+     */
+    @Override
+    public Vector next() {
+      if (!hasNext()) {
+        throw new java.util.NoSuchElementException("No more records");
+      }
+      Vector result = lookahead;
+      lookahead = null;
+      buffered = false;
+      return result;
+    }
+
+    /**
+     * Not supported
+     */
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+}
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
---
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
(original)
+++
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
Fri Mar 25 14:28:12 2011
@@ -30,16 +30,16 @@ import org.apache.mahout.math.VectorWrit
*
* Closes the writer when done
*/
-public class SequenceFileVectorWriter implements VectorWriter {
+public class SequenceFileVectorWriter extends VectorWriter {
private final SequenceFile.Writer writer;
-
+ long recNum = 0;
public SequenceFileVectorWriter(SequenceFile.Writer writer) {
this.writer = writer;
}
@Override
public long write(Iterable<Vector> iterable, long maxDocs) throws
IOException {
- long recNum = 0;
+
for (Vector point : iterable) {
if (recNum >= maxDocs) {
break;
@@ -51,7 +51,13 @@ public class SequenceFileVectorWriter im
}
return recNum;
}
-
+
+ /**
+  * Appends a single vector to the sequence file, keyed by the running record number,
+  * and advances that record number by one.
+  *
+  * @param vector the vector to append
+  * @throws IOException if the underlying sequence file writer fails
+  */
+ @Override
+ public void write(Vector vector) throws IOException {
+   LongWritable key = new LongWritable(recNum);
+   recNum++;
+   writer.append(key, new VectorWritable(vector));
+ }
+
@Override
public long write(Iterable<Vector> iterable) throws IOException {
return write(iterable, Long.MAX_VALUE);
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
---
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
(original)
+++
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
Fri Mar 25 14:28:12 2011
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.mahout.math.Vector;
-public interface VectorWriter {
+public abstract class VectorWriter {
/**
* Write all values in the Iterable to the output
* @param iterable The {@link Iterable} to loop over
@@ -29,7 +29,15 @@ public interface VectorWriter {
* @throws IOException if there was a problem writing
*
*/
- long write(Iterable<Vector> iterable) throws IOException;
+ public abstract long write(Iterable<Vector> iterable) throws IOException;
+
+ /**
+ * Write out a vector
+ *
+ * @param vector The {@link org.apache.mahout.math.Vector} to write
+ * @throws IOException
+ */
+ public abstract void write(Vector vector) throws IOException;
/**
* Write the first <code>maxDocs</code> to the output.
@@ -38,12 +46,12 @@ public interface VectorWriter {
* @return The number of docs written
* @throws IOException if there was a problem writing
*/
- long write(Iterable<Vector> iterable, long maxDocs) throws IOException;
+ public abstract long write(Iterable<Vector> iterable, long maxDocs) throws
IOException;
/**
* Close any internally held resources. If external Writers are passed in,
the implementation should indicate
* whether it also closes them
* @throws IOException if there was an issue closing the item
*/
- void close() throws IOException;
+ public abstract void close() throws IOException;
}
Added:
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java?rev=1085397&view=auto
==============================================================================
---
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
(added)
+++
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
Fri Mar 25 14:28:12 2011
@@ -0,0 +1,60 @@
+package org.apache.mahout.utils.vectors.csv;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.utils.MahoutTestCase;
+import org.apache.mahout.utils.vectors.RandomVectorIterable;
+import org.apache.mahout.utils.vectors.VectorHelper;
+import org.apache.mahout.utils.vectors.io.JWriterVectorWriter;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.io.StringWriter;
+
+
+/**
+ * Writes random vectors as CSV text and checks that {@link CSVVectorIterable} reads back the same
+ * number of vectors.
+ **/
+public class CSVVectorIterableTest extends MahoutTestCase {
+
+  @Test
+  public void test() throws Exception {
+    StringWriter csvOut = new StringWriter();
+    // Swap the writer's default vector format for the CSV rendering.
+    JWriterVectorWriter csvWriter = new JWriterVectorWriter(csvOut) {
+      @Override
+      protected void formatVector(Vector vector) throws IOException {
+        writer.write(VectorHelper.vectorToCSVString(vector, false));
+      }
+    };
+    Iterable<Vector> randomVectors = new RandomVectorIterable(50);
+    csvWriter.write(randomVectors);
+    csvWriter.close();
+
+    CSVVectorIterable parsed = new CSVVectorIterable(new StringReader(csvOut.toString()));
+    int count = 0;
+    for (Vector ignored : parsed) {
+      count++;
+    }
+    assertEquals(50, count);
+  }
+}
Added:
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java?rev=1085397&view=auto
==============================================================================
---
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
(added)
+++
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
Fri Mar 25 14:28:12 2011
@@ -0,0 +1,39 @@
+package org.apache.mahout.utils.vectors.io;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.utils.MahoutTestCase;
+import org.apache.mahout.utils.vectors.RandomVectorIterable;
+import org.junit.Test;
+
+
+/**
+ * Writes random vectors to a sequence file and checks that {@link SequenceFileVectorIterable}
+ * reads back the same number of vectors.
+ **/
+public class SequenceFileVectorIterableTest extends MahoutTestCase {
+
+  @Test
+  public void testSFVI() throws Exception {
+    Path seqFile = getTestTempFilePath("sfvw");
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+
+    // Write 50 random vectors as (LongWritable, VectorWritable) records.
+    SequenceFile.Writer seqWriter =
+        new SequenceFile.Writer(fs, conf, seqFile, LongWritable.class, VectorWritable.class);
+    SequenceFileVectorWriter vectorWriter = new SequenceFileVectorWriter(seqWriter);
+    Iterable<Vector> randomVectors = new RandomVectorIterable(50);
+    vectorWriter.write(randomVectors);
+    vectorWriter.close();
+
+    // Read them back, taking the vector from the value of each record.
+    SequenceFileVectorIterable readBack = new SequenceFileVectorIterable(fs, seqFile, conf, false);
+    int count = 0;
+    for (Vector ignored : readBack) {
+      count++;
+    }
+    assertEquals(50, count);
+  }
+}