Author: jmannix
Date: Tue Apr 5 00:20:27 2011
New Revision: 1088831
URL: http://svn.apache.org/viewvc?rev=1088831&view=rev
Log:
Fixes MAHOUT-369, adds new tests for Lanczos SVD consistency, provides a few
methods to convert old COLT vectors to Mahout vectors.
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVector.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix1D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java
mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
Tue Apr 5 00:20:27 2011
@@ -17,11 +17,6 @@
package org.apache.mahout.math.hadoop.decomposer;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -34,6 +29,7 @@ import org.apache.mahout.common.Abstract
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorIterable;
import org.apache.mahout.math.VectorWritable;
@@ -42,6 +38,11 @@ import org.apache.mahout.math.hadoop.Dis
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
public class DistributedLanczosSolver extends LanczosSolver implements Tool {
public static final String RAW_EIGENVECTORS = "rawEigenvectors";
@@ -190,14 +191,17 @@ public class DistributedLanczosSolver ex
* @param outputPath The path (relative to the current Configuration's
FileSystem) to save the output to.
*/
public void serializeOutput(Matrix eigenVectors, List<Double> eigenValues,
Path outputPath) throws IOException {
- log.info("Persisting {} eigenVectors and eigenValues to: {}",
eigenVectors.numRows(), outputPath);
+ int numEigenVectors = eigenVectors.numRows();
+ log.info("Persisting {} eigenVectors and eigenValues to: {}",
numEigenVectors, outputPath);
Configuration conf = getConf() != null ? getConf() : new Configuration();
FileSystem fs = FileSystem.get(conf);
SequenceFile.Writer seqWriter =
new SequenceFile.Writer(fs, conf, outputPath, IntWritable.class,
VectorWritable.class);
IntWritable iw = new IntWritable();
- for (int i = 0; i < eigenVectors.numRows() - 1; i++) {
- Vector v = eigenVectors.getRow(i);
+ for (int i = 0; i < numEigenVectors; i++) {
+ // Persist eigenvectors sorted by eigenvalues in descending order
+ NamedVector v = new NamedVector(eigenVectors.getRow(numEigenVectors-1-i),
+ "eigenVector" + i + ", eigenvalue = " +
eigenValues.get(numEigenVectors-1-i));
Writable vw = new VectorWritable(v);
iw.set(i);
seqWriter.append(iw, vw);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVector.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVector.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVector.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVector.java
Tue Apr 5 00:20:27 2011
@@ -18,6 +18,7 @@
package org.apache.mahout.math.hadoop.decomposer;
import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
import java.util.regex.Pattern;
@@ -31,8 +32,8 @@ public class EigenVector extends DenseVe
private final String name;
- public EigenVector(DenseVector v, double eigenValue, double cosAngleError,
int order) {
- super(v, false);
+ public EigenVector(Vector v, double eigenValue, double cosAngleError, int
order) {
+ super(v instanceof DenseVector ? (DenseVector) v : new DenseVector(v),
false);
name = "e|" + order + "| = |" + eigenValue + "|, err = " + cosAngleError;
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
Tue Apr 5 00:20:27 2011
@@ -17,15 +17,6 @@
package org.apache.mahout.math.hadoop.decomposer;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -35,7 +26,6 @@ import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.MatrixSlice;
import org.apache.mahout.math.OrthonormalityVerifier;
import org.apache.mahout.math.SparseRowMatrix;
@@ -49,6 +39,15 @@ import org.apache.mahout.math.hadoop.Dis
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
/**
* <p>Class for taking the output of an eigendecomposition (specified as a
Path location), and verifies correctness,
* in terms of the following: if you have a vector e, and a matrix m, then let
e' = m.timesSquared(v); the error
@@ -193,7 +192,7 @@ public class EigenVerificationJob extend
for (Map.Entry<MatrixSlice, EigenStatus> pruneSlice : prunedEigenMeta) {
MatrixSlice s = pruneSlice.getKey();
EigenStatus meta = pruneSlice.getValue();
- EigenVector ev = new EigenVector((DenseVector) s.vector(),
+ EigenVector ev = new EigenVector(s.vector(),
meta.getEigenValue(),
Math.abs(1 - meta.getCosAngle()),
s.index());
@@ -226,7 +225,8 @@ public class EigenVerificationJob extend
Collections.sort(prunedEigenMeta, new Comparator<Map.Entry<MatrixSlice,
EigenStatus>>() {
@Override
- public int compare(Map.Entry<MatrixSlice, EigenStatus> e1,
Map.Entry<MatrixSlice, EigenStatus> e2) {
+ public int compare(Map.Entry<MatrixSlice, EigenStatus> e1,
+ Map.Entry<MatrixSlice, EigenStatus> e2) {
return e1.getKey().index() - e2.getKey().index();
}
});
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
Tue Apr 5 00:20:27 2011
@@ -59,7 +59,7 @@ public final class TestDistributedLanczo
eigenVectors.assignRow(i, v);
i++;
}
- assertEquals("number of eigenvectors", 9, i);
+ assertEquals("number of eigenvectors", 10, i);
}
@Test
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
(original)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
Tue Apr 5 00:20:27 2011
@@ -18,19 +18,10 @@
package org.apache.mahout.math.decomposer.lanczos;
-import java.util.EnumMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.Matrix;
-import org.apache.mahout.math.MatrixSlice;
-import org.apache.mahout.math.SparseRowMatrix;
-import org.apache.mahout.math.VectorIterable;
+import org.apache.mahout.math.*;
+import org.apache.mahout.math.function.DoubleFunction;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.function.PlusMult;
-import org.apache.mahout.math.function.DoubleFunction;
-import org.apache.mahout.math.Vector;
import org.apache.mahout.math.matrix.DoubleMatrix1D;
import org.apache.mahout.math.matrix.DoubleMatrix2D;
import org.apache.mahout.math.matrix.impl.DenseDoubleMatrix2D;
@@ -38,6 +29,10 @@ import org.apache.mahout.math.matrix.lin
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Map;
+
/**
* <p>Simple implementation of the <a
href="http://en.wikipedia.org/wiki/Lanczos_algorithm">Lanczos algorithm</a> for
* finding eigenvalues of a symmetric matrix, applied to non-symmetric
matrices by applying Matrix.timesSquared(vector)
@@ -152,7 +147,7 @@ public class LanczosSolver {
endTime(TimingSection.TRIDIAG_DECOMP);
startTime(TimingSection.FINAL_EIGEN_CREATE);
- for (int i = 0; i < basis.numRows() - 1; i++) {
+ for (int i = 0; i < basis.numRows(); i++) {
Vector realEigen = new DenseVector(corpus.numCols());
// the eigenvectors live as columns of V, in reverse order. Weird but
true.
DoubleMatrix1D ejCol = eigenVects.viewColumn(basis.numRows() - i - 1);
@@ -162,8 +157,9 @@ public class LanczosSolver {
}
realEigen = realEigen.normalize();
eigenVectors.assignRow(i, realEigen);
- log.info("Eigenvector {} found with eigenvalue {}", i, eigenVals.get(i));
- eigenValues.add(eigenVals.get(i));
+ double e = Math.sqrt(eigenVals.get(i) * scaleFactor);
+ log.info("Eigenvector {} found with eigenvalue {}", i, e);
+ eigenValues.add(e);
}
log.info("LanczosSolver finished.");
endTime(TimingSection.FINAL_EIGEN_CREATE);
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java
(original)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java
Tue Apr 5 00:20:27 2011
@@ -8,10 +8,12 @@ It is provided "as is" without expressed
*/
package org.apache.mahout.math.matrix;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
import org.apache.mahout.math.function.DoubleDoubleFunction;
+import org.apache.mahout.math.function.DoubleFunction;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.function.PlusMult;
-import org.apache.mahout.math.function.DoubleFunction;
import org.apache.mahout.math.list.DoubleArrayList;
import org.apache.mahout.math.list.IntArrayList;
import org.apache.mahout.math.matrix.impl.AbstractMatrix1D;
@@ -169,6 +171,14 @@ public abstract class DoubleMatrix1D ext
return this;
}
+ public Vector toVector() {
+ final DenseVector vector = new DenseVector(cardinality());
+ for(int i=0; i<cardinality(); i++) {
+ vector.set(i, get(i));
+ }
+ return vector;
+ }
+
/**
* Assigns the result of a function to each cell; <tt>x[i] =
function(x[i],y[i])</tt>. <p> <b>Example:</b>
* <pre>
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java
(original)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java
Tue Apr 5 00:20:27 2011
@@ -9,10 +9,10 @@ It is provided "as is" without expressed
package org.apache.mahout.math.matrix.impl;
import org.apache.mahout.math.function.DoubleDoubleFunction;
+import org.apache.mahout.math.function.DoubleFunction;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.function.Mult;
import org.apache.mahout.math.function.PlusMult;
-import org.apache.mahout.math.function.DoubleFunction;
import org.apache.mahout.math.matrix.DoubleMatrix1D;
import org.apache.mahout.math.matrix.DoubleMatrix2D;
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix1D.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix1D.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix1D.java
(original)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix1D.java
Tue Apr 5 00:20:27 2011
@@ -8,6 +8,9 @@ It is provided "as is" without expressed
*/
package org.apache.mahout.math.matrix.impl;
+import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.function.IntDoubleProcedure;
import org.apache.mahout.math.map.AbstractIntDoubleMap;
import org.apache.mahout.math.map.OpenIntDoubleHashMap;
import org.apache.mahout.math.matrix.DoubleMatrix1D;
@@ -76,6 +79,19 @@ public final class SparseDoubleMatrix1D
this.isNoView = false;
}
+
+ /**
+  * Copies the stored (non-zero) cells of this matrix into a new sparse Mahout {@link Vector}.
+  *
+  * @return a {@link RandomAccessSparseVector} whose dimension is {@code size()} and which holds
+  *         one entry per pair stored in {@code elements}.
+  */
+ @Override
+ public Vector toVector() {
+   // The vector's dimension is the number of cells, size(). cardinality() only counts the
+   // non-zero cells, so it under-sized the result and left stored indices out of range.
+   final RandomAccessSparseVector vector = new RandomAccessSparseVector(size());
+   // NOTE(review): this uses the raw keys of `elements` as cell indices; presumably that only
+   // holds when this matrix is not a view (isNoView == true) - confirm for strided views.
+   elements.forEachPair(new IntDoubleProcedure() {
+     @Override
+     public boolean apply(int index, double value) {
+       vector.setQuick(index, value);
+       return true; // keep iterating over the remaining pairs
+     }
+   });
+   return vector;
+ }
+
+
/**
* Sets all cells to the state specified by <tt>value</tt>.
*
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java
(original)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java
Tue Apr 5 00:20:27 2011
@@ -8,6 +8,9 @@ It is provided "as is" without expressed
*/
package org.apache.mahout.math.matrix.linalg;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.MatrixSlice;
+import org.apache.mahout.math.Vector;
import org.apache.mahout.math.matrix.DoubleMatrix1D;
import org.apache.mahout.math.matrix.DoubleMatrix2D;
import org.apache.mahout.math.matrix.impl.DenseDoubleMatrix1D;
@@ -16,7 +19,7 @@ import org.apache.mahout.math.matrix.imp
import java.io.Serializable;
import static org.apache.mahout.math.Algebra.hypot;
-import static org.apache.mahout.math.matrix.linalg.Property.*;
+import static org.apache.mahout.math.matrix.linalg.Property.checkSquare;
/** @deprecated until unit tests are in place. Until this time, this
class/interface is unsupported. */
@Deprecated
@@ -43,30 +46,16 @@ public final class EigenvalueDecompositi
private double cdivr;
private double cdivi;
- /**
- * Constructs and returns a new eigenvalue decomposition object; The
decomposed matrices can be retrieved via instance
- * methods of the returned decomposition object. Checks for symmetry, then
constructs the eigenvalue decomposition.
- *
- * @param A A square matrix.
- * @throws IllegalArgumentException if <tt>A</tt> is not square.
- */
- public EigenvalueDecomposition(DoubleMatrix2D A) {
- checkSquare(A);
-
- n = A.columns();
- V = new double[n][n];
+ public EigenvalueDecomposition(double[][] v) {
+ if(v.length != v[0].length) {
+ throw new IllegalArgumentException("Matrix must be square");
+ }
+ n = v.length;
+ V = v;
d = new double[n];
e = new double[n];
- boolean issymmetric = DEFAULT.isSymmetric(A);
-
- if (issymmetric) {
- for (int i = 0; i < n; i++) {
- for (int j = 0; j < n; j++) {
- V[i][j] = A.getQuick(i, j);
- }
- }
-
+ if (isSymmetric(v)) {
// Tridiagonalize.
tred2();
@@ -79,7 +68,7 @@ public final class EigenvalueDecompositi
for (int j = 0; j < n; j++) {
for (int i = 0; i < n; i++) {
- H[i][j] = A.getQuick(i, j);
+ H[i][j] = v[i][j];
}
}
@@ -91,6 +80,58 @@ public final class EigenvalueDecompositi
}
}
+ /**
+  * Constructs the eigenvalue decomposition of a Mahout {@link Matrix}. The matrix is first
+  * copied into a {@code double[][]} by {@code toArray(Matrix)} and then handed to the
+  * array-based constructor.
+  *
+  * @param A the matrix to decompose.
+  * @throws IllegalArgumentException if <tt>A</tt> is not square (raised by the square check
+  *         inside {@code toArray}).
+  */
+ public EigenvalueDecomposition(Matrix A) {
+ this(toArray(A));
+ }
+
+ /**
+  * Copies a square Mahout {@link Matrix} into a freshly allocated {@code double[n][n]} array.
+  * Cells that the slice iterators do not visit keep the array default of 0.0.
+  *
+  * @param A the matrix to copy.
+  * @return the cell values, indexed as {@code [row][column]}.
+  * @throws IllegalArgumentException if <tt>A</tt> is not square.
+  */
+ private static double[][] toArray(Matrix A) {
+   checkSquare(A);
+
+   int dimension = A.numCols();
+   double[][] cells = new double[dimension][dimension];
+   for (MatrixSlice rowSlice : A) {
+     double[] targetRow = cells[rowSlice.index()];
+     for (Vector.Element cell : rowSlice.vector()) {
+       targetRow[cell.index()] = cell.get();
+     }
+   }
+   return cells;
+ }
+
+ /**
+  * Tests whether {@code matrix[i][j] == matrix[j][i]} (exact comparison, no tolerance) for
+  * every pair below the diagonal.
+  *
+  * @param matrix the cell values, indexed as {@code [row][column]}.
+  * @return {@code true} if no mismatching pair is found, {@code false} at the first mismatch.
+  */
+ private static boolean isSymmetric(double[][] matrix) {
+   int row = 0;
+   while (row < matrix.length) {
+     double[] current = matrix[row];
+     // Only the strictly-lower triangle needs visiting; each cell is compared to its mirror.
+     for (int col = 0; col < row; col++) {
+       boolean mirrored = current[col] == matrix[col][row];
+       if (!mirrored) {
+         return false;
+       }
+     }
+     row++;
+   }
+   return true;
+ }
+
+ /**
+  * Copies a square {@link DoubleMatrix2D} into a freshly allocated {@code double[n][n]} array.
+  *
+  * @param A the matrix to copy.
+  * @return the cell values, indexed as {@code [row][column]}.
+  * @throws IllegalArgumentException if <tt>A</tt> is not square.
+  */
+ private static double[][] toArray(DoubleMatrix2D A) {
+   checkSquare(A);
+
+   // After the square check rows() == columns(), so one dimension bounds both loops. The
+   // column loop was previously bounded by A.rows(), which was correct only by coincidence.
+   int n = A.columns();
+   double[][] V = new double[n][n];
+   for (int row = 0; row < n; row++) {
+     for (int col = 0; col < n; col++) {
+       V[row][col] = A.getQuick(row, col);
+     }
+   }
+   return V;
+ }
+
+ /**
+  * Constructs and returns a new eigenvalue decomposition object; the decomposed matrices can be
+  * retrieved via instance methods of the returned decomposition object. The input is copied
+  * into a {@code double[][]} and handed to the array-based constructor, which checks for
+  * symmetry and then constructs the eigenvalue decomposition.
+  *
+  * @param A A square matrix.
+  * @throws IllegalArgumentException if <tt>A</tt> is not square.
+  */
+ public EigenvalueDecomposition(DoubleMatrix2D A) {
+ this(toArray(A));
+ }
+
private void cdiv(double xr, double xi, double yr, double yi) {
double r;
double d;
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java
(original)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java
Tue Apr 5 00:20:27 2011
@@ -8,6 +8,7 @@ It is provided "as is" without expressed
*/
package org.apache.mahout.math.matrix.linalg;
+import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.matrix.DoubleMatrix1D;
import org.apache.mahout.math.matrix.DoubleMatrix2D;
@@ -53,6 +54,12 @@ public final class Property {
}
}
+ public static void checkSquare(Matrix matrix) {
+ if(matrix.numRows() != matrix.numCols()) {
+ throw new IllegalArgumentException("Matrix must be square");
+ }
+ }
+
/** Returns the matrix's fraction of non-zero cells; <tt>A.cardinality() /
A.size()</tt>. */
public static double density(DoubleMatrix2D a) {
return a.cardinality() / (double) a.size();
Modified:
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
(original)
+++
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
Tue Apr 5 00:20:27 2011
@@ -17,12 +17,8 @@
package org.apache.mahout.math.decomposer;
-import org.apache.mahout.math.MahoutTestCase;
-import org.apache.mahout.math.Matrix;
-import org.apache.mahout.math.SequentialAccessSparseVector;
-import org.apache.mahout.math.SparseRowMatrix;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorIterable;
+import org.apache.mahout.math.*;
+import org.apache.mahout.math.function.Functions;
import java.util.Random;
@@ -67,7 +63,7 @@ public abstract class SolverTest extends
Vector afterMultiply = isSymmetric ? corpus.times(e) :
corpus.timesSquared(e);
double dot = afterMultiply.dot(e);
double afterNorm = afterMultiply.getLengthSquared();
- double error = 1 - dot / Math.sqrt(afterNorm * e.getLengthSquared());
+ double error = 1 - Math.abs(dot / Math.sqrt(afterNorm *
e.getLengthSquared()));
assertTrue("Error margin: {" + error + " too high! (for eigen " + i +
')', Math.abs(error) < errorMargin);
}
}
@@ -105,4 +101,31 @@ public abstract class SolverTest extends
}
return m;
}
+
+ /**
+  * Builds a reproducible (fixed random seed) dense test matrix whose rows are Gaussian random
+  * vectors, with row {@code i} rescaled to Euclidean length {@code 1 / (i + 1)}.
+  *
+  * @param numRows number of rows to generate.
+  * @param numCols number of columns to generate.
+  * @param symmetric if {@code true}, the product of the generated matrix with its own
+  *        transpose (a {@code numRows x numRows} matrix) is returned instead of the
+  *        generated matrix itself.
+  * @return the generated matrix, or its product with its transpose when {@code symmetric}.
+  */
+ public static Matrix randomHierarchicalMatrix(int numRows, int numCols, boolean symmetric) {
+   Matrix matrix = new DenseMatrix(numRows, numCols);
+   Random random = new Random(1234L);
+   for (int row = 0; row < numRows; row++) {
+     Vector v = new DenseVector(numCols);
+     for (int col = 0; col < numCols; col++) {
+       v.set(col, random.nextGaussian());
+     }
+     // Normalise the row, then shrink it so that later rows carry progressively less weight.
+     v.assign(Functions.MULT, 1 / ((row + 1) * v.norm(2)));
+     matrix.assignRow(row, v);
+   }
+   // The symmetric case always returned this product through an `if(true) return` shortcut;
+   // the lower-to-upper mirroring loop that followed it was unreachable and has been removed.
+   return symmetric ? matrix.times(matrix.transpose()) : matrix;
+ }
+
+ /**
+  * Convenience wrapper: calls {@code randomHierarchicalMatrix(size, size, true)}.
+  *
+  * @param size the number of rows and columns requested.
+  * @return the matrix produced by {@code randomHierarchicalMatrix} in its symmetric mode.
+  */
+ public static Matrix randomHierarchicalSymmetricMatrix(int size) {
+ return randomHierarchicalMatrix(size, size, true);
+ }
}
Modified:
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java
(original)
+++
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java
Tue Apr 5 00:20:27 2011
@@ -19,37 +19,71 @@ package org.apache.mahout.math.decompose
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.Vector;
import org.apache.mahout.math.decomposer.SolverTest;
+import org.apache.mahout.math.matrix.DoubleMatrix1D;
+import org.apache.mahout.math.matrix.linalg.EigenvalueDecomposition;
import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
public final class TestLanczosSolver extends SolverTest {
+ private static final Logger log =
LoggerFactory.getLogger(TestLanczosSolver.class);
+
+ private static final double ERROR_TOLERANCE = 1e-5;
+
+ /**
+  * Cross-checks {@link LanczosSolver} against the dense {@link EigenvalueDecomposition} on a
+  * 100 x 100 matrix from {@code randomHierarchicalSymmetricMatrix}. For the first 75% of the 80
+  * requested eigenvectors, the Lanczos vector and the paired column of the dense
+  * decomposition's V must agree up to sign: 1 - |cos(angle)| has to stay below
+  * {@code ERROR_TOLERANCE}. Eigenvalues are only logged, never asserted.
+  */
+ @Test
+ public void testEigenvalueCheck() throws Exception {
+ int size = 100;
+ Matrix m = randomHierarchicalSymmetricMatrix(size);
+ int desiredRank = 80;
+ // Only this leading fraction of the requested eigenvectors is verified.
+ float fractionOfEigensExpectedGood = 0.75f;
+ LanczosSolver solver = new LanczosSolver();
+ Matrix eigenvectors = new DenseMatrix(desiredRank, size);
+ List<Double> eigenvalueList = new ArrayList<Double>();
+ solver.solve(m, desiredRank, eigenvectors, eigenvalueList);
+
+ // Reference result computed by the dense decomposition of the same matrix.
+ EigenvalueDecomposition decomposition = new EigenvalueDecomposition(m);
+ DoubleMatrix1D eigenvalues = decomposition.getRealEigenvalues();
+
+ for(int i = 0; i < fractionOfEigensExpectedGood * desiredRank; i++) {
+ log.info(i + " : L = {}, E = {}",
+ eigenvalueList.get(desiredRank - i - 1),
+ eigenvalues.get(eigenvalues.size() - i - 1) );
+ // Row i of the Lanczos output is paired with column (n - i - 1) of the reference V.
+ // NOTE(review): presumably the reference lists eigenvalues in ascending order while the
+ // solver emits the dominant eigenvector first - confirm.
+ Vector v = eigenvectors.getRow(i);
+ Vector v2 = decomposition.getV().viewColumn(eigenvalues.size() - i - 1).toVector();
+ // Math.abs makes the comparison insensitive to the arbitrary sign of an eigenvector.
+ double error = 1 - Math.abs(v.dot(v2)/(v.norm(2) * v2.norm(2)));
+ log.info("error: {}", error);
+ assertTrue(i + ": 1 - cosAngle = " + error, error < ERROR_TOLERANCE);
+ }
+ }
+
@Test
public void testLanczosSolver() throws Exception {
- int numColumns = 800;
- Matrix corpus = randomSequentialAccessSparseMatrix(1000, 900, numColumns,
30, 1.0);
+ int numRows = 800;
+ int numColumns = 500;
+ Matrix corpus = randomHierarchicalMatrix(numRows, numColumns, false);
int rank = 50;
Matrix eigens = new DenseMatrix(rank, numColumns);
long time = timeLanczos(corpus, eigens, rank, false);
assertTrue("Lanczos taking too long! Are you in the debugger? :)", time <
10000);
assertOrthonormal(eigens);
- assertEigen(eigens, corpus, 0.1, false);
+ assertEigen(eigens, corpus, rank / 2, ERROR_TOLERANCE, false);
}
@Test
public void testLanczosSolverSymmetric() throws Exception {
- int numColumns = 400;
- Matrix corpus = randomSequentialAccessSparseMatrix(500, 450, numColumns,
10, 1.0);
- Matrix gramMatrix = corpus.times(corpus.transpose());
+ Matrix corpus = randomHierarchicalSymmetricMatrix(500);
int rank = 30;
- Matrix eigens = new DenseMatrix(rank, gramMatrix.numCols());
- long time = timeLanczos(gramMatrix, eigens, rank, true);
+ Matrix eigens = new DenseMatrix(rank, corpus.numCols());
+ long time = timeLanczos(corpus, eigens, rank, true);
assertTrue("Lanczos taking too long! Are you in the debugger? :)", time <
10000);
assertOrthonormal(eigens);
- assertEigen(eigens, gramMatrix, 0.1, true);
+ assertEigen(eigens, corpus, rank / 2, ERROR_TOLERANCE, true);
}
public static long timeLanczos(Matrix corpus, Matrix eigens, int rank,
boolean symmetric) {
Modified:
mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
---
mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
(original)
+++
mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
Tue Apr 5 00:20:27 2011
@@ -17,11 +17,6 @@
package org.apache.mahout.clustering;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -68,6 +63,11 @@ import org.apache.mahout.vectorizer.Weig
import org.junit.Before;
import org.junit.Test;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
public final class TestClusterDumper extends MahoutTestCase {
private static final String[] DOCS = { "The quick red fox jumped over the
lazy brown dogs.",
@@ -329,11 +329,14 @@ public final class TestClusterDumper ext
int sampleDimension = sampleData.get(0).get().size();
// Run EigenVerificationJob from within DistributedLanczosSolver.run(...)
int desiredRank = 13;
- solver.run(testData, output, tmp, sampleData.size(), sampleDimension,
false, desiredRank, 0.5, 0.0, false);
+ solver.run(testData, output, tmp, sampleData.size(), sampleDimension,
+ false, desiredRank, 0.5, 0.0, false);
+
Path cleanEigenvectors = new Path(output,
EigenVerificationJob.CLEAN_EIGENVECTORS);
// now multiply the testdata matrix and the eigenvector matrix
- DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors,
tmp, desiredRank - 1, sampleDimension);
+ DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors,
tmp,
+ desiredRank, sampleDimension);
Configuration conf = new Configuration(config);
svdT.setConf(conf);
DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp,
sampleData.size(), sampleDimension);
@@ -348,6 +351,7 @@ public final class TestClusterDumper ext
// run ClusterDumper
ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf,
output, 10), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(termDictionary);
+ assertTrue(true);
}
@Test
@@ -369,9 +373,11 @@ public final class TestClusterDumper ext
Path cleanEigenvectors = new Path(output,
EigenVerificationJob.CLEAN_EIGENVECTORS);
// now multiply the testdata matrix and the eigenvector matrix
- DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors,
tmp, desiredRank - 1, sampleDimension);
+ DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors,
tmp, desiredRank,
+ sampleDimension);
svdT.setConf(conf);
- DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp,
sampleData.size(), sampleDimension);
+ DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp,
sampleData.size(),
+ sampleDimension);
a.setConf(conf);
DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
sData.setConf(conf);
@@ -379,9 +385,12 @@ public final class TestClusterDumper ext
// now run the Canopy job to prime kMeans canopies
CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false,
false);
// now run the KMeans job
- KMeansDriver.run(sData.getRowPath(), new Path(output, "clusters-0"),
output, measure, 0.001, 10, true, false);
+ KMeansDriver.run(sData.getRowPath(), new Path(output, "clusters-0"),
output, measure,
+ 0.001, 10, true, false);
// run ClusterDumper
- ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf,
output, 10), new Path(output, "clusteredPoints"));
+ ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf,
output, 10),
+ new Path(output, "clusteredPoints"));
clusterDumper.printClusters(termDictionary);
+ assertTrue(true);
}
}