Author: tn Date: Thu Feb 16 19:41:42 2012 New Revision: 1245133 URL: http://svn.apache.org/viewvc?rev=1245133&view=rev Log: Changed StorelessCovariance according to suggestions from psteitz. JIRA: MATH-449
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java?rev=1245133&r1=1245132&r2=1245133&view=diff ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java (original) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java Thu Feb 16 19:41:42 2012 @@ -30,10 +30,13 @@ import org.apache.commons.math3.exceptio * Sandia National Laboratories. It computes the covariance for a pair of variables. * Use {@link StorelessCovariance} to estimate an entire covariance matrix.</p> * + * <p>Note: This class is package private as it is only used internally in + * the {@link StorelessCovariance} class.</p> + * * @version $Id$ * @since 3.0 */ -public class StorelessBivariateCovariance { +class StorelessBivariateCovariance { /** the mean of variable x */ private double meanX; Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java?rev=1245133&r1=1245132&r2=1245133&view=diff ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java (original) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java Thu Feb 16 19:41:42 2012 @@ -34,121 +34,129 @@ import org.apache.commons.math3.linear.R * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212, * Sandia National Laboratories.</p> * + * <p>Note: the underlying covariance matrix is symmetric, thus only the + * upper triangular part of the matrix is stored and updated each increment.</p> + * * @version $Id$ * @since 3.0 */ public class StorelessCovariance extends Covariance { - /** the two-dimensional covariance matrix */ - private StorelessBivariateCovariance[][] covMatrix; - - /** row dimension of the covariance matrix */ - private int rowDimension; - - /** column dimension of the covariance matrix */ - private int colDimension; + /** the square covariance matrix (upper triangular part) */ + private StorelessBivariateCovariance[] covMatrix; - /** flag for bias correction */ - private boolean biasCorrected; + /** dimension of the square covariance matrix */ + private int dimension; /** - * Create a bias corrected covariance matrix with a given number of rows and columns. + * Create a bias corrected covariance matrix with a given dimension. * - * @param rows number of rows - * @param cols number of columns + * @param dim the dimension of the square covariance matrix */ - public StorelessCovariance(final int rows, final int cols) { - this(rows, cols, true); + public StorelessCovariance(final int dim) { + this(dim, true); } /** * Create a covariance matrix with a given number of rows and columns and the * indicated bias correction. * - * @param rows number of variables in the rows - * @param cols number of variables in the columns - * @param biasCorrection if <code>true</code> the covariance estimate is corrected + * @param dim the dimension of the covariance matrix + * @param biasCorrected if <code>true</code> the covariance estimate is corrected * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction, * i.e. n in the denominator. */ - public StorelessCovariance(final int rows, final int cols, - final boolean biasCorrection) { - rowDimension = rows; - colDimension = cols; - biasCorrected = biasCorrection; - covMatrix = new StorelessBivariateCovariance[rowDimension][colDimension]; - initializeMatrix(); + public StorelessCovariance(final int dim, final boolean biasCorrected) { + dimension = dim; + covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2]; + initializeMatrix(biasCorrected); } /** * Initialize the internal two-dimensional array of * {@link StorelessBivariateCovariance} instances. + * + * @param biasCorrected if the covariance estimate shall be corrected for bias */ - private void initializeMatrix() { - for(int i=0;i<rowDimension;i++){ - for(int j=0;j<colDimension;j++){ - covMatrix[i][j] = new StorelessBivariateCovariance(biasCorrected); + private void initializeMatrix(final boolean biasCorrected) { + for(int i = 0; i < dimension; i++){ + for(int j = 0; j < dimension; j++){ + setElement(i, j, new StorelessBivariateCovariance(biasCorrected)); } } } /** - * Get the covariance for an individual element of the covariance matrix. + * Returns the index (i, j) translated into the one-dimensional + * array used to store the upper triangular part of the symmetric + * covariance matrix. * - * @param xIndex row index in the covariance matrix - * @param yIndex column index in the covariance matrix - * @return the covariance of the given element + * @param i the row index + * @param j the column index + * @return the corresponding index in the matrix array */ - public StorelessBivariateCovariance getCovariance(final int xIndex, - final int yIndex) { - return covMatrix[xIndex][yIndex]; + private int indexOf(final int i, final int j) { + return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i; } /** - * Set the covariance for an individual element of the covariance matrix. - * - * @param xIndex row index in the covariance matrix - * @param yIndex column index in the covariance matrix - * @param cov the covariance to be set + * Gets the element at index (i, j) from the covariance matrix + * @param i the row index + * @param j the column index + * @return the {@link StorelessBivariateCovariance} element at the given index */ - public void setCovariance(final int xIndex, final int yIndex, - final StorelessBivariateCovariance cov) { - covMatrix[xIndex][yIndex] = cov; + private StorelessBivariateCovariance getElement(final int i, final int j) { + return covMatrix[indexOf(i, j)]; } /** - * Increment one individual element of the covariance matrix. - * - * <p>The element is specified by the xIndex and yIndex and incremented with the - * corresponding values of x and y.</p> + * Sets the covariance element at index (i, j) in the covariance matrix + * @param i the row index + * @param j the column index + * @param cov the {@link StorelessBivariateCovariance} element to be set + */ + private void setElement(final int i, final int j, + final StorelessBivariateCovariance cov) { + covMatrix[indexOf(i, j)] = cov; + } + + /** + * Get the covariance for an individual element of the covariance matrix. * * @param xIndex row index in the covariance matrix * @param yIndex column index in the covariance matrix - * @param x value of x - * @param y value of y + * @return the covariance of the given element + * @throws NumberIsTooSmallException if the number of observations + * in the cell is < 2 */ - public void incrementCovariance(final int xIndex, final int yIndex, - final double x, final double y) { - covMatrix[xIndex][yIndex].increment(x, y); + public double getCovariance(final int xIndex, + final int yIndex) + throws NumberIsTooSmallException { + + return getElement(xIndex, yIndex).getResult(); + } /** * Increment the covariance matrix with one row of data. * - * @param rowData array representing one row of data. + * @param data array representing one row of data. * @throws DimensionMismatchException if the length of <code>rowData</code> * does not match with the covariance matrix */ - public void incrementRow(final double[] rowData) + public void increment(final double[] data) throws DimensionMismatchException { - int length = rowData.length; - if (length != colDimension) { - throw new DimensionMismatchException(length, colDimension); + int length = data.length; + if (length != dimension) { + throw new DimensionMismatchException(length, dimension); } + + // only update the upper triangular part of the covariance matrix + // as only these parts are actually stored for (int i = 0; i < length; i++){ - for (int j = 0; j < length; j++){ - covMatrix[i][j].increment(rowData[i], rowData[j]); + for (int j = i; j < length; j++){ + getElement(i, j).increment(data[i], data[j]); } } @@ -171,10 +179,10 @@ public class StorelessCovariance extends * for a cell is < 2 */ public double[][] getData() throws NumberIsTooSmallException { - final double[][] data = new double[rowDimension][rowDimension]; - for (int i = 0; i < rowDimension; i++) { - for (int j = 0; j < colDimension; j++) { - data[i][j] = covMatrix[i][j].getResult(); + final double[][] data = new double[dimension][dimension]; + for (int i = 0; i < dimension; i++) { + for (int j = 0; j < dimension; j++) { + data[i][j] = getElement(i, j).getResult(); } } return data; Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java?rev=1245133&r1=1245132&r2=1245133&view=diff ============================================================================== --- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java (original) +++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java Thu Feb 16 19:41:42 2012 @@ -19,9 +19,9 @@ package org.apache.commons.math3.stat.co import org.apache.commons.math3.TestUtils; import org.apache.commons.math3.linear.Array2DRowRealMatrix; import org.apache.commons.math3.linear.RealMatrix; +import org.junit.Assert; import org.junit.Test; - public class StorelessCovarianceTest { protected final double[] longleyData = new double[] { @@ -163,9 +163,9 @@ public class StorelessCovarianceTest { 2973.033333333333, 1382.433333333333, 32917.40000000, 22.66666666666667 }; - StorelessCovariance covMatrix = new StorelessCovariance(7, 7); + StorelessCovariance covMatrix = new StorelessCovariance(7); for(int i=0;i<matrix.getRowDimension();i++){ - covMatrix.incrementRow(matrix.getRow(i)); + covMatrix.increment(matrix.getRow(i)); } RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix(); @@ -174,8 +174,6 @@ public class StorelessCovarianceTest { } - - /** * Test R Swiss fertility dataset against R. * Data Source: R datasets package @@ -192,9 +190,9 @@ public class StorelessCovarianceTest { 241.5632030527289, 379.9043755781684, -190.56061054579092, -61.6988297872340, 1739.2945371877890 }; - StorelessCovariance covMatrix = new StorelessCovariance(5, 5); + StorelessCovariance covMatrix = new StorelessCovariance(5); for(int i=0;i<matrix.getRowDimension();i++){ - covMatrix.incrementRow(matrix.getRow(i)); + covMatrix.increment(matrix.getRow(i)); } RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix(); @@ -203,93 +201,26 @@ public class StorelessCovarianceTest { } /** - * Test Longley dataset against R. - * Data Source: J. Longley (1967) "An Appraisal of Least Squares - * Programs for the Electronic Computer from the Point of View of the User" - * Journal of the American Statistical Association, vol. 62. September, - * pp. 819-841. - * - * Data are from NIST: - * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat + * Test symmetry of the covariance matrix */ @Test - public void testLonglyByEntry() { - RealMatrix matrix = createRealMatrix(longleyData, 16, 7); - - double[] rData = new double[] { - 12333921.73333333246, 3.679666000000000e+04, 343330206.333333313, - 1649102.666666666744, 1117681.066666666651, 23461965.733333334, 16240.93333333333248, - 36796.66000000000, 1.164576250000000e+02, 1063604.115416667, - 6258.666250000000, 3490.253750000000, 73503.000000000, 50.92333333333334, - 343330206.33333331347, 1.063604115416667e+06, 9879353659.329166412, - 56124369.854166664183, 30880428.345833335072, 685240944.600000024, 470977.90000000002328, - 1649102.66666666674, 6.258666250000000e+03, 56124369.854166664, - 873223.429166666698, -115378.762499999997, 4462741.533333333, 2973.03333333333330, - 1117681.06666666665, 3.490253750000000e+03, 30880428.345833335, - -115378.762499999997, 484304.095833333326, 1764098.133333333, 1382.43333333333339, - 23461965.73333333433, 7.350300000000000e+04, 685240944.600000024, - 4462741.533333333209, 1764098.133333333302, 48387348.933333330, 32917.40000000000146, - 16240.93333333333, 5.092333333333334e+01, 470977.900000000, - 2973.033333333333, 1382.433333333333, 32917.40000000, 22.66666666666667 - }; + public void testSymmetry() { + RealMatrix matrix = createRealMatrix(swissData, 47, 5); - int row = matrix.getRowDimension(); - int col = matrix.getColumnDimension(); - double x = 0.0; - double y = 0.0; - StorelessCovariance covMatrix = new StorelessCovariance(7, 7); - for(int i=0;i<row;i++){ - for(int j=0;j<col;j++){ - x = matrix.getEntry(i, j); - for(int k=0;k<col;k++){ - y = matrix.getEntry(i, k); - covMatrix.incrementCovariance(j, k, x, y); - } - } + final int dimension = 5; + StorelessCovariance storelessCov = new StorelessCovariance(dimension); + for(int i=0;i<matrix.getRowDimension();i++){ + storelessCov.increment(matrix.getRow(i)); } - RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix(); - - TestUtils.assertEquals("covariance matrix", createRealMatrix(rData, 7, 7), covarianceMatrix, 10E-7); - - } - - /** - * Test R Swiss fertility dataset against R. - * Data Source: R datasets package - */ - @Test - public void testSwissFertilityByEntry() { - RealMatrix matrix = createRealMatrix(swissData, 47, 5); - - double[] rData = new double[] { - 156.0424976873265, 100.1691489361702, -64.36692876965772, -79.7295097132285, 241.5632030527289, - 100.169148936170251, 515.7994172062905, -124.39283071230344, -139.6574005550416, 379.9043755781684, - -64.3669287696577, -124.3928307123034, 63.64662349676226, 53.5758556891767, -190.5606105457909, - -79.7295097132285, -139.6574005550416, 53.57585568917669, 92.4560592044403, -61.6988297872340, - 241.5632030527289, 379.9043755781684, -190.56061054579092, -61.6988297872340, 1739.2945371877890 - }; - - int row = matrix.getRowDimension(); - int col = matrix.getColumnDimension(); - double x = 0.0; - double y = 0.0; - StorelessCovariance covMatrix = new StorelessCovariance(5, 5); - for(int i=0;i<row;i++){ - for(int j=0;j<col;j++){ - x = matrix.getEntry(i, j); - for(int k=0;k<col;k++){ - y = matrix.getEntry(i, k); - covMatrix.incrementCovariance(j, k, x, y); - } + double[][] covMatrix = storelessCov.getData(); + for (int i = 0; i < dimension; i++) { + for (int j = i; j < dimension; j++) { + Assert.assertEquals(covMatrix[i][j], covMatrix[j][i], 10e-9); } } - - RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix(); - - TestUtils.assertEquals("covariance matrix", createRealMatrix(rData, 5, 5), covarianceMatrix, 10E-13); } - + protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) { double[][] matrixData = new double[nRows][nCols]; int ptr = 0;