mdiggory 2003/07/09 14:45:24 Modified: math/src/java/org/apache/commons/math/stat Univariate.java StoreUnivariate.java BeanListUnivariateImpl.java UnivariateImpl.java TestStatisticImpl.java AbstractStoreUnivariate.java ListUnivariateImpl.java Frequency.java StatUtils.java TestStatistic.java StoreUnivariateImpl.java math/src/test/org/apache/commons/math/stat StatUtilsTest.java UnivariateImplTest.java Added: math/src/java/org/apache/commons/math/stat AbstractUnivariate.java math/src/test/org/apache/commons/math/stat MixedListUnivariateImplTest.java Log: Changes the Univariate implementations to use the UnivariateStatistic package. Slims down StatUtils by removing some of the higher moments. Reimplements ListUnivariate to work with NumberTransformers, turning it into a Mixed Object List Univariate. Revision Changes Path 1.8 +26 -6 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/Univariate.java Index: Univariate.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/Univariate.java,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- Univariate.java 7 Jul 2003 23:25:13 -0000 1.7 +++ Univariate.java 9 Jul 2003 21:45:23 -0000 1.8 @@ -69,12 +69,21 @@ * reported statistics will be based on these values<p> * The default windowSize is "infinite" -- i.e., all values added are included * in all computations. 
- * - * @author <a href="mailto:[EMAIL PROTECTED]">Tim O'Brien</a> * @version $Revision$ $Date$ - * */ public interface Univariate { + /** + * A LEPTOKURTIC set has a positive kurtosis (a high peak) + */ + public static int LEPTOKURTIC = 1; + /** + * A MESOKURTIC set has a kurtosis of 0 - it is a normal distribution + */ + public static int MESOKURTIC = 0; + /** + * A PLATYKURTIC set has a negative kurtosis (a flat "peak") + */ + public static int PLATYKURTIC = -1; /** * Adds the value to the set of numbers @@ -83,14 +92,14 @@ void addValue(double v); /** - * Returns the <a href=http://www.xycoon.com/arithmetic_mean.htm> + * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm"> * arithmetic mean </a> of the available values * @return The mean or Double.NaN if no values have been added. */ double getMean(); /** - * Returns the <a href=http://www.xycoon.com/geometric_mean.htm> + * Returns the <a href="http://www.xycoon.com/geometric_mean.htm"> * geometric mean </a> of the available values * @return The geometricMean, Double.NaN if no values have been added, * or if the productof the available values is less than or equal to 0. @@ -127,6 +136,17 @@ */ double getKurtosis(); + /** + * Returns the Kurtosis "classification" a distribution can be + * leptokurtic (high peak), platykurtic (flat peak), + * or mesokurtic (zero kurtosis). + * + * @return A static constant defined in this interface, + * StoredDeviation.LEPTOKURITC, StoredDeviation.PLATYKURTIC, or + * StoredDeviation.MESOKURTIC + */ + int getKurtosisClass(); + /** * Returns the maximum of the available values * @return The max or Double.NaN if no values have been added. 
1.7 +1 -43 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/StoreUnivariate.java Index: StoreUnivariate.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/StoreUnivariate.java,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- StoreUnivariate.java 7 Jul 2003 23:25:13 -0000 1.6 +++ StoreUnivariate.java 9 Jul 2003 21:45:23 -0000 1.7 @@ -60,51 +60,9 @@ * Univariate provides additional percentile functionality * such as. This additional functionality comes with * a price of increased storage costs. - * - * @author <a href="mailto:[EMAIL PROTECTED]">Tim O'Brien</a> + * @version $Revision$ $Date$ */ public interface StoreUnivariate extends Univariate { - - /** - * A LEPTOKURTIC set has a positive kurtosis (a high peak) - */ - public static int LEPTOKURTIC = 1; - - /** - * A MESOKURTIC set has a kurtosis of 0 - it is a normal distribution - */ - public static int MESOKURTIC = 0; - - /** - * A PLATYKURTIC set has a negative kurtosis (a flat "peak") - */ - public static int PLATYKURTIC = -1; - - /** - * Returns the skewness of a given distribution. Skewness is a - * measure of the assymetry of a given distribution. - * - * @return The skewness of this distribution - */ - double getSkewness(); - - /** - * Kurtosis is a measure of the "peakedness" of a distribution - * - * @return the mode - */ - double getKurtosis(); - - /** - * Returns the Kurtosis "classification" a distribution can be - * leptokurtic (high peak), platykurtic (flat peak), - * or mesokurtic (zero kurtosis). - * - * @return A static constant defined in this interface, - * StoredDeviation.LEPTOKURITC, StoredDeviation.PLATYKURTIC, or - * StoredDeviation.MESOKURTIC - */ - int getKurtosisClass(); /** * Returns the current set of values in an array of double primitives. 
1.3 +37 -41 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/BeanListUnivariateImpl.java Index: BeanListUnivariateImpl.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/BeanListUnivariateImpl.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- BeanListUnivariateImpl.java 22 Jun 2003 03:57:55 -0000 1.2 +++ BeanListUnivariateImpl.java 9 Jul 2003 21:45:23 -0000 1.3 @@ -54,49 +54,71 @@ package org.apache.commons.math.stat; import java.util.List; -import org.apache.commons.beanutils.PropertyUtils; + +import org.apache.commons.math.util.BeanTransformer; /** * This implementation of StoreUnivariate uses commons-beanutils to gather * univariate statistics for a List of Java Beans by property. This * implementation uses beanutils' PropertyUtils to get a simple, nested, * indexed, mapped, or combined property from an element of a List. - * - * @author <a href="mailto:[EMAIL PROTECTED]">Tim O'Brien</a> + * @version $Revision$ $Date$ */ public class BeanListUnivariateImpl extends ListUnivariateImpl { + /** + * propertyName of the property to get from the bean + */ private String propertyName; + /** + * Construct a BeanListUnivariate with specified + * backing list + * @param list Backing List + */ public BeanListUnivariateImpl(List list) { - super( list ); + super(list); } + /** + * Construct a BeanListUnivariate with specified + * backing list and propertyName + * @param list Backing List + * @param propertyName Bean propertyName + */ public BeanListUnivariateImpl(List list, String propertyName) { - super( list ); - setPropertyName( propertyName ); + super(list); + setPropertyName(propertyName); + this.transformer = new BeanTransformer(propertyName); } + /** + * @return propertyName + */ public String getPropertyName() { return propertyName; } + /** + * @param propertyName Name of Property + */ public void 
setPropertyName(String propertyName) { - System.out.println( "Set prop name; " + propertyName ); + System.out.println("Set prop name; " + propertyName); this.propertyName = propertyName; + this.transformer = new BeanTransformer(propertyName); } - - /* (non-Javadoc) + /** * @see org.apache.commons.math.Univariate#addValue(double) */ public void addValue(double v) { - String msg = "The BeanListUnivariateImpl does not accept values " + - "through the addValue method. Because elements of this list " + - "are JavaBeans, one must be sure to set the 'propertyName' " + - "property and add new Beans to the underlying list via the " + - "addBean(Object bean) method"; - throw new UnsupportedOperationException( msg ); + String msg = + "The BeanListUnivariateImpl does not accept values " + + "through the addValue method. Because elements of this list " + + "are JavaBeans, one must be sure to set the 'propertyName' " + + "property and add new Beans to the underlying list via the " + + "addBean(Object bean) method"; + throw new UnsupportedOperationException(msg); } /** @@ -106,32 +128,6 @@ */ public void addObject(Object bean) { list.add(bean); - } - - /** - * Reads the property of an element in the list. - * - * @param index The location of the value in the internal List - * @return A Number object representing the value at a given - * index - */ - protected Number getInternalIndex(int index) { - - try { - Number n = (Number) PropertyUtils.getProperty( list.get( index ), - propertyName ); - - return n; - } catch( Exception e ) { - // TODO: We could use a better strategy for error handling - // here. 
- - // This is a somewhat foolish design decision, but until - // we figure out what needs to be done, let's return NaN - return new Double(Double.NaN); - } - - } } 1.18 +54 -264 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/UnivariateImpl.java Index: UnivariateImpl.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/UnivariateImpl.java,v retrieving revision 1.17 retrieving revision 1.18 diff -u -r1.17 -r1.18 --- UnivariateImpl.java 7 Jul 2003 23:25:13 -0000 1.17 +++ UnivariateImpl.java 9 Jul 2003 21:45:23 -0000 1.18 @@ -54,8 +54,6 @@ package org.apache.commons.math.stat; import java.io.Serializable; - -import org.apache.commons.math.util.DoubleArray; import org.apache.commons.math.util.FixedDoubleArray; /** @@ -66,203 +64,32 @@ * Integers, floats and longs can be added, but they will be converted * to doubles by addValue(). * - * @author <a href="mailto:[EMAIL PROTECTED]">Tim O'Brien</a> - * @author Brent Worden * @version $Revision$ $Date$ - * */ -public class UnivariateImpl implements Univariate, Serializable { - - /** hold the window size **/ - private int windowSize = Univariate.INFINITE_WINDOW; - - /** Just in case the windowSize is not infinite, we need to - * keep an array to remember values 0 to N - */ - private DoubleArray doubleArray; - - /** count of values that have been added */ - private int n = 0; - - /** sum of values that have been added */ - private double sum = Double.NaN; - - /** sum of the square of each value that has been added */ - private double sumsq = Double.NaN; - - /** min of values that have been added */ - private double min = Double.NaN; - - /** max of values that have been added */ - private double max = Double.NaN; - - /** sumLog of values that have been added */ - private double sumLog = Double.NaN; +public class UnivariateImpl + extends AbstractUnivariate + implements Univariate, Serializable { - /** 
mean of values that have been added */ - private double mean = Double.NaN; - - /** second moment of values that have been added */ - private double m2 = Double.NaN; - - /** third moment of values that have been added */ - private double m3 = Double.NaN; - - /** fourth moment of values that have been added */ - private double m4 = Double.NaN; - - /** variance of values that have been added */ - private double variance = Double.NaN; - - /** skewness of values that have been added */ - private double skewness = Double.NaN; - - /** kurtosis of values that have been added */ - private double kurtosis = Double.NaN; + /** fixed storage */ + private FixedDoubleArray storage = null; /** Creates new univariate with an infinite window */ public UnivariateImpl() { + super(); } - /** Creates a new univariate with a fixed window **/ - public UnivariateImpl(int window) { - setWindowSize(window); - } - - /* (non-Javadoc) - * @see org.apache.commons.math.stat.Univariate#getN() + /** + * Creates a new univariate with a fixed window + * @param window Window Size */ - public int getN() { - return n; - } - - /* (non-Javadoc) - * @see org.apache.commons.math.stat.Univariate#getSum() - */ - public double getSum() { - if (windowSize != Univariate.INFINITE_WINDOW) { - return StatUtils.sum(doubleArray.getElements()); - } - - return sum; - } - - /* (non-Javadoc) - * @see org.apache.commons.math.stat.Univariate#getSumsq() - */ - public double getSumsq() { - if (windowSize != Univariate.INFINITE_WINDOW) { - return StatUtils.sumSq(doubleArray.getElements()); - } - - return sumsq; - } - - /* (non-Javadoc) - * @see org.apache.commons.math.stat.Univariate#getMean() - */ - public double getMean() { - if (windowSize != Univariate.INFINITE_WINDOW) { - return StatUtils.mean(doubleArray.getElements()); - } - - return mean; - } - - /** - * Returns the standard deviation for this collection of values - * @see org.apache.commons.math.stat.Univariate#getStandardDeviation() - */ - public double 
getStandardDeviation() { - double stdDev = Double.NaN; - if (getN() != 0) { - stdDev = Math.sqrt(getVariance()); - } - return (stdDev); - } - - /** - * Returns the variance of the values that have been added via West's - * algorithm as described by - * <a href="http://doi.acm.org/10.1145/359146.359152">Chan, T. F. and - * J. G. Lewis 1979, <i>Communications of the ACM</i>, - * vol. 22 no. 9, pp. 526-531.</a>. - * - * @return The variance of a set of values. Double.NaN is returned for - * an empty set of values and 0.0 is returned for a <= 1 value set. - */ - public double getVariance() { - if (windowSize != Univariate.INFINITE_WINDOW) { - variance = StatUtils.variance(doubleArray.getElements()); - } - return variance; - } - - /** - * Returns the skewness of the values that have been added as described by - * <a href="http://mathworld.wolfram.com/k-Statistic.html">Equation (6) for k-Statistics</a>. - * - * @return The skew of a set of values. Double.NaN is returned for - * an empty set of values and 0.0 is returned for a <= 2 value set. - */ - public double getSkewness() { - if (windowSize != Univariate.INFINITE_WINDOW) { - return StatUtils.skewness(doubleArray.getElements()); - } - return skewness; + public UnivariateImpl(int window) { + super(window); + storage = new FixedDoubleArray(window); } /** - * Returns the kurtosis of the values that have been added as described by - * <a href="http://mathworld.wolfram.com/k-Statistic.html">Equation (7) for k-Statistics</a>. - * - * @return The kurtosis of a set of values. Double.NaN is returned for - * an empty set of values and 0.0 is returned for a <= 3 value set. 
- */ - public double getKurtosis() { - if (windowSize != Univariate.INFINITE_WINDOW) { - return StatUtils.kurtosis(doubleArray.getElements()); - } - return kurtosis; - } - - /* (non-Javadoc) - * @see org.apache.commons.math.stat.Univariate#getMax() - */ - public double getMax() { - if (windowSize != Univariate.INFINITE_WINDOW) { - return StatUtils.max(doubleArray.getElements()); - } - return max; - } - - /* (non-Javadoc) - * @see org.apache.commons.math.stat.Univariate#getMin() - */ - public double getMin() { - if (windowSize != Univariate.INFINITE_WINDOW) { - return StatUtils.min(doubleArray.getElements()); - } - return min; - } - - /* (non-Javadoc) - * @see org.apache.commons.math.stat.Univariate#getGeometricMean() - */ - public double getGeometricMean() { - - if (windowSize != Univariate.INFINITE_WINDOW) { - return StatUtils.geometricMean(doubleArray.getElements()); - } - - if (n == 0) { - return Double.NaN; - } else { - return Math.exp(sumLog / (double) n); - } - } - - /* If windowSize is set to Infinite, moments are calculated using the following + * If windowSize is set to Infinite, moments + * are calculated using the following * <a href="http://www.spss.com/tech/stat/Algorithms/11.5/descriptives.pdf"> * recursive strategy * </a>. @@ -271,15 +98,15 @@ */ public void addValue(double value) { - if (windowSize != Univariate.INFINITE_WINDOW) { + if (storage != null) { /* then all getters deligate to StatUtils * and this clause simply adds/rolls a value in the storage array */ - if (windowSize == n) { - doubleArray.addElementRolling(value); + if (getWindowSize() == n) { + storage.addElementRolling(value); } else { n++; - doubleArray.addElement(value); + storage.addElement(value); } } else { @@ -287,57 +114,18 @@ * is no need to discard the influence of any single item. 
*/ n++; - - if (n <= 1) { - /* if n <= 1, initialize the sumLog, min, max, mean, variance and pre-variance */ - sumLog = 0.0; - sum = min = max = mean = value; - sumsq = value * value; - variance = m2 = 0.0; - skewness = kurtosis = 0.0; - m2 = m3 = m4 = 0.0; - } else { - /* otherwise calc these values */ - sumLog += Math.log(value); - sum += value; - sumsq += value * value; - min = Math.min(min, value); - max = Math.max(max, value); - - double dev = value - mean; - double v = dev / ((double) n); - double v2 = v * v; - - double n0 = (double) n; - double n1 = (double) (n - 1); - double n2 = (double) (n - 2); - double n3 = (double) (n - 3); - - m4 = - m4 - - (4.0 * v * m3) - + (6.0 * v2 * m2) - + ((n0 * n0) - 3 * n1) * (v2 * v2 * n1 * n0); - - m3 = m3 - (3.0 * v * m2) + (n0 * n1 * n2 * v2 * v); - - m2 += n1 * dev * v; - - mean += v; - - variance = (n <= 1) ? 0.0 : m2 / n1; - - skewness = - (n <= 2 || variance < 10E-20) - ? 0.0 - : (n0 * m3) / (n1 * n2 * Math.sqrt(variance) * variance); - - kurtosis = - (n <= 3 || variance < 10E-20) - ? 
0.0 - : (n0 * (n0 + 1) * m4 - 3 * m2 * m2 * n1) - / (n1 * n2 * n3 * variance * variance); - } + min.increment(value); + max.increment(value); + sum.increment(value); + sumsq.increment(value); + sumLog.increment(value); + geoMean.increment(value); + + moment.increment(value); + //mean.increment(value); + //variance.increment(value); + //skewness.increment(value); + //kurtosis.increment(value); } } @@ -350,9 +138,9 @@ public String toString() { StringBuffer outBuffer = new StringBuffer(); outBuffer.append("UnivariateImpl:\n"); - outBuffer.append("n: " + n + "\n"); - outBuffer.append("min: " + min + "\n"); - outBuffer.append("max: " + max + "\n"); + outBuffer.append("n: " + getN() + "\n"); + outBuffer.append("min: " + getMin() + "\n"); + outBuffer.append("max: " + getMax() + "\n"); outBuffer.append("mean: " + getMean() + "\n"); outBuffer.append("std dev: " + getStandardDeviation() + "\n"); outBuffer.append("skewness: " + getSkewness() + "\n"); @@ -360,33 +148,35 @@ return outBuffer.toString(); } - /* (non-Javadoc) + /** * @see org.apache.commons.math.Univariate#clear() */ public void clear() { - this.n = 0; - this.min = this.max = Double.NaN; - this.sumLog = this.mean = Double.NaN; - this.variance = this.skewness = this.kurtosis = Double.NaN; - this.m2 = this.m3 = this.m4 = Double.NaN; - if (doubleArray != null) - doubleArray = new FixedDoubleArray(windowSize); + super.clear(); + if (getWindowSize() != INFINITE_WINDOW) { + storage = new FixedDoubleArray(getWindowSize()); + } + } + + /** + * @see org.apache.commons.math.stat.AbstractUnivariate#internalValues() + */ + protected double[] internalValues() { + return storage == null ? 
null : storage.getValues(); } - /* (non-Javadoc) - * @see org.apache.commons.math.Univariate#getWindowSize() + /** + * @see org.apache.commons.math.stat.AbstractUnivariate#start() */ - public int getWindowSize() { - return windowSize; + protected int start() { + return storage.start(); } - /* (non-Javadoc) - * @see org.apache.commons.math.Univariate#setWindowSize(int) + /** + * @see org.apache.commons.math.stat.AbstractUnivariate#size() */ - public void setWindowSize(int windowSize) { - clear(); - this.windowSize = windowSize; - doubleArray = new FixedDoubleArray(windowSize); + protected int size() { + return storage.getNumElements(); } } 1.4 +1 -2 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/TestStatisticImpl.java Index: TestStatisticImpl.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/TestStatisticImpl.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- TestStatisticImpl.java 7 Jul 2003 23:19:20 -0000 1.3 +++ TestStatisticImpl.java 9 Jul 2003 21:45:23 -0000 1.4 @@ -62,7 +62,6 @@ * Implements test statistics defined in the TestStatistic interface. * * @version $Revision$ $Date$ - * */ public class TestStatisticImpl implements TestStatistic { 1.9 +35 -232 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/AbstractStoreUnivariate.java Index: AbstractStoreUnivariate.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/AbstractStoreUnivariate.java,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- AbstractStoreUnivariate.java 7 Jul 2003 23:25:13 -0000 1.8 +++ AbstractStoreUnivariate.java 9 Jul 2003 21:45:23 -0000 1.9 @@ -52,267 +52,70 @@ * <http://www.apache.org/>. 
*/ package org.apache.commons.math.stat; + import java.util.Arrays; +import org.apache.commons.math.stat.univariate.rank.Percentile; + /** - * Provides univariate measures for an array of doubles. - * - * @author <a href="mailto:[EMAIL PROTECTED]">Tim O'Brien</a> + * Provides univariate measures for an array of doubles. + * @version $Revision$ $Date$ */ -public abstract class AbstractStoreUnivariate implements StoreUnivariate { +public abstract class AbstractStoreUnivariate + extends AbstractUnivariate + implements StoreUnivariate { + /** Percentile */ + protected Percentile percentile = new Percentile(50); + /** - * Returns the skewness of this collection of values - * @see org.apache.commons.math.stat.StoreUnivariate#getSkewness() + * Create an AbstractStoreUnivariate */ - public double getSkewness() { - // Initialize the skewness - double skewness = Double.NaN; - - // Get the mean and the standard deviation - double mean = getMean(); - double stdDev = getStandardDeviation(); - - // Sum the cubes of the distance from the mean divided by the - // standard deviation - double accum = 0.0; - for (int i = 0; i < getN(); i++) { - accum += Math.pow((getElement(i) - mean) / stdDev, 3.0); - } - - // Get N - double n = getN(); - - // Calculate skewness - skewness = (n / ((n - 1) * (n - 2))) * accum; - - return skewness; + public AbstractStoreUnivariate() { + super(); } /** - * Returns the kurtosis for this collection of values - * @see org.apache.commons.math.stat.StoreUnivariate#getKurtosis() + * Create an AbstractStoreUnivariate with a specific Window + * @param window WindowSIze for stat calculation */ - public double getKurtosis() { - // Initialize the kurtosis - double kurtosis = Double.NaN; - - // Get the mean and the standard deviation - double mean = getMean(); - double stdDev = getStandardDeviation(); - - // Sum the ^4 of the distance from the mean divided by the - // standard deviation - double accum = 0.0; - for (int i = 0; i < getN(); i++) { - accum += 
Math.pow((getElement(i) - mean) / stdDev, 4.0); - } - - // Get N - double n = getN(); - - double coefficientOne = (n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3)); - double termTwo = ((3 * Math.pow(n - 1, 2.0)) / ((n - 2) * (n - 3))); - // Calculate kurtosis - kurtosis = (coefficientOne * accum) - termTwo; - - return kurtosis; + public AbstractStoreUnivariate(int window) { + super(window); } /** - * Returns the type or class of kurtosis that this collection of - * values exhibits - * @see org.apache.commons.math.stat.StoreUnivariate#getKurtosisClass() - */ - public int getKurtosisClass() { - - int kClass = StoreUnivariate.MESOKURTIC; - - double kurtosis = getKurtosis(); - if (kurtosis > 0) { - kClass = StoreUnivariate.LEPTOKURTIC; - } else if (kurtosis < 0) { - kClass = StoreUnivariate.PLATYKURTIC; - } - - return (kClass); - - } - - /** - * Returns the mean for this collection of values - * @see org.apache.commons.math.stat.Univariate#getMean() - */ - public double getMean() { - double arithMean = getSum() / getN(); - return arithMean; - } - - /** - * Returns the geometric mean for this collection of values - * @see org.apache.commons.math.stat.Univariate#getGeometricMean() - */ - public double getGeometricMean() { - double gMean = Double.NaN; - - if (getN() > 0) { - double sumLog = 0.0; - for (int i = 0; i < getN(); i++) { - sumLog += Math.log(getElement(i)); - } - gMean = Math.exp(sumLog / (double)getN() ); - } - - return gMean; - } - - /** - * Returns the variance for this collection of values - * @see org.apache.commons.math.stat.Univariate#getVariance() + * @see org.apache.commons.math.stat.StoreUnivariate#getPercentile(double) */ - public double getVariance() { - // Initialize variance - double variance = Double.NaN; - - if (getN() == 1) { - // If this is a single value - variance = 0; - } else if (getN() > 1) { - // Get the mean - double mean = getMean(); - - // Calculate the sum of the squares of the distance between each - // value and the mean - double accum 
= 0.0; - for (int i = 0; i < getN(); i++) { - accum += Math.pow((getElement(i) - mean), 2.0); - } - - // Divide the accumulator by N - Hmmm... unbiased or biased? - variance = accum / (getN() - 1); - } - - return variance; + public double getPercentile(double p) { + percentile.setPercentile(p); + return percentile.evaluate(this.getValues(), this.start(), this.size()); } - + /** - * Returns the standard deviation for this collection of values - * @see org.apache.commons.math.stat.Univariate#getStandardDeviation() + * @see org.apache.commons.math.stat2.AbstractStoreUnivariate#getSortedValues() */ - public double getStandardDeviation() { - double stdDev = Double.NaN; - if (getN() != 0) { - stdDev = Math.sqrt(getVariance()); - } - return (stdDev); + public double[] getSortedValues() { + double[] sort = getValues(); + Arrays.sort(sort); + return sort; } - + /** - * Returns the maximum value contained herein. - * @see org.apache.commons.math.stat.Univariate#getMax() + * @see org.apache.commons.math.stat.Univariate#addValue(double) */ - public double getMax() { - - // Initialize maximum to NaN - double max = Double.NaN; - - for (int i = 0; i < getN(); i++) { - if (i == 0) { - max = getElement(i); - } else { - if (getElement(i) > max) { - max = getElement(i); - } - } - } - - return max; - } + public abstract void addValue(double value); /** - * Returns the minimum value contained herein - * @see org.apache.commons.math.stat.Univariate#getMin() + * @see org.apache.commons.math.stat.StoreUnivariate#getValues() */ - public double getMin() { - // Initialize minimum to NaN - double min = Double.NaN; - - for (int i = 0; i < getN(); i++) { - if (i == 0) { - min = getElement(i); - } else { - if (getElement(i) < min) { - min = getElement(i); - } - } - } + public abstract double[] getValues(); - return min; - } - - /** - * Returns the sum of all values contained herein - * @see org.apache.commons.math.stat.Univariate#getSum() - */ - public double getSum() { - double accum = 0.0; - 
for (int i = 0; i < getN(); i++) { - accum += getElement(i); - } - return accum; - } /** - * Returns the sun of the squares of all values contained herein - * @see org.apache.commons.math.stat.Univariate#getSumsq() + * @see org.apache.commons.math.stat.StoreUnivariate#getElement(int) */ - public double getSumsq() { - double accum = 0.0; - for (int i = 0; i < getN(); i++) { - accum += Math.pow(getElement(i), 2.0); - } - return accum; - } + public abstract double getElement(int index); - /** - * @see org.apache.commons.math.stat.StoreUnivariate#getSortedValues() - * - */ - public double[] getSortedValues() { - double[] values = getValues(); - Arrays.sort(values); - return values; - } - /** - * Returns an estimate for the pth percentile of the stored values - * @see org.apache.commons.math.stat.StoreUnivariate#getPercentile(double) - */ - public double getPercentile(double p) { - if ((p > 100) || (p <= 0)) { - throw new IllegalArgumentException("invalid percentile value"); - } - double n = (double) getN(); - if (n == 0) { - return Double.NaN; - } - if (n == 1) { - return getElement(0); // always return single value for n = 1 - } - double pos = p * (n + 1) / 100; - double fpos = Math.floor(pos); - int intPos = (int) fpos; - double d = pos - fpos; - double[] sorted = getSortedValues(); - if (pos < 1) { - return sorted[0]; - } - if (pos > n) { - return sorted[getN() - 1]; - } - double lower = sorted[intPos - 1]; - double upper = sorted[intPos]; - return lower + d * (upper - lower); - } } 1.3 +93 -58 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/ListUnivariateImpl.java Index: ListUnivariateImpl.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/ListUnivariateImpl.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- ListUnivariateImpl.java 22 Jun 2003 03:57:55 -0000 1.2 +++ ListUnivariateImpl.java 9 Jul 2003 21:45:23 
-0000 1.3 @@ -54,132 +54,167 @@ package org.apache.commons.math.stat; import java.util.List; +import org.apache.commons.math.util.DefaultTransformer; +import org.apache.commons.math.util.NumberTransformer; /** - * @author <a href="mailto:[EMAIL PROTECTED]">Tim O'Brien</a> + * @version $Revision$ $Date$ */ -public class ListUnivariateImpl extends AbstractStoreUnivariate { +public class ListUnivariateImpl + extends AbstractStoreUnivariate + implements StoreUnivariate { - // Holds the value of the windowSize, initial windowSize is the constant - // Univariate.INFINITE_WINDOW - private int windowSize = Univariate.INFINITE_WINDOW; - - // Holds a reference to a list - GENERICs are going to make - // out lives easier here as we could only accept List<Number> - List list; + /** + * Holds a reference to a list - GENERICs are going to make + * out lives easier here as we could only accept List<Number> + */ + protected List list; + + /** Number Transformer maps Objects to Number for us. */ + protected NumberTransformer transformer; + /** + * Construct a ListUnivariate with a specific List. + * @param list The list that will back this Univariate + */ public ListUnivariateImpl(List list) { + super(); + this.list = list; + transformer = new DefaultTransformer(); + } + + /** + * Construct a ListUnivariate with a specific List. 
+ * @param list The list that will back this Univariate + */ + public ListUnivariateImpl(List list, NumberTransformer transformer) { + super(); this.list = list; + this.transformer = transformer; } - /* (non-Javadoc) + /** * @see org.apache.commons.math.StoreUnivariate#getValues() */ public double[] getValues() { - int startIndex = 0; - int endIndex = list.size() - 1; - + int length = list.size(); // If the window size is not INFINITE_WINDOW AND // the current list is larger that the window size, we need to // take into account only the last n elements of the list // as definied by windowSize - if (windowSize != Univariate.INFINITE_WINDOW && - windowSize < list.size()) { - startIndex = (list.size() - 1) - windowSize; + + if (windowSize != Univariate.INFINITE_WINDOW + && windowSize < list.size()) { + length = list.size() - Math.max(0, list.size() - windowSize); } // Create an array to hold all values - double[] copiedArray = new double[list.size() - startIndex]; + double[] copiedArray = new double[length]; - for( int i = startIndex; i <= endIndex; i++ ) { - Number n = (Number) getInternalIndex( i ); - copiedArray[i] = n.doubleValue(); - i++; + for (int i = 0; i < copiedArray.length; i++) { + copiedArray[i] = getElement(i); } - return copiedArray; } - /* (non-Javadoc) + /** * @see org.apache.commons.math.StoreUnivariate#getElement(int) */ public double getElement(int index) { double value = Double.NaN; - if (windowSize != Univariate.INFINITE_WINDOW && - windowSize < list.size()) { - int calcIndex = (list.size() - windowSize) + index; + int calcIndex = index; - Number n = (Number) getInternalIndex(calcIndex); - value = n.doubleValue(); - } else { - Number n = (Number) getInternalIndex(index); - value = n.doubleValue(); + if (windowSize != Univariate.INFINITE_WINDOW + && windowSize < list.size()) { + calcIndex = (list.size() - windowSize) + index; } + + try { + value = transformer.transform(list.get(calcIndex)); + } catch (Exception e) { + e.printStackTrace(); + } + 
return value; } - /* (non-Javadoc) + /** * @see org.apache.commons.math.Univariate#getN() */ public int getN() { - int N = 0; + int n = 0; if (windowSize != Univariate.INFINITE_WINDOW) { if (list.size() > windowSize) { - N = windowSize; + n = windowSize; } else { - N = list.size(); + n = list.size(); } } else { - N = list.size(); + n = list.size(); } - return N; + return n; } - /* (non-Javadoc) + /** * @see org.apache.commons.math.Univariate#addValue(double) */ public void addValue(double v) { list.add(new Double(v)); } + + /** + * Adds an object to this list. + * @param o Object to add to the list + */ + public void addObject(Object o) { + list.add(o); + } - /* (non-Javadoc) + /** * @see org.apache.commons.math.Univariate#clear() */ public void clear() { + super.clear(); list.clear(); } - /* (non-Javadoc) - * @see org.apache.commons.math.Univariate#getWindowSize() + /** + * @see org.apache.commons.math.stat.AbstractUnivariate#internalValues() */ - public int getWindowSize() { - return windowSize; + protected double[] internalValues() { + return getValues(); } - /* (non-Javadoc) - * @see org.apache.commons.math.Univariate#setWindowSize(int) + /** + * @see org.apache.commons.math.stat.AbstractUnivariate#start() */ - public void setWindowSize(int windowSize) { - this.windowSize = windowSize; + protected int start() { + return 0; } /** - * This function exists to support the function of classes which - * extend the ListUnivariateImpl. 
- * - * @param index The location of the value in the internal List - * @return A Number object representing the value at a given - * index + * @see org.apache.commons.math.stat.AbstractUnivariate#size() */ - protected Number getInternalIndex(int index) { - - Number n = (Number) list.get( index ); - return n; + protected int size() { + return getN(); + } + /** + * @return + */ + public NumberTransformer getTransformer() { + return transformer; + } + /** + * @param transformer + */ + public void setTransformer(NumberTransformer transformer) { + this.transformer = transformer; } -} + +} \ No newline at end of file 1.4 +2 -1 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/Frequency.java Index: Frequency.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/Frequency.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- Frequency.java 7 Jul 2003 23:19:20 -0000 1.3 +++ Frequency.java 9 Jul 2003 21:45:23 -0000 1.4 @@ -62,7 +62,8 @@ * Maintains a frequency distribution. <br> * Accepts int, long or string values, converting * all to Strings and maintaining frequency counts. - * + * + * @version $Revision$ $Date$ */ public class Frequency { /** name for this frequency distribution. */ 1.14 +1 -169 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/StatUtils.java Index: StatUtils.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/StatUtils.java,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- StatUtils.java 7 Jul 2003 23:25:13 -0000 1.13 +++ StatUtils.java 9 Jul 2003 21:45:23 -0000 1.14 @@ -57,6 +57,7 @@ * StatUtils provides easy static implementations of common double[] based * statistical methods. 
These return a single result value or in some cases, as * identified in the javadoc for each method, Double.NaN. + * @version $Revision$ $Date$ */ public class StatUtils { @@ -161,29 +162,6 @@ } /** - * Returns the geometric mean for this collection of values - * @param values Is a double[] containing the values - * @return the geometric mean or Double.NaN if the array is empty or - * any of the values are <= 0. - */ - public static double geometricMean(double[] values) { - return geometricMean(values, 0, values.length); - } - - /** - * Returns the geometric mean for this collection of values - * @param values Is a double[] containing the values - * @param begin processing at this point in the array - * @param length processing at this point in the array - * @return the geometric mean or Double.NaN if the array is empty or - * any of the values are <= 0. - */ - public static double geometricMean(double[] values, int begin, int length) { - testInput(values, begin, length); - return Math.exp(sumLog(values, begin, length) / (double) length ); - } - - /** * Returns the <a href=http://www.xycoon.com/arithmetic_mean.htm> * arithmetic mean </a> of the available values * @param values Is a double[] containing the values @@ -207,36 +185,6 @@ } /** - * - * @param values Is a double[] containing the values - * @return the result, Double.NaN if no values for an empty array - * or 0.0 for a single value set. - */ - public static double standardDeviation(double[] values) { - return standardDeviation(values, 0, values.length); - } - - /** - * - * @param values Is a double[] containing the values - * @param begin processing at this point in the array - * @param length processing at this point in the array - * @return the result, Double.NaN if no values for an empty array - * or 0.0 for a single value set. 
- */ - public static double standardDeviation( - double[] values, - int begin, - int length) { - testInput(values, begin, length); - double stdDev = Double.NaN; - if (values.length != 0) { - stdDev = Math.sqrt(variance(values, begin, length)); - } - return (stdDev); - } - - /** * Returns the variance of the available values. This uses a corrected * two pass algorithm of the following * <a href="http://lib-www.lanl.gov/numerical/bookcpdf/c14-1.pdf"> @@ -289,122 +237,6 @@ return variance; } - /** - * Returns the skewness of a collection of values. Skewness is a - * measure of the assymetry of a given distribution. - * @param values Is a double[] containing the values - * @return the skewness of the values or Double.NaN if the array is empty - */ - public static double skewness(double[] values) { - return skewness(values, 0, values.length); - } - /** - * Returns the skewness of a collection of values. Skewness is a - * measure of the assymetry of a given distribution. - * @param values Is a double[] containing the values - * @param begin processing at this point in the array - * @param length processing at this point in the array - * @return the skewness of the values or Double.NaN if the array is empty - */ - public static double skewness(double[] values, int begin, int length) { - - testInput(values, begin, length); - - // Initialize the skewness - double skewness = Double.NaN; - - // Get the mean and the standard deviation - double mean = mean(values, begin, length); - - // Calc the std, this is implemented here instead of using the - // standardDeviation method eliminate a duplicate pass to get the mean - double accum = 0.0; - double accum2 = 0.0; - for (int i = begin; i < begin + length; i++) { - accum += Math.pow((values[i] - mean), 2.0); - accum2 += (values[i] - mean); - } - double stdDev = - Math.sqrt( - (accum - (Math.pow(accum2, 2) / ((double) length))) - / (double) (length - 1)); - - // Calculate the skew as the sum the cubes of the distance - // from the 
mean divided by the standard deviation. - double accum3 = 0.0; - for (int i = begin; i < begin + length; i++) { - accum3 += Math.pow((values[i] - mean) / stdDev, 3.0); - } - - // Get N - double n = length; - - // Calculate skewness - skewness = (n / ((n - 1) * (n - 2))) * accum3; - - return skewness; - } - - /** - * Returns the kurtosis for this collection of values. Kurtosis is a - * measure of the "peakedness" of a distribution. - * @param values Is a double[] containing the values - * @return the kurtosis of the values or Double.NaN if the array is empty - */ - public static double kurtosis(double[] values) { - return kurtosis(values, 0, values.length); - } - - /** - * Returns the kurtosis for this collection of values. Kurtosis is a - * measure of the "peakedness" of a distribution. - * @param values Is a double[] containing the values - * @param begin processing at this point in the array - * @param length processing at this point in the array - * @return the kurtosis of the values or Double.NaN if the array is empty - */ - public static double kurtosis(double[] values, int begin, int length) { - testInput(values, begin, length); - - // Initialize the kurtosis - double kurtosis = Double.NaN; - - // Get the mean and the standard deviation - double mean = mean(values, begin, length); - - // Calc the std, this is implemented here instead of using the - // standardDeviation method eliminate a duplicate pass to get the mean - double accum = 0.0; - double accum2 = 0.0; - for (int i = begin; i < begin + length; i++) { - accum += Math.pow((values[i] - mean), 2.0); - accum2 += (values[i] - mean); - } - - double stdDev = - Math.sqrt( - (accum - (Math.pow(accum2, 2) / ((double) length))) - / (double) (length - 1)); - - // Sum the ^4 of the distance from the mean divided by the - // standard deviation - double accum3 = 0.0; - for (int i = begin; i < begin + length; i++) { - accum3 += Math.pow((values[i] - mean) / stdDev, 4.0); - } - - // Get N - double n = length; - - 
double coefficientOne = (n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3)); - double termTwo = ((3 * Math.pow(n - 1, 2.0)) / ((n - 2) * (n - 3))); - - // Calculate kurtosis - kurtosis = (coefficientOne * accum3) - termTwo; - - return kurtosis; - } - /** * Returns the maximum of the available values * @param values Is a double[] containing the values 1.4 +1 -2 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/TestStatistic.java Index: TestStatistic.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/TestStatistic.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- TestStatistic.java 7 Jul 2003 23:19:20 -0000 1.3 +++ TestStatistic.java 9 Jul 2003 21:45:23 -0000 1.4 @@ -55,8 +55,7 @@ /** * A collection of commonly used test statistics and statistical tests. * - * @version $Revision$ $Date$ - * + * @version $Revision$ $Date$ */ public interface TestStatistic { 1.4 +54 -34 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/StoreUnivariateImpl.java Index: StoreUnivariateImpl.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/StoreUnivariateImpl.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- StoreUnivariateImpl.java 27 Jun 2003 20:58:28 -0000 1.3 +++ StoreUnivariateImpl.java 9 Jul 2003 21:45:23 -0000 1.4 @@ -56,81 +56,80 @@ import org.apache.commons.math.util.ContractableDoubleArray; /** - * @author <a href="mailto:[EMAIL PROTECTED]">Tim O'Brien</a> + * @version $Revision$ $Date$ */ public class StoreUnivariateImpl extends AbstractStoreUnivariate { - // Use an internal double array - ContractableDoubleArray eDA; - - // Store the windowSize - private int windowSize = Univariate.INFINITE_WINDOW; + /** A contractable double array is used. 
memory is reclaimed when + * the storage of the array becomes too empty. + */ + protected ContractableDoubleArray eDA; + /** + * Construct a StoreUnivariateImpl + */ public StoreUnivariateImpl() { - // A contractable double array is used. memory is reclaimed when - // the storage of the array becomes too empty. eDA = new ContractableDoubleArray(); } - /* (non-Javadoc) + /** * @see org.apache.commons.math.StoreUnivariate#getValues() */ public double[] getValues() { - double[] copiedArray = new double[ eDA.getNumElements() ]; - System.arraycopy( eDA.getElements(), 0, - copiedArray, 0, eDA.getNumElements()); + double[] copiedArray = new double[eDA.getNumElements()]; + System.arraycopy( + eDA.getElements(), + 0, + copiedArray, + 0, + eDA.getNumElements()); return copiedArray; } - /* (non-Javadoc) + /** * @see org.apache.commons.math.StoreUnivariate#getElement(int) */ public double getElement(int index) { return eDA.getElement(index); } - /* (non-Javadoc) + /** * @see org.apache.commons.math.Univariate#getN() */ public int getN() { return eDA.getNumElements(); } - /* (non-Javadoc) + /** * @see org.apache.commons.math.Univariate#addValue(double) */ public synchronized void addValue(double v) { - if( windowSize != Univariate.INFINITE_WINDOW ) { - if( getN() == windowSize ) { - eDA.addElementRolling( v ); - } else if( getN() < windowSize ) { + if (windowSize != Univariate.INFINITE_WINDOW) { + if (getN() == windowSize) { + eDA.addElementRolling(v); + } else if (getN() < windowSize) { eDA.addElement(v); } else { - String msg = "A window Univariate had more element than " + - "the windowSize. This is an inconsistent state."; - throw new RuntimeException( msg ); + String msg = + "A window Univariate had more element than " + + "the windowSize. 
This is an inconsistent state."; + throw new RuntimeException(msg); } } else { eDA.addElement(v); } } - /* (non-Javadoc) + /** * @see org.apache.commons.math.Univariate#clear() */ public synchronized void clear() { + super.clear(); eDA.clear(); } - /* (non-Javadoc) - * @see org.apache.commons.math.Univariate#getWindowSize() - */ - public int getWindowSize() { - return windowSize; - } - - /* (non-Javadoc) + /** * @see org.apache.commons.math.Univariate#setWindowSize(int) */ public synchronized void setWindowSize(int windowSize) { @@ -139,8 +138,29 @@ // We need to check to see if we need to discard elements // from the front of the array. If the windowSize is less than // the current number of elements. - if( windowSize < eDA.getNumElements() ) { - eDA.discardFrontElements( eDA.getNumElements() - windowSize); + if (windowSize < eDA.getNumElements()) { + eDA.discardFrontElements(eDA.getNumElements() - windowSize); } } -} + + /** + * @see org.apache.commons.math.stat.AbstractUnivariate#internalValues() + */ + protected double[] internalValues() { + return eDA.getValues(); + } + + /** + * @see org.apache.commons.math.stat.AbstractUnivariate#start() + */ + protected int start() { + return eDA.start(); + } + + /** + * @see org.apache.commons.math.stat.AbstractUnivariate#size() + */ + protected int size() { + return eDA.getNumElements(); + } +} \ No newline at end of file 1.1 jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/AbstractUnivariate.java Index: AbstractUnivariate.java =================================================================== /* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2003 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, if * any, must include the following acknowlegement: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowlegement may appear in the software itself, * if and wherever such third-party acknowlegements normally appear. * * 4. The names "The Jakarta Project", "Commons", and "Apache Software * Foundation" must not be used to endorse or promote products derived * from this software without prior written permission. For written * permission, please contact [EMAIL PROTECTED] * * 5. Products derived from this software may not be called "Apache" * nor may "Apache" appear in their names without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.commons.math.stat; import org.apache.commons.math.stat.univariate.moment.FourthMoment; import org.apache.commons.math.stat.univariate.moment.GeometricMean; import org.apache.commons.math.stat.univariate.moment.Kurtosis; import org.apache.commons.math.stat.univariate.moment.Mean; import org.apache.commons.math.stat.univariate.moment.Skewness; import org.apache.commons.math.stat.univariate.moment.Variance; import org.apache.commons.math.stat.univariate.rank.Max; import org.apache.commons.math.stat.univariate.rank.Min; import org.apache.commons.math.stat.univariate.summary.Sum; import org.apache.commons.math.stat.univariate.summary.SumOfLogs; import org.apache.commons.math.stat.univariate.summary.SumOfSquares; /** * Provides univariate measures for an array of doubles. 
* @version $Revision: 1.1 $ $Date: 2003/07/09 21:45:23 $ */ public abstract class AbstractUnivariate implements Univariate { /** hold the window size **/ protected int windowSize = Univariate.INFINITE_WINDOW; /** count of values that have been added */ protected int n = 0; /** FourthMoment is used in calculating mean, variance,skew and kurtosis */ protected FourthMoment moment = null; /** sum of values that have been added */ protected Sum sum = null; /** sum of the square of each value that has been added */ protected SumOfSquares sumsq = null; /** min of values that have been added */ protected Min min = null; /** max of values that have been added */ protected Max max = null; /** sumLog of values that have been added */ protected SumOfLogs sumLog = null; /** geoMean of values that have been added */ protected GeometricMean geoMean = null; /** mean of values that have been added */ protected Mean mean = null; /** variance of values that have been added */ protected Variance variance = null; /** skewness of values that have been added */ protected Skewness skewness = null; /** kurtosis of values that have been added */ protected Kurtosis kurtosis = null; /** * Construct an AbstractUnivariate */ public AbstractUnivariate() { super(); sum = new Sum(); sumsq = new SumOfSquares(); min = new Min(); max = new Max(); sumLog = new SumOfLogs(); geoMean = new GeometricMean(); moment = new FourthMoment(); mean = new Mean(moment); variance = new Variance(moment); skewness = new Skewness(moment); kurtosis = new Kurtosis(moment); } /** * Construct an AbstractUnivariate with a window * @param window The Window Size */ public AbstractUnivariate(int window) { this(); setWindowSize(window); } /** * Returns the internalValues array. * @return the array */ protected abstract double[] internalValues(); /** * Returns the start index of the array * @return start index */ protected abstract int start(); /** * Returns the size of the array appropriate for doing calculations. 
* @return Usually this is just numElements. */ protected abstract int size(); /** * If windowSize is set to Infinite, * statistics are calculated using the following * <a href="http://www.spss.com/tech/stat/Algorithms/11.5/descriptives.pdf"> * recursive strategy * </a>. * @see org.apache.commons.math.stat.Univariate#addValue(double) */ public abstract void addValue(double value); /** * @see org.apache.commons.math.stat.Univariate#getN() */ public int getN() { return n; } /** * @see org.apache.commons.math.stat.Univariate#getSum() */ public double getSum() { double[] v = internalValues(); if (v != null) { return sum.evaluate(v, this.start(), this.size()); } return sum.getResult(); } /** * @see org.apache.commons.math.stat.Univariate#getSumsq() */ public double getSumsq() { double[] v = internalValues(); if (v != null) { return sumsq.evaluate(v, this.start(), this.size()); } return sumsq.getResult(); } /** * @see org.apache.commons.math.stat.Univariate#getMean() */ public double getMean() { double[] v = internalValues(); if (v != null) { return mean.evaluate(v, this.start(), this.size()); } return mean.getResult(); } /** * Returns the standard deviation for this collection of values * @see org.apache.commons.math.stat.Univariate#getStandardDeviation() */ public double getStandardDeviation() { double stdDev = Double.NaN; if (getN() > 0) { if (getN() > 1) { stdDev = Math.sqrt(getVariance()); } else { stdDev = 0.0; } } return (stdDev); } /** * Returns the variance of the values that have been added via West's * algorithm as described by * <a href="http://doi.acm.org/10.1145/359146.359152">Chan, T. F. and * J. G. Lewis 1979, <i>Communications of the ACM</i>, * vol. 22 no. 9, pp. 526-531.</a>. * * @return The variance of a set of values. * Double.NaN is returned for an empty * set of values and 0.0 is returned for * a <= 1 value set. 
*/ public double getVariance() { double[] v = internalValues(); if (v != null) { return variance.evaluate(v, this.start(), this.size()); } return variance.getResult(); } /** * Returns the skewness of the values that have been added as described by * <a href="http://mathworld.wolfram.com/k-Statistic.html"> * Equation (6) for k-Statistics</a>. * @return The skew of a set of values. Double.NaN is returned for * an empty set of values and 0.0 is returned for a * <= 2 value set. */ public double getSkewness() { double[] v = internalValues(); if (v != null) { return skewness.evaluate(v, this.start(), this.size()); } return skewness.getResult(); } /** * Returns the kurtosis of the values that have been added as described by * <a href="http://mathworld.wolfram.com/k-Statistic.html"> * Equation (7) for k-Statistics</a>. * * @return The kurtosis of a set of values. Double.NaN is returned for * an empty set of values and 0.0 is returned for a <= 3 * value set. */ public double getKurtosis() { double[] v = internalValues(); if (v != null) { return kurtosis.evaluate(v, this.start(), this.size()); } return kurtosis.getResult(); } /** * @see org.apache.commons.math.stat.StoreUnivariate#getKurtosisClass() */ public int getKurtosisClass() { int kClass = Univariate.MESOKURTIC; double kurtosis = getKurtosis(); if (kurtosis > 0) { kClass = Univariate.LEPTOKURTIC; } else if (kurtosis < 0) { kClass = Univariate.PLATYKURTIC; } return (kClass); } /** * @see org.apache.commons.math.stat.Univariate#getMax() */ public double getMax() { double[] v = internalValues(); if (v != null) { return max.evaluate(v, this.start(), this.size()); } return max.getResult(); } /** * @see org.apache.commons.math.stat.Univariate#getMin() */ public double getMin() { double[] v = internalValues(); if (v != null) { return min.evaluate(v, this.start(), this.size()); } return min.getResult(); } /** * @see org.apache.commons.math.stat.Univariate#getGeometricMean() */ public double getGeometricMean() { double[] v = 
internalValues(); if (v != null) { return geoMean.evaluate(v, this.start(), this.size()); } return geoMean.getResult(); } /** * Generates a text report displaying * univariate statistics from values that * have been added. * @return String with line feeds displaying statistics */ public String toString() { StringBuffer outBuffer = new StringBuffer(); outBuffer.append("UnivariateImpl:\n"); outBuffer.append("n: " + n + "\n"); outBuffer.append("min: " + min + "\n"); outBuffer.append("max: " + max + "\n"); outBuffer.append("mean: " + getMean() + "\n"); outBuffer.append("std dev: " + getStandardDeviation() + "\n"); outBuffer.append("skewness: " + getSkewness() + "\n"); outBuffer.append("kurtosis: " + getKurtosis() + "\n"); return outBuffer.toString(); } /** * @see org.apache.commons.math.Univariate#clear() */ public void clear() { this.n = 0; min.clear(); max.clear(); sum.clear(); sumLog.clear(); sumsq.clear(); geoMean.clear(); moment.clear(); mean.clear(); variance.clear(); skewness.clear(); kurtosis.clear(); } /** * @see org.apache.commons.math.Univariate#getWindowSize() */ public int getWindowSize() { return windowSize; } /** * @see org.apache.commons.math.Univariate#setWindowSize(int) */ public void setWindowSize(int windowSize) { clear(); this.windowSize = windowSize; } } 1.6 +1 -20 jakarta-commons-sandbox/math/src/test/org/apache/commons/math/stat/StatUtilsTest.java Index: StatUtilsTest.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/test/org/apache/commons/math/stat/StatUtilsTest.java,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- StatUtilsTest.java 7 Jul 2003 23:25:14 -0000 1.5 +++ StatUtilsTest.java 9 Jul 2003 21:45:24 -0000 1.6 @@ -99,11 +99,6 @@ assertEquals("sum", sum, StatUtils.sum(values), tolerance); assertEquals("sumsq", sumSq, StatUtils.sumSq(values), tolerance); assertEquals("var", var, StatUtils.variance(values), tolerance); - assertEquals( - 
"std", - std, - StatUtils.standardDeviation(values), - tolerance); assertEquals("mean", mean, StatUtils.mean(values), tolerance); assertEquals("min", min, StatUtils.min(values), tolerance); assertEquals("max", max, StatUtils.max(values), tolerance); @@ -116,9 +111,6 @@ "Mean of n = 0 set should be NaN", Double.isNaN(StatUtils.mean(values))); assertTrue( - "Standard Deviation of n = 0 set should be NaN", - Double.isNaN(StatUtils.standardDeviation(values))); - assertTrue( "Variance of n = 0 set should be NaN", Double.isNaN(StatUtils.variance(values))); @@ -128,10 +120,6 @@ "Mean of n = 1 set should be value of single item n1", StatUtils.mean(values) == one); assertTrue( - "StdDev of n = 1 set should be zero, instead it is: " - + StatUtils.standardDeviation(values), - StatUtils.standardDeviation(values) == 0); - assertTrue( "Variance of n = 1 set should be zero", StatUtils.variance(values) == 0); } @@ -165,8 +153,6 @@ assertEquals("mean", 12.40455, StatUtils.mean(values), 0.0001); assertEquals("variance", 10.00236, StatUtils.variance(values), 0.0001); - assertEquals("skewness", 1.437424, StatUtils.skewness(values), 0.0001); - assertEquals("kurtosis", 2.37719, StatUtils.kurtosis(values), 0.0001); } public void testProductAndGeometricMean() throws Exception { @@ -177,11 +163,6 @@ 24.0, StatUtils.product(values), Double.MIN_VALUE); - assertEquals( - "Geometric mean not expected", - 2.213364, - StatUtils.geometricMean(values), - 0.00001); } public void testArrayIndexConditions() throws Exception { 1.6 +2 -2 jakarta-commons-sandbox/math/src/test/org/apache/commons/math/stat/UnivariateImplTest.java Index: UnivariateImplTest.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/math/src/test/org/apache/commons/math/stat/UnivariateImplTest.java,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- UnivariateImplTest.java 7 Jul 2003 23:19:19 -0000 1.5 +++ UnivariateImplTest.java 9 Jul 2003 
21:45:24 -0000 1.6 @@ -130,7 +130,7 @@ u.addValue(one); assertTrue("mean should be one (n = 1)", u.getMean() == one); - assertTrue("geometric should be one (n = 1)", + assertTrue("geometric should be one (n = 1) instead it is " + u.getGeometricMean(), u.getGeometricMean() == one); assertTrue("Std should be zero (n = 1)", u.getStandardDeviation() == 0.0); 1.1 jakarta-commons-sandbox/math/src/test/org/apache/commons/math/stat/MixedListUnivariateImplTest.java Index: MixedListUnivariateImplTest.java =================================================================== /* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2003 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, if * any, must include the following acknowlegement: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowlegement may appear in the software itself, * if and wherever such third-party acknowlegements normally appear. * * 4. The names "The Jakarta Project", "Commons", and "Apache Software * Foundation" must not be used to endorse or promote products derived * from this software without prior written permission. For written * permission, please contact [EMAIL PROTECTED] * * 5. 
Products derived from this software may not be called "Apache" * nor may "Apache" appear in their names without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.commons.math.stat; import java.util.ArrayList; import java.util.List; import org.apache.commons.math.util.NumberTransformer; import org.apache.commons.math.util.TransformerMap; import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; /** * Test cases for the {@link Univariate} class. 
* * @version $Revision: 1.1 $ $Date: 2003/07/09 21:45:24 $ */ public final class MixedListUnivariateImplTest extends TestCase { private double one = 1; private float two = 2; private int three = 3; private double mean = 2; private double sumSq = 18; private double sum = 8; private double var = 0.666666666666666666667; private double std = Math.sqrt(var); private double n = 4; private double min = 1; private double max = 3; private double skewness = 0; private double kurtosis = 0.5; private int kClass = StoreUnivariate.LEPTOKURTIC; private double tolerance = 10E-15; private TransformerMap transformers = new TransformerMap(); public MixedListUnivariateImplTest(String name) { super(name); transformers = new TransformerMap(); transformers.putTransformer(Foo.class, new NumberTransformer() { public double transform(Object o) { return Double.parseDouble(((Foo) o).heresFoo()); } }); transformers.putTransformer(Bar.class, new NumberTransformer() { public double transform(Object o) { return Double.parseDouble(((Bar) o).heresBar()); } }); } public void setUp() { } public static Test suite() { TestSuite suite = new TestSuite(MixedListUnivariateImplTest.class); suite.setName("Mixed List Tests"); return suite; } /** test stats */ public void testStats() { List externalList = new ArrayList(); StoreUnivariate u = new ListUnivariateImpl(externalList,transformers); assertEquals("total count", 0, u.getN(), tolerance); u.addValue(one); u.addValue(two); u.addValue(two); u.addValue(three); assertEquals("N", n, u.getN(), tolerance); assertEquals("sum", sum, u.getSum(), tolerance); assertEquals("sumsq", sumSq, u.getSumsq(), tolerance); assertEquals("var", var, u.getVariance(), tolerance); assertEquals("std", std, u.getStandardDeviation(), tolerance); assertEquals("mean", mean, u.getMean(), tolerance); assertEquals("min", min, u.getMin(), tolerance); assertEquals("max", max, u.getMax(), tolerance); u.clear(); assertEquals("total count", 0, u.getN(), tolerance); } public void 
testN0andN1Conditions() throws Exception { List list = new ArrayList(); StoreUnivariate u = new ListUnivariateImpl(new ArrayList(),transformers); assertTrue( "Mean of n = 0 set should be NaN", Double.isNaN(u.getMean())); assertTrue( "Standard Deviation of n = 0 set should be NaN", Double.isNaN(u.getStandardDeviation())); assertTrue( "Variance of n = 0 set should be NaN", Double.isNaN(u.getVariance())); u.addValue(one); assertTrue( "Mean of n = 1 set should be value of single item n1, instead it is " + u.getMean() , u.getMean() == one); assertTrue( "StdDev of n = 1 set should be zero, instead it is: " + u.getStandardDeviation(), u.getStandardDeviation() == 0); assertTrue( "Variance of n = 1 set should be zero", u.getVariance() == 0); } public void testSkewAndKurtosis() { ListUnivariateImpl u = new ListUnivariateImpl(new ArrayList(), transformers); u.addObject("12.5"); u.addObject(new Integer(12)); u.addObject("11.8"); u.addObject("14.2"); u.addObject(new Foo()); u.addObject("14.5"); u.addObject(new Long(21)); u.addObject("8.2"); u.addObject("10.3"); u.addObject("11.3"); u.addObject(new Float(14.1)); u.addObject("9.9"); u.addObject("12.2"); u.addObject(new Bar()); u.addObject("12.1"); u.addObject("11"); u.addObject(new Double(19.8)); u.addObject("11"); u.addObject("10"); u.addObject("8.8"); u.addObject("9"); u.addObject("12.3"); assertEquals("mean", 12.40455, u.getMean(), 0.0001); assertEquals("variance", 10.00236, u.getVariance(), 0.0001); assertEquals("skewness", 1.437424, u.getSkewness(), 0.0001); assertEquals("kurtosis", 2.37719, u.getKurtosis(), 0.0001); } public void testProductAndGeometricMean() throws Exception { ListUnivariateImpl u = new ListUnivariateImpl(new ArrayList(),transformers); u.setWindowSize(10); u.addValue(1.0); u.addValue(2.0); u.addValue(3.0); u.addValue(4.0); assertEquals( "Geometric mean not expected", 2.213364, u.getGeometricMean(), 0.00001); // Now test rolling - UnivariateImpl should discount the contribution // of a discarded element for 
(int i = 0; i < 10; i++) { u.addValue(i + 2); } // Values should be (2,3,4,5,6,7,8,9,10,11) assertEquals( "Geometric mean not expected", 5.755931, u.getGeometricMean(), 0.00001); } public final class Foo { public String heresFoo() { return "14.9"; } } public final class Bar { public String heresBar() { return "12.0"; } } }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]