mdiggory    2003/06/18 06:47:35

  Modified:    math/src/java/org/apache/commons/math/stat
                        UnivariateImpl.java
  Log:
  Delegates to StatUtils now for "window" case. Implemented skew and kurt using 
recursive moments.
  
  Revision  Changes    Path
  1.10      +316 -345  
jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/UnivariateImpl.java
  
  Index: UnivariateImpl.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/UnivariateImpl.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- UnivariateImpl.java       17 Jun 2003 17:10:15 -0000      1.9
  +++ UnivariateImpl.java       18 Jun 2003 13:47:35 -0000      1.10
  @@ -18,7 +18,7 @@
    *
    * 3. The end-user documentation included with the redistribution, if
    *    any, must include the following acknowlegement:
  - *       "This product includes software developed by the
  + *       "This sumLog includes software developed by the
    *        Apache Software Foundation (http://www.apache.org/)."
    *    Alternately, this acknowlegement may appear in the software itself,
    *    if and wherever such third-party acknowlegements normally appear.
  @@ -76,347 +76,318 @@
   */
   public class UnivariateImpl implements Univariate, Serializable {
   
  -    /** hold the window size **/
  -    private int windowSize = Univariate.INFINITE_WINDOW;
  +     /** hold the window size **/
  +     private int windowSize = Univariate.INFINITE_WINDOW;
   
  -    /** Just in case the windowSize is not infinite, we need to
  -     *  keep an array to remember values 0 to N
  -     */
  -    private DoubleArray doubleArray;
  -
  -    /** count of values that have been added */
  -    private int n = 0;
  -
  -    /** min of values that have been added */
  -    private double min = Double.MAX_VALUE;
  -
  -    /** max of values that have been added */
  -    private double max = Double.MIN_VALUE;
  -
  -    /** product of values that have been added */
  -    private double product = Double.NaN;
  -
  -    /** mean of values that have been added */
  -    private double mean = Double.NaN ;
  -
  -    /** running ( variance * (n - 1) ) of values that have been added */
  -    private double pre_variance = Double.NaN ;
  -
  -    /** variance of values that have been added */
  -    private double variance = Double.NaN ;
  -
  -    /** running sum of values that have been added */
  -    private double sum = 0.0;
  -
  -    /** running sum of squares that have been added */
  -    private double sumsq = 0.0;
  -
  -    /** running sum of 3rd powers that have been added */
  -    private double sumCube = 0.0;
  -
  -    /** running sum of 4th powers that have been added */
  -    private double sumQuad = 0.0;
  -
  -    /** Creates new univariate with an infinite window */
  -    public UnivariateImpl() {
  -        clear();
  -    }
  -
  -    /** Creates a new univariate with a fixed window **/
  -    public UnivariateImpl(int window) {
  -        windowSize = window;
  -        doubleArray = new FixedDoubleArray( window );
  -    }
  -
  -    /**
  -     * @see org.apache.commons.math.stat.Univariate#addValue(double)
  -     */
  -    public void addValue(double v) {
  -        insertValue(v);
  -    }
  -
  -    /**
  -     * @see org.apache.commons.math.stat.Univariate#getMean()
  -     */
  -    public double getMean() {
  -        return mean ;
  -    }
  -
  -    /**
  -     * @see org.apache.commons.math.stat.Univariate#getGeometricMean()
  -     */
  -    public double getGeometricMean() {
  -        if ((product <= 0.0) || (n == 0)) {
  -            return Double.NaN;
  -        } else {
  -            return Math.pow(product,( 1.0 / (double) n ) );
  -        }
  -    }
  -
  -    /**
  -     * @see org.apache.commons.math.stat.Univariate#getProduct()
  -     */
  -    public double getProduct() {
  -        return product;
  -    }
  -
  -    /**
  -     * @see org.apache.commons.math.stat.Univariate#getStandardDeviation()
  -     */
  -    public double getStandardDeviation() {
  -        double variance = getVariance();
  -
  -        if ((variance == 0.0) || (variance == Double.NaN)) {
  -            return variance;
  -        } else {
  -            return Math.sqrt(variance);
  -        }
  -    }
  -
  -    /**
  -     * Returns the variance of the values that have been added via West's
  -     * algorithm as described by
  -     * <a href="http://doi.acm.org/10.1145/359146.359152";>Chan, T. F. and
  -     * J. G. Lewis 1979, <i>Communications of the ACM</i>,
  -     * vol. 22 no. 9, pp. 526-531.</a>.
  -     *
  -     * @return The variance of a set of values.  Double.NaN is returned for
  -     *         an empty set of values and 0.0 is returned for a &lt;= 1 value set.
  -     */
  -    public double getVariance() {
  -        return variance ;
  -    }
  -
  -    /**
  -     * Returns the skewness of the values that have been added as described by
  -     * <a href="http://mathworld.wolfram.com/k-Statistic.html";>Equation (6) for 
k-Statistics</a>.
  -     *
  -     * @return The skew of a set of values.  Double.NaN is returned for
  -     *         an empty set of values and 0.0 is returned for a &lt;= 2 value set.
  -     */
  -    public double getSkewness() {
  -
  -        if( n < 1) return Double.NaN;
  -        if( n <= 2 ) return 0.0;
  -
  -        return ( 2 * Math.pow(sum, 3) - 3 * sum * sumsq + ((double) (n * n)) * 
sumCube ) /
  -               ( (double) (n * (n - 1) * (n - 2)) ) ;
  -    }
  -
  -    /**
  -     * Returns the kurtosis of the values that have been added as described by
  -     * <a href="http://mathworld.wolfram.com/k-Statistic.html";>Equation (7) for 
k-Statistics</a>.
  -     *
  -     * @return The kurtosis of a set of values.  Double.NaN is returned for
  -     *         an empty set of values and 0.0 is returned for a &lt;= 3 value set.
  -     */
  -    public double getKurtosis() {
  -
  -        if( n < 1) return Double.NaN;
  -        if( n <= 3 ) return 0.0;
  -
  -        double x1 = -6 * Math.pow(sum, 4);
  -        double x2 = 12 * ((double) n) * Math.pow(sum, 2) * sumsq;
  -        double x3 = -3 * ((double) (n * (n - 1))) * Math.pow(sumsq,2);
  -        double x4 = -4 * ((double) (n * (n + 1))) * sum * sumCube;
  -        double x5 = Math.pow(((double) n),2) * ((double) (n+1)) * sumQuad;
  -
  -        return (x1 + x2 + x3 + x4 + x5) /
  -               ( (double) (n * (n - 1) * (n - 2) * (n - 3)) );
  -    }
  -
  -    /**
  -     * Called in "addValue" to insert a new value into the statistic.
  -     * @param v The value to be added.
  -     */
  -    private void insertValue(double v) {
  -        // The default value of product is NaN, if you
  -        // try to retrieve the product for a univariate with
  -        // no values, we return NaN.
  -        //
  -        // If this is the first call to insertValue, we want
  -        // to set product to 1.0, so that our first element
  -        // is not "cancelled" out by the NaN.
  -        //
  -        // For the first value added, the mean is that value,
  -        // and the variance is zero.
  -        if( n == 0 ) {
  -            product = 1.0 ;
  -            mean = v ;
  -            pre_variance = 0.0 ;
  -            variance = 0.0 ;
  -        }
  -
  -        if( windowSize != Univariate.INFINITE_WINDOW ) {
  -            if( windowSize == n ) {
  -                double discarded = doubleArray.addElementRolling( v );
  -
  -                // Remove the influence of the discarded
  -                sum -= discarded;
  -                sumsq -= discarded * discarded;
  -                sumCube -= Math.pow(discarded, 3);
  -                sumQuad -= Math.pow(discarded, 4);
  -
  -                if(discarded == min) {
  -                    min = doubleArray.getMin();
  -                } else if(discarded == max){
  -                    max = doubleArray.getMax();
  -                }
  -
  -                if(product != 0.0){
  -                    // can safely remove discarded value
  -                    product *= v / discarded;
  -                } else if(discarded == 0.0){
  -                    // need to recompute product
  -                    product = 1.0;
  -                    double[] elements = doubleArray.getElements();
  -                    for( int i = 0; i < elements.length; i++ ) {
  -                        product *= elements[i];
  -                    }
  -                } // else product = 0 and will still be 0 after discard
  -
  -            } else {
  -                doubleArray.addElement( v );
  -                n += 1 ;
  -                if (v < min) {
  -                    min = v;
  -                }
  -                if (v > max) {
  -                    max = v;
  -                }
  -                product *= v;
  -            }
  -        } else {
  -            // If the windowSize is infinite please don't take the time to
  -            // worry about storing any values.  We don't need to discard the
  -            // influence of any single item.
  -            n += 1 ;
  -            if (v < min) {
  -                min = v;
  -            }
  -            if (v > max) {
  -                max = v;
  -            }
  -            product *= v;
  -
  -            if ( n > 1 )
  -            {
  -                double deviationFromMean = v - mean ;
  -                double deviationFromMean_overN = deviationFromMean / n ;
  -                mean += deviationFromMean_overN ;
  -                pre_variance += (n - 1) * deviationFromMean * 
deviationFromMean_overN ;
  -                variance = pre_variance / (n - 1) ;
  -            }
  -        }
  -
  -        sum += v;
  -        sumsq += v * v;
  -        sumCube += Math.pow(v,3);
  -        sumQuad += Math.pow(v,4);
  -    }
  -
  -    /** Getter for property max.
  -     * @return Value of property max.
  -     */
  -    public double getMax() {
  -        if (n == 0) {
  -            return Double.NaN;
  -        } else {
  -            return max;
  -        }
  -    }
  -
  -    /** Getter for property min.
  -     * @return Value of property min.
  -     */
  -    public double getMin() {
  -        if (n == 0) {
  -            return Double.NaN;
  -        } else {
  -            return min;
  -        }
  -    }
  -
  -    /** Getter for property n.
  -     * @return Value of property n.
  -     */
  -    public int getN() {
  -        return n;
  -    }
  -
  -    /** Getter for property sum.
  -     * @return Value of property sum.
  -     */
  -    public double getSum() {
  -        return sum;
  -    }
  -
  -    /** Getter for property sumsq.
  -     * @return Value of property sumsq.
  -     */
  -    public double getSumsq() {
  -        return sumsq;
  -    }
  -
  -    /** Getter for property sumCube.
  -     * @return Value of property sumCube.
  -     */
  -    public double getSumCube() {
  -        return sumCube;
  -    }
  -
  -    /** Getter for property sumQuad.
  -     * @return Value of property sumQuad.
  -     */
  -    public double getSumQuad() {
  -        return sumQuad;
  -    }
  -
  -    /**
  -     * Generates a text report displaying
  -     * univariate statistics from values that
  -     * have been added.
  -     * @return String with line feeds displaying statistics
  -     */
  -    public String toString() {
  -        StringBuffer outBuffer = new StringBuffer();
  -        outBuffer.append("UnivariateImpl:\n");
  -        outBuffer.append("n: " + n + "\n");
  -        outBuffer.append("min: " + min + "\n");
  -        outBuffer.append("max: " + max + "\n");
  -        outBuffer.append("mean: " + getMean() + "\n");
  -        outBuffer.append("std dev: " + getStandardDeviation() + "\n");
  -        outBuffer.append("skewness: " + getSkewness() + "\n");
  -        outBuffer.append("kurtosis: " + getKurtosis() + "\n");
  -        return outBuffer.toString();
  -    }
  -
  -    /**
  -     * Resets all sums, product, mean, and variance to 0; resets min and max.
  -     */
  -    public void clear() {
  -        this.sum = this.sumsq = this.sumCube = this.sumQuad = 0.0;
  -        this.n = 0;
  -        this.min = Double.MAX_VALUE;
  -        this.max = Double.MIN_VALUE;
  -        this.product = Double.NaN;
  -        this.mean = Double.NaN ;
  -        this.variance = this.pre_variance = Double.NaN ;
  -    }
  -
  -    /* (non-Javadoc)
  -     * @see org.apache.commons.math.Univariate#getWindowSize()
  -     */
  -    public int getWindowSize() {
  -        return windowSize;
  -    }
  -
  -    /* (non-Javadoc)
  -     * @see org.apache.commons.math.Univariate#setWindowSize(int)
  -     */
  -    public void setWindowSize(int windowSize) {
  -        String msg = "A fixed window size must be set via the " +
  -            "UnivariateImpl constructor";
  -        throw new RuntimeException( msg );
  -    }
  -}
  +     /** Just in case the windowSize is not infinite, we need to
  +      *  keep an array to remember values 0 to N
  +      */
  +     private DoubleArray doubleArray;
  +
  +     /** count of values that have been added */
  +     private int n = 0;
  +
  +     /** sum of values that have been added */
  +     private double sum = Double.NaN;
  +
  +     /** sum of the square of each value that has been added */
  +     private double sumsq = Double.NaN;
  +
  +     /** min of values that have been added */
  +     private double min = Double.NaN;
  +
  +     /** max of values that have been added */
  +     private double max = Double.NaN;
  +
  +     /** sumLog of values that have been added */
  +     private double sumLog = Double.NaN;
  +
  +     /** mean of values that have been added */
  +     private double mean = Double.NaN;
  +
  +     /** second moment of values that have been added */
  +     private double s2 = Double.NaN;
  +
  +     /** third moment of values that have been added */
  +     private double s3 = Double.NaN;
  +
  +     /** fourth moment of values that have been added */
  +     private double s4 = Double.NaN;
  +
  +     /** variance of values that have been added */
  +     private double variance = Double.NaN;
  +
  +     /** skewness of values that have been added */
  +     private double skewness = Double.NaN;
  +
  +     /** kurtosis of values that have been added */
  +     private double kurtosis = Double.NaN;
  +
  +     /** Creates new univariate with an infinite window */
  +     public UnivariateImpl() {
  +     }
  +
  +     /** Creates a new univariate with a fixed window **/
  +     public UnivariateImpl(int window) {
  +             setWindowSize(window);
  +     }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.commons.math.stat.Univariate#getN()
  +      */
  +     public int getN() {
  +             return n;
  +     }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.commons.math.stat.Univariate#getSum()
  +      */
  +     public double getSum() {
  +             if (windowSize != Univariate.INFINITE_WINDOW) {
  +                     return StatUtils.sum(doubleArray.getElements());
  +             }
  +
  +             return sum;
  +     }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.commons.math.stat.Univariate#getSumsq()
  +      */
  +     public double getSumsq() {
  +             if (windowSize != Univariate.INFINITE_WINDOW) {
  +                     return StatUtils.sumSq(doubleArray.getElements());
  +             }
  +
  +             return sumsq;
  +     }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.commons.math.stat.Univariate#getMean()
  +      */
  +     public double getMean() {
  +             if (windowSize != Univariate.INFINITE_WINDOW) {
  +                     return StatUtils.mean(doubleArray.getElements());
  +             }
  +
  +             return mean;
  +     }
  +
  +     /**
  +      * Returns the standard deviation for this collection of values
  +      * @see org.apache.commons.math.stat.Univariate#getStandardDeviation()
  +      */
  +     public double getStandardDeviation() {
  +             double stdDev = Double.NaN;
  +             if (getN() != 0) {
  +                     stdDev = Math.sqrt(getVariance());
  +             }
  +             return (stdDev);
  +     }
  +
  +     /**
  +      * Returns the variance of the values that have been added via West's
  +      * algorithm as described by
  +      * <a href="http://doi.acm.org/10.1145/359146.359152";>Chan, T. F. and
  +      * J. G. Lewis 1979, <i>Communications of the ACM</i>,
  +      * vol. 22 no. 9, pp. 526-531.</a>.
  +      *
  +      * @return The variance of a set of values.  Double.NaN is returned for
  +      *         an empty set of values and 0.0 is returned for a &lt;= 1 value set.
  +      */
  +     public double getVariance() {
  +             if (windowSize != Univariate.INFINITE_WINDOW) {
  +                     variance = StatUtils.variance(doubleArray.getElements());
  +             }
  +             return variance;
  +     }
  +
  +     /**
  +      * Returns the skewness of the values that have been added as described by
  +      * <a href="http://mathworld.wolfram.com/k-Statistic.html";>Equation (6) for 
k-Statistics</a>.
  +      *
  +      * @return The skew of a set of values.  Double.NaN is returned for
  +      *         an empty set of values and 0.0 is returned for a &lt;= 2 value set.
  +      */
  +     public double getSkewness() {
  +             if (windowSize != Univariate.INFINITE_WINDOW) {
  +                     return StatUtils.skewness(doubleArray.getElements());
  +             }
  +             return skewness;
  +     }
  +
  +     /**
  +      * Returns the kurtosis of the values that have been added as described by
  +      * <a href="http://mathworld.wolfram.com/k-Statistic.html";>Equation (7) for 
k-Statistics</a>.
  +      *
  +      * @return The kurtosis of a set of values.  Double.NaN is returned for
  +      *         an empty set of values and 0.0 is returned for a &lt;= 3 value set.
  +      */
  +     public double getKurtosis() {
  +             if (windowSize != Univariate.INFINITE_WINDOW) {
  +                     return StatUtils.kurtosis(doubleArray.getElements());
  +             }
  +             return kurtosis;
  +     }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.commons.math.stat.Univariate#getMax()
  +      */
  +     public double getMax() {
  +             if (windowSize != Univariate.INFINITE_WINDOW) {
  +                     return StatUtils.max(doubleArray.getElements());
  +             }
  +             return max;
  +     }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.commons.math.stat.Univariate#getMin()
  +      */
  +     public double getMin() {
  +             if (windowSize != Univariate.INFINITE_WINDOW) {
  +                     return StatUtils.min(doubleArray.getElements());
  +             }
  +             return min;
  +     }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.commons.math.stat.Univariate#getProduct()
  +      */
  +     public double getProduct() {
  +             if (windowSize != Univariate.INFINITE_WINDOW) {
  +                     return StatUtils.product(doubleArray.getElements());
  +             }
  +
  +             return sumLog;
  +     }
  +
  +     /* (non-Javadoc)
  +     * @see org.apache.commons.math.stat.Univariate#getGeometricMean()
  +     */
  +     public double getGeometricMean() {
  +
  +             if (windowSize != Univariate.INFINITE_WINDOW) {
  +                     return StatUtils.geometricMean(doubleArray.getElements());
  +             }
  +
  +             if (n == 0) {
  +                     return Double.NaN;
  +             } else {
  +                     return Math.exp(sumLog / (double) n);
  +             }
  +     }
  +
  +     /* If windowSize is set to Infinite, moments are calculated using the 
following 
  +      * <a href="http://www.spss.com/tech/stat/Algorithms/11.5/descriptives.pdf";>
  +     * recursive strategy
  +     * </a>.
  +     * Otherwise, stat methods delegate to StatUtils.
  +      * @see org.apache.commons.math.stat.Univariate#addValue(double)
  +      */
  +     public void addValue(double value) {
  +
  +             if (windowSize != Univariate.INFINITE_WINDOW) {
  +                     /* then all getters deligate to StatUtils
  +                      * and this clause simply adds/rolls a value in the storage 
array 
  +                      */
  +                     if (windowSize == n) {
  +                             doubleArray.addElementRolling(value);
  +                     } else {
  +                             n++;
  +                             doubleArray.addElement(value);
  +                     }
  +
  +             } else {
  +                     /* If the windowSize is infinite don't store any values and 
there 
  +                      * is no need to discard the influence of any single item.
  +                      */
  +                     n++;
  +
  +                     if (n <= 1) {
  +                             /* if n <= 1, initialize the sumLog, min, max, mean, 
variance and pre-variance */
  +                             sumLog = 0.0;
  +                             sum = min = max = mean = value;
  +                             sumsq = Math.pow(value, 2);
  +                             variance = s2 = 0.0;
  +                             skewness = kurtosis = 0.0;
  +
  +                     } else {
  +                             /* otherwise calc these values */
  +                             sumLog += Math.log(value);
  +                             sum += value;
  +                             sumsq += Math.pow(value, 2);
  +                             min = Math.min(min, value);
  +                             max = Math.max(max, value);
  +
  +                             double dev = value - mean;
  +                             double v = dev / ((double) n);
  +                             double v2 = Math.pow(v, 2);
  +                             double n1 = ((double) n - 1);
  +
  +                             s4 += v
  +                                     * (
  +                                             - 4.0 * s3
  +                                             + v * (6.0 * s2 + n1 * (1 + 
Math.pow((double) n, 3)) * v2));
  +
  +                             s3 += v * (-3.0 * s2 + (double) n * n1 * (n - 2) * 
Math.pow(v, 2));
  +                             s2 += n1 * dev * v;
  +
  +                             mean += v;
  +                             variance = 
  +                    (n <= 1) ? 0.0 : s2 / n1;
  +                             skewness =
  +                                     (n <= 2) ? 0.0 : s3 / ((double) n * 
Math.sqrt(variance) * variance);
  +                             kurtosis =
  +                                     (n <= 3) ? 0.0 : s4 / ((double) n * 
Math.pow(variance, 2)) - 3;
  +                     }
  +             }
  +     }
  +
  +     /**
  +      * Generates a text report displaying
  +      * univariate statistics from values that
  +      * have been added.
  +      * @return String with line feeds displaying statistics
  +      */
  +     public String toString() {
  +             StringBuffer outBuffer = new StringBuffer();
  +             outBuffer.append("UnivariateImpl:\n");
  +             outBuffer.append("n: " + n + "\n");
  +             outBuffer.append("min: " + min + "\n");
  +             outBuffer.append("max: " + max + "\n");
  +             outBuffer.append("mean: " + getMean() + "\n");
  +             outBuffer.append("std dev: " + getStandardDeviation() + "\n");
  +             outBuffer.append("skewness: " + getSkewness() + "\n");
  +             outBuffer.append("kurtosis: " + getKurtosis() + "\n");
  +             return outBuffer.toString();
  +     }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.commons.math.Univariate#clear()
  +      */
  +     public void clear() {
  +             this.n = 0;
  +             this.min = this.max = Double.NaN;
  +             this.sumLog = this.mean = Double.NaN;
  +             this.variance = this.skewness = this.kurtosis = Double.NaN;
  +        this.s2 = this.s3 = this.s4 = Double.NaN;
  +             if (doubleArray != null)
  +                     doubleArray = new FixedDoubleArray(windowSize);
  +     }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.commons.math.Univariate#getWindowSize()
  +      */
  +     public int getWindowSize() {
  +             return windowSize;
  +     }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.commons.math.Univariate#setWindowSize(int)
  +      */
  +     public void setWindowSize(int windowSize) {
  +             clear();
  +             this.windowSize = windowSize;
  +             doubleArray = new FixedDoubleArray(windowSize);
  +     }
  +
  +}
  \ No newline at end of file
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to