>From b5f88cf3488fba43d079580050dc055322cc8096 Mon Sep 17 00:00:00 2001 From: tpapp <tkp...@gmail.com> Date: Sat, 21 Jan 2012 16:04:10 +0100 Subject: [PATCH] Made variance and standard-deviation generic, single pass, removed biased version, added sse.
Rationale for the changes: 1. Similarly to the mean and the median (see my previous patch), objects othen than sequences have means and medians, including other collections (eg arrays) and objects which are not collections of elements (eg random variables). 2. A single pass algorithm (and the provided macro, not yet exported) should be useful for implementing these methods for objects which are costly to travel. According to my benchmarks, the performance is about 10% better than the previous version. 3. Removed biased variance and standard-deviation. Perhaps this is the most controversial of the proposed changes, but variance and especially standard deviation normalized by n instead of n-1 is usually not used per se. If needed, these can be obtained from SSE. 4. SSE is useful in itself, eg in Bayesian statistics. --- numbers.lisp | 62 +++++++++++++++++++++++++++++++++++++++++---------------- package.lisp | 3 +- tests.lisp | 16 +++++++++++++- 3 files changed, 60 insertions(+), 21 deletions(-) diff --git a/numbers.lisp b/numbers.lisp index 0440491..4565fdf 100644 --- a/numbers.lisp +++ b/numbers.lisp @@ -137,25 +137,51 @@ define new methods.") ;; For implementations supporting custom sequence types. (median-in-place (copy-sequence 'vector object)))) -(declaim (inline variance)) -(defun variance (sample &key (biased t)) - "Variance of SAMPLE. Returns the biased variance if BIASED is true (the default), -and the unbiased estimator of variance if BIASED is false. SAMPLE must be a -sequence of numbers." - (let ((mean (mean sample))) - (/ (reduce (lambda (a b) - (+ a (expt (- b mean) 2))) - sample - :initial-value 0) - (- (length sample) (if biased 0 1))))) +(defmacro with-sse-one-pass ((add sse &key (mean (make-gensym '#:mean)) + (count (make-gensym '#:count))) + &body body) + "Calculate the sum of squared errors (from the mean) for elements added with +ADD in BODY. The result is available within body in the variable SSE. MEAN +and COUNT contain the mean and the number of elements. Uses a +one-pass (on-line) algorithm, with reasonably good numerical properties." + `(let ((,count 0) + (,mean 0) + (,sse 0)) + (flet ((,add (value) + (let ((difference (- value ,mean))) + (incf ,count) + (incf ,mean (/ difference ,count)) + (incf ,sse (* (- value ,mean) difference))))) + ,@body))) -(declaim (inline standard-deviation)) -(defun standard-deviation (sample &key (biased t)) - "Standard deviation of SAMPLE. Returns the biased standard deviation if -BIASED is true (the default), and the square root of the unbiased estimator -for variance if BIASED is false (which is not the same as the unbiased -estimator for standard deviation). SAMPLE must be a sequence of numbers." - (sqrt (variance sample :biased biased))) +(defgeneric sse (sample) + (:documentation + "Sum of squared errors (from the mean) for SAMPLE. The mean is returned as +a the second value, and the count of elements as the third value.") + (:method ((object sequence)) + (with-sse-one-pass (add sse :mean mean :count count) + (map nil #'add object) + (values sse mean count))) + (:method ((object array)) + (with-sse-one-pass (add sse :mean mean :count count) + (declare (type fixnum count)) + (dotimes (index (array-total-size object)) + (add (row-major-aref object index))) + (values sse mean count)))) + +(defgeneric variance (object) + (:documentation "Variance of OBJECT. For samples, return the unbiased +estimator for the variance (normalizing by 1-n).") + (:method (object) + (multiple-value-bind (sse mean count) (sse object) + (declare (ignore mean)) + (/ sse (1- count))))) + +(defgeneric standard-deviation (object) + (:documentation "Standard deviation of OBJECT. For samples, return the +sample standard deviation, normalized by 1-n.") + (:method (object) + (sqrt (variance object)))) (define-modify-macro maxf (&rest numbers) max "Modify-macro for MAX. Sets place designated by the first argument to the diff --git a/package.lisp b/package.lisp index 935c698..f7dd255 100644 --- a/package.lisp +++ b/package.lisp @@ -239,4 +239,5 @@ #:destructuring-case #:destructuring-ccase #:destructuring-ecase - #:flatten-array)) + #:flatten-array + #:sse)) diff --git a/tests.lisp b/tests.lisp index 7338b83..bcf2bad 100644 --- a/tests.lisp +++ b/tests.lisp @@ -1000,12 +1000,24 @@ (98 2 97 96))) 193/2) +(deftest sse.1 + (multiple-value-bind (sse mean count) (sse #(0 2 4)) + (and (= sse 8) + (= mean 2) + (= count 3))) + t) + (deftest variance.1 (variance (list 1 2 3)) - 2/3) + 1) + +(deftest variance.2 + (variance #2A((-1 3) + (1 1))) + 8/3) (deftest standard-deviation.1 - (< 0 (standard-deviation (list 1 2 3)) 1) + (equal (standard-deviation (list 1 2 3)) 1.0) t) (deftest maxf.1 -- 1.7.8.3
_______________________________________________ alexandria-devel mailing list alexandria-devel@common-lisp.net http://lists.common-lisp.net/cgi-bin/mailman/listinfo/alexandria-devel