Author: spadkins
Date: Wed Dec 20 07:44:54 2006
New Revision: 8411
Modified:
p5ee/trunk/App-Repository/lib/App/Repository.pm
Log:
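fix max() (inverted comparison), mode() (was computed as the midrange (max+min)/2), and stddev() (sign error in the variance numerator) in extended summary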
Modified: p5ee/trunk/App-Repository/lib/App/Repository.pm
==============================================================================
--- p5ee/trunk/App-Repository/lib/App/Repository.pm (original)
+++ p5ee/trunk/App-Repository/lib/App/Repository.pm Wed Dec 20 07:44:54 2006
@@ -2921,7 +2921,7 @@
if (!defined $ext_column_summary->{min} ||
$ext_column_summary->{min} > $value) {
$ext_column_summary->{min} = $value;
}
- if (!defined $ext_column_summary->{max} ||
$ext_column_summary->{max} > $value) {
+ if (!defined $ext_column_summary->{max} ||
$ext_column_summary->{max} < $value) {
$ext_column_summary->{max} = $value;
}
}
@@ -2929,18 +2929,19 @@
}
}
if ($ext_summaries) {
- my ($count, $sum, $sum_sq, $num, $median_count, $median);
+ my ($count, $sum, $sum_sq, $num, $median_count, $median,
$mode_count, $mode);
foreach $i (@$sum_column_idx) {
$column = $columns->[$i];
$ext_column_summary = $ext_summaries->{$column};
if ($ext_column_summary && $ext_column_summary->{count}) {
$ext_column_summary->{average} =
$ext_column_summary->{sum}/$ext_column_summary->{count};
- $ext_column_summary->{mode} =
($ext_column_summary->{max} + $ext_column_summary->{min})/2;
$count =
$ext_column_summary->{count};
+ $mode = undef;
+ $mode_count = 0;
if ($count > 1) {
$sum =
$ext_column_summary->{sum};
$sum_sq =
$ext_column_summary->{sum_sq};
- $value = ($count * $sum_sq + $sum
* $sum)/($count * ($count - 1));
+ $value = ($count * $sum_sq - $sum
* $sum)/($count * ($count - 1));
if ($value > 0) {
$ext_column_summary->{stddev} = sqrt($value);
}
@@ -2950,6 +2951,10 @@
$median_count = ($count - 1)/2 + 1;
foreach $value (sort { $a <=> $b } keys
%{$ext_column_summary->{distinct}}) {
$num += $ext_column_summary->{distinct}{$value};
+ if ($count > $mode_count || ($count == $mode_count
&& $num <= $median_count)) {
+ $mode = $value;
+ $mode_count = $count;
+ }
if ($num >= $median_count) {
$ext_column_summary->{median} = $value;
last;
@@ -2962,6 +2967,10 @@
$median = undef;
foreach $value (sort { $a <=> $b } keys
%{$ext_column_summary->{distinct}}) {
$num += $ext_column_summary->{distinct}{$value};
+ if ($count > $mode_count || ($count == $mode_count
&& $num <= $median_count)) {
+ $mode = $value;
+ $mode_count = $count;
+ }
if (!defined $median) {
if ($num >= $median_count + 1) {
$ext_column_summary->{median} = $value;
@@ -2979,6 +2988,7 @@
}
}
}
+ $ext_column_summary->{mode} = $mode;
}
}
}