Author: spadkins
Date: Tue Dec 12 11:33:13 2006
New Revision: 8387
Modified:
p5ee/trunk/App-Repository/lib/App/Repository.pm
Log:
enhance summarize_rows() to do extended summaries
Modified: p5ee/trunk/App-Repository/lib/App/Repository.pm
==============================================================================
--- p5ee/trunk/App-Repository/lib/App/Repository.pm (original)
+++ p5ee/trunk/App-Repository/lib/App/Repository.pm Tue Dec 12 11:33:13 2006
@@ -2762,17 +2762,33 @@
$summarized_rows = $rep->summarize_rows([EMAIL PROTECTED], [EMAIL
PROTECTED], [EMAIL PROTECTED], \%formulas);
@rows = (
- { id=>5, "Jim", "Green", 13.5, 320, },
- { id=>3, "Bob", "Green", 4.2, 230, },
- { id=>9, "Ken", "Green", 27.4, 170, },
- { id=>2, "Kim", "Blue", 11.7, 440, },
- { id=>7, "Jan", "Blue", 55.1, 90, },
- { id=>1, "Ben", "Blue", 22.6, 195, },
+ { id=>5, name=>"Jim", team=>"Green", rating=>13.5, score=>320, },
+ { id=>3, name=>"Bob", team=>"Green", rating=> 4.2, score=>230, },
+ { id=>9, name=>"Ken", team=>"Green", rating=>27.4, score=>170, },
+ { id=>2, name=>"Kim", team=>"Blue", rating=>11.7, score=>440, },
+ { id=>7, name=>"Jan", team=>"Blue", rating=>55.1, score=> 90, },
+ { id=>1, name=>"Ben", team=>"Blue", rating=>22.6, score=>195, },
);
@columns = ( "rating", "score" ); # summarize a subset of the columns
@summary_keys = ( "team" );
- %options = ();
+ %options = (
+ ext_summaries => \%summaries, # extended summaries
+ ext_summary_columns => [ "rating", "score", "team", ], # optional
+ ext_summary_functions => { # optional
+ sum => 1,
+ count => 1,
+ sum_sq => 1,
+ distinct => 1,
+ min => 1,
+ max => 1,
+ average => 1, # requires sum, count
+ median => 1, # requires distinct
+ mode => 1, # requires min, max
+ stddev => 1, # requires sum, sum_sq, count
+ },
+ );
+ # returns the "natural" summaries
$summarized_rows = $rep->summarize_rows([EMAIL PROTECTED], [EMAIL
PROTECTED], [EMAIL PROTECTED], \%options);
=cut
@@ -2785,6 +2801,24 @@
my $column_def = $self->{table}{$table}{column};
+ my ($ext_summaries, $ext_column_summary, $ext_summary_columns,
$ext_summary_functions);
+ $ext_summaries = $options->{ext_summaries};
+ if ($ext_summaries) {
+ $ext_summary_columns = $options->{ext_summary_columns};
+ $ext_summary_functions = $options->{ext_summary_functions} || { # do
all of them
+ count => 1,
+ distinct => 1,
+ sum => 1,
+ sum_sq => 1,
+ min => 1,
+ max => 1,
+ average => 1, # requires sum, count
+ median => 1, # requires distinct
+ mode => 1, # requires min, max
+ stddev => 1, # requires sum, sum_sq, count
+ };
+ }
+
my (@summary_rows, $summary_row, %summary_row);
my ($key, $row, $hash_rows, $hash_row, $i, $rowidx, $colidx, $column,
$value);
my $row_type = "ARRAY";
@@ -2866,7 +2900,86 @@
push(@summary_key_values, $key);
}
foreach $i (@$sum_column_idx) {
- $summary_row->[$i] += $row->[$i];
+ if (defined $row->[$i]) {
+ $summary_row->[$i] += $row->[$i];
+ }
+ }
+ if ($ext_summaries) {
+ foreach $i (@$sum_column_idx) {
+ $column = $columns->[$i];
+ $value = $row->[$i];
+ if (defined $value) {
+ $ext_column_summary = $ext_summaries->{$column};
+ if (!$ext_column_summary) {
+ $ext_column_summary = {};
+ $ext_summaries->{$column} = $ext_column_summary;
+ }
+ $ext_column_summary->{count} ++;
+ $ext_column_summary->{distinct}{$value} ++;
+ $ext_column_summary->{sum} += $value;
+ $ext_column_summary->{sum_sq} += $value*$value;
+ if (!defined $ext_column_summary->{min} ||
$ext_column_summary->{min} > $value) {
+ $ext_column_summary->{min} = $value;
+ }
+ if (!defined $ext_column_summary->{max} ||
$ext_column_summary->{max} > $value) {
+ $ext_column_summary->{max} = $value;
+ }
+ }
+ }
+ }
+ }
+ if ($ext_summaries) {
+ my ($count, $sum, $sum_sq, $num, $median_count, $median);
+ foreach $i (@$sum_column_idx) {
+ $column = $columns->[$i];
+ $ext_column_summary = $ext_summaries->{$column};
+ if ($ext_column_summary && $ext_column_summary->{count}) {
+ $ext_column_summary->{average} =
$ext_column_summary->{sum}/$ext_column_summary->{count};
+ $ext_column_summary->{mode} =
($ext_column_summary->{max} + $ext_column_summary->{min})/2;
+ $count =
$ext_column_summary->{count};
+ if ($count > 1) {
+ $sum =
$ext_column_summary->{sum};
+ $sum_sq =
$ext_column_summary->{sum_sq};
+ $value = ($count * $sum_sq + $sum
* $sum)/($count * ($count - 1));
+ if ($value > 0) {
+ $ext_column_summary->{stddev} = sqrt($value);
+ }
+ }
+ if ($count % 2 == 1) {
+ $num = 0;
+ $median_count = ($count - 1)/2 + 1;
+ foreach $value (sort { $a <=> $b } keys
%{$ext_column_summary->{distinct}}) {
+ $num += $ext_column_summary->{distinct}{$value};
+ if ($num >= $median_count) {
+ $ext_column_summary->{median} = $value;
+ last;
+ }
+ }
+ }
+ else {
+ $num = 0;
+ $median_count = $count/2;
+ $median = undef;
+ foreach $value (sort { $a <=> $b } keys
%{$ext_column_summary->{distinct}}) {
+ $num += $ext_column_summary->{distinct}{$value};
+ if (!defined $median) {
+ if ($num >= $median_count + 1) {
+ $ext_column_summary->{median} = $value;
+ last;
+ }
+ elsif ($num == $median_count) {
+ $median = $value;
+ }
+ }
+ else {
+ if ($num >= $median_count + 1) {
+ $ext_column_summary->{median} = ($median +
$value)/2;
+ last;
+ }
+ }
+ }
+ }
+ }
}
}