The Avg Load percentages on the ganglia web frontend currently show the
latest measured values for the grid/cluster.  When looking at historical
data, these numbers can be misleading when compared to the graphs right
next to them.  I created a patch (attached) that changes this behavior by
using rrdtool to calculate the average loads over the displayed time range
instead of showing the latest value.  Any comments or suggestions?

~Jason


-- 
/------------------------------------------------------------------\
|  Jason A. Smith                          Email:  [EMAIL PROTECTED] |
|  Atlas Computing Facility, Bldg. 510M    Phone:  (631)344-4226   |
|  Brookhaven National Lab, P.O. Box 5000  Fax:    (631)344-7616   |
|  Upton, NY 11973-5000                                            |
\------------------------------------------------------------------/

diff -uNr ganglia-3.0.2-dist/web/cluster_view.php ganglia-3.0.2/web/cluster_view.php
--- ganglia-3.0.2-dist/web/cluster_view.php	2005-02-02 21:22:48.028343984 -0500
+++ ganglia-3.0.2/web/cluster_view.php	2005-12-14 14:59:06.814632671 -0500
@@ -21,10 +21,12 @@
 $tpl->assign("cpu_num", $cpu_num);
 $tpl->assign("localtime", date("Y-m-d H:i", $cluster['LOCALTIME']));
 
-if (!$cpu_num) $cpu_num = 1;
-$cluster_load15 = sprintf("%.0f", ((double) $load_fifteen_sum / $cpu_num) * 100);
-$cluster_load5 = sprintf("%.0f", ((double) $load_five_sum / $cpu_num) * 100);
-$cluster_load1 = sprintf("%.0f", ((double) $load_one_sum / $cpu_num) * 100);
+$avg_cpu_num = find_avg($clustername, "cpu_num");
+if (!$avg_cpu_num) $avg_cpu_num = 1;
+$cluster_load15 = sprintf("%.0f", ((double) find_avg($clustername, "load_fifteen") / $avg_cpu_num ) * 100);
+$cluster_load5 = sprintf("%.0f", ((double) find_avg($clustername, "load_five") / $avg_cpu_num ) * 100);
+$cluster_load1 = sprintf("%.0f", ((double) find_avg($clustername, "load_one") / $avg_cpu_num ) * 100);
+
 $tpl->assign("cluster_load", "$cluster_load15%, $cluster_load5%, $cluster_load1%");
 
 $cluster_url=rawurlencode($clustername);
diff -uNr ganglia-3.0.2-dist/web/functions.php ganglia-3.0.2/web/functions.php
--- ganglia-3.0.2-dist/web/functions.php	2005-02-02 21:22:48.960352626 -0500
+++ ganglia-3.0.2/web/functions.php	2005-12-14 14:55:01.951738807 -0500
@@ -237,6 +237,25 @@
 
 #-------------------------------------------------------------------------------
 #
+# Finds the avg of the given cluster & metric from the summary rrds over the
+# displayed time range ($start..$end). Returns 0 when rrdtool gives no number.
+#
+function find_avg($clustername, $metricname)
+{
+   global $rrds, $start, $end;
+   # Escape each shell word in case these values carry request-derived text.
+   $rrd = "$rrds/$clustername/__SummaryInfo__/$metricname.rrd";
+   $command = RRDTOOL . " graph '' --start " . escapeshellarg($start) .
+     " --end " . escapeshellarg($end) .
+     " " . escapeshellarg("DEF:avg=$rrd:sum:AVERAGE") .
+     " PRINT:avg:AVERAGE:%.2lf";
+   exec($command, $out);
+   # rrdtool prints the image size first; line 2 is the PRINT value or "nan".
+   return (isset($out[1]) && is_numeric($out[1])) ? $out[1] : 0;
+}
+
+#-------------------------------------------------------------------------------
+#
 # Generates the colored Node cell HTML. Used in Physical
 # view and others. Intended to be used to build a table, output
 # begins with "<tr><td>" and ends the same.
diff -uNr ganglia-3.0.2-dist/web/meta_view.php ganglia-3.0.2/web/meta_view.php
--- ganglia-3.0.2-dist/web/meta_view.php	2005-02-02 21:22:48.526407786 -0500
+++ ganglia-3.0.2/web/meta_view.php	2005-12-14 15:00:37.016184164 -0500
@@ -89,10 +89,12 @@
             $class = "cluster";
          }
 
-      $cpu_num = $m["cpu_num"]['SUM'] ? $m["cpu_num"]['SUM'] : 1;
-      $cluster_load15 = sprintf("%.0f", ((double) $m["load_fifteen"]['SUM'] / $cpu_num) * 100);
-      $cluster_load5 = sprintf("%.0f", ((double) $m["load_five"]['SUM'] / $cpu_num) * 100);
-      $cluster_load1 = sprintf("%.0f", ((double) $m["load_one"]['SUM'] / $cpu_num) * 100);
+      $clusname = $grid[$source][GRID] ? '' : $source;
+      $avg_cpu_num = find_avg($clusname, "cpu_num");
+      if (!$avg_cpu_num) $avg_cpu_num = 1;
+      $cluster_load15 = sprintf("%.0f", ((double) find_avg($clusname, "load_fifteen") / $avg_cpu_num ) * 100);
+      $cluster_load5 = sprintf("%.0f", ((double) find_avg($clusname, "load_five") / $avg_cpu_num ) * 100);
+      $cluster_load1 = sprintf("%.0f", ((double) find_avg($clusname, "load_one") / $avg_cpu_num ) * 100);
       $cluster_load = "$cluster_load15%, $cluster_load5%, $cluster_load1%";
 
       $tpl->newBlock ("source_info");

Reply via email to