Hi all,

The slope attribute, given to gmetric, can be one of zero, positive, negative
or both.  "Zero" slope is for constant values, "both" slope is for gauge values
(i.e. data should be plotted) and "positive" is for counters.

[Aside: do we have any actual use-cases for -ve slope?  I couldn't think of 
any]

If one follows the code as a metric travels from gmetric to gmond to gmetad, 
the support for counter metrics seems to peter out.  I've attached a patch 
that completes this support, using RRDTool's internal support for counters: 
for new metrics, RRDTool is told to create the rrd files with a COUNTER 
data-source type (instead of the usual GAUGE).  RRDTool will then plot the 
rate of change (per second) for the counter; no changes are needed for the 
web front-end.  The COUNTER data-source type also understands 32- and 64-bit 
overruns and will not be affected by them.

The attached patch is against trunk.  Comments are welcome!

Cheers,

Paul.

Technical aside:
        RRDTool supports two similar data-source types for counters: COUNTER
and DERIVE.  I've gone for COUNTER, but there are pros and cons for both.

COUNTER is perhaps the more correct.  It knows about 32-bit and 64-bit 
overflows and will deal with these correctly.  However, it assumes that 
overflows are the only reason a counter value is less than the previous 
value: if a counter is reset (instead of wrapping), one gets an incorrect 
large spike in the data.  I believe this can be countered by setting a 
suitable maximum.

DERIVE is obtained by drawing a line between previous and current measurements 
and plotting this line's slope (i.e. taking the 1st derivative).  It doesn't 
know about counters, so an overflow would appear as a very large negative 
value on the graph.  However, one can limit the data so it's never negative 
(by setting min=0).  This has the benefit of not producing any outrageous
spikes in the data, but the disadvantage of not handling overflows: in
both cases (a counter reset or a counter overflow) there would be a gap in
the graph.  Also, if the rate of change approaches the 32-bit maximum, there
would be more counter overruns and these gaps would become more frequent.

This is discussed more within the rrdcreate(1) man page.
Index: gmetad/process_xml.c
===================================================================
--- gmetad/process_xml.c	(revision 870)
+++ gmetad/process_xml.c	(working copy)
@@ -5,12 +5,9 @@
 #include <sys/time.h>
 #include "expat.h"
 #include "gmetad.h"
+#include "lib/ganglia.h"
+#include "rrd_helpers.h"
 
-
-
-extern int write_data_to_rrd( const char *source, const char *host, 
-                const char *metric, const char *sum, const char *num, 
-                unsigned int step, unsigned int time_polled);
 extern int zero_out_summary(datum_t *key, datum_t *val, void *arg);
 extern char* getfield(char *buf, short int index);
 
@@ -568,6 +565,7 @@
 startElement_METRIC(void *data, const char *el, const char **attr)
 {
    xmldata_t *xmldata = (xmldata_t *)data;
+   ganglia_slope_t slope = GANGLIA_SLOPE_UNSPECIFIED;
    struct xml_tag *xt;
    struct type_tag *tt;
    datum_t *hash_datum = NULL;
@@ -602,6 +600,8 @@
                case TYPE_TAG:
                   type = attr[i+1];
                   break;
+  	       case SLOPE_TAG:
+		  slope = cstr_to_slope(attr[i+1]);
                default:
                   break;
             }
@@ -629,7 +629,8 @@
                                   xmldata->hostname, name);
                   xmldata->rval = write_data_to_rrd(xmldata->sourcename,
                         xmldata->hostname, name, metricval, NULL,
-                        xmldata->ds->step, xmldata->source.localtime);
+		        xmldata->ds->step, xmldata->source.localtime,
+		        slope);
             }
          metric->id = METRIC_NODE;
          metric->report_start = metric_report_start;
@@ -935,7 +936,9 @@
 	       xmldata->sourcename, name);
 	
 	   xmldata->rval = write_data_to_rrd(xmldata->sourcename, NULL, name,
-	           sum, num, xmldata->ds->step, xmldata->source.localtime);
+					     sum, num, xmldata->ds->step,
+					     xmldata->source.localtime,
+					     cstr_to_slope(getfield(metric->strings, metric->slope)));
    }
 
    return xmldata->rval;
Index: gmetad/gmetad.c
===================================================================
--- gmetad/gmetad.c	(revision 870)
+++ gmetad/gmetad.c	(working copy)
@@ -12,6 +12,7 @@
 #include <cmdline.h>
 #include "daemon_init.h"
 
+#include "rrd_helpers.h"
 
 /* Holds our data sources. */
 hash_t *sources;
@@ -30,8 +31,6 @@
 extern int parse_config_file ( char *config_file );
 extern int number_of_datasources ( char *config_file );
 extern struct type_tag* in_type_list (char *, unsigned int);
-extern int write_data_to_rrd( const char *source, const char *host, const char *metric,
-   const char *sum, const char *num, unsigned int step, unsigned int time_polled);
 
 struct gengetopt_args_info args_info;
 
@@ -244,7 +243,7 @@
    /* err_msg("Writing Overall Summary for metric %s (%s)", name, sum); */
 
    /* Save the data to a round robin database */
-   rc = write_data_to_rrd( NULL, NULL, name, sum, num, 15, 0);
+   rc = write_data_to_rrd( NULL, NULL, name, sum, num, 15, 0, metric->slope);
    if (rc)
       {
          err_msg("Unable to write meta data for metric %s to RRD", name);
Index: gmetad/rrd_helpers.c
===================================================================
--- gmetad/rrd_helpers.c	(revision 870)
+++ gmetad/rrd_helpers.c	(working copy)
@@ -11,6 +11,10 @@
 #include <pthread.h>
 #include <time.h>
 
+#include "lib/ganglia.h"
+
+#include "rrd_helpers.h"
+
 #define PATHSIZE 4096
 extern gmetad_config_t gmetad_config;
 
@@ -64,8 +68,10 @@
 
 /* Warning: RRD_create will overwrite a RRdb if it already exists */
 static int
-RRD_create( char *rrd, int summary, unsigned int step, unsigned int process_time)
+RRD_create( char *rrd, int summary, unsigned int step, unsigned int process_time,
+	    ganglia_slope_t slope)
 {
+   const char *data_source_type = "GAUGE";
    char *argv[128];
    int  argc=0;
    int heartbeat;
@@ -77,6 +83,19 @@
    /* Our heartbeat is twice the step interval. */
    heartbeat = 8*step;
 
+   switch( slope) {
+   case GANGLIA_SLOPE_POSITIVE:
+     data_source_type = "COUNTER";
+     break;
+
+   case GANGLIA_SLOPE_ZERO:
+   case GANGLIA_SLOPE_NEGATIVE:
+   case GANGLIA_SLOPE_BOTH:
+   case GANGLIA_SLOPE_UNSPECIFIED:
+     data_source_type = "GAUGE";
+     break;
+   }
+
    argv[argc++] = "dummy";
    argv[argc++] = rrd;
    argv[argc++] = "--step";
@@ -85,10 +104,14 @@
    argv[argc++] = "--start";
    sprintf(start, "%u", process_time-1);
    argv[argc++] = start;
-   sprintf(sum,"DS:sum:GAUGE:%d:U:U", heartbeat);
+   sprintf(sum,"DS:sum:%s:%d:U:U",
+	   data_source_type,
+	   heartbeat);
    argv[argc++] = sum;
    if (summary) {
-      sprintf(num,"DS:num:GAUGE:%d:U:U", heartbeat);
+      sprintf(num,"DS:num:%s:%d:U:U", 
+	      data_source_type,
+	      heartbeat);
       argv[argc++] = num;
    }
 
@@ -125,7 +148,8 @@
    host rrds only have "sum" (since num is always 1) */
 static int
 push_data_to_rrd( char *rrd, const char *sum, const char *num, 
-   unsigned int step, unsigned int process_time)
+		  unsigned int step, unsigned int process_time,
+		  ganglia_slope_t slope)
 {
    int rval;
    int summary;
@@ -142,7 +166,7 @@
 
    if( stat(rrd, &st) )
       {
-         rval = RRD_create( rrd, summary, step, process_time );
+         rval = RRD_create( rrd, summary, step, process_time, slope);
          if( rval )
             return rval;
       }
@@ -152,7 +176,8 @@
 /* Assumes num argument will be NULL for a host RRD. */
 int
 write_data_to_rrd ( const char *source, const char *host, const char *metric, 
-   const char *sum, const char *num, unsigned int step, unsigned int process_time )
+		    const char *sum, const char *num, unsigned int step,
+		    unsigned int process_time, ganglia_slope_t slope)
 {
    char rrd[ PATHSIZE ];
    char *summary_dir = "__SummaryInfo__";
@@ -181,5 +206,5 @@
    strncat(rrd, metric, PATHSIZE);
    strncat(rrd, ".rrd", PATHSIZE);
 
-   return push_data_to_rrd( rrd, sum, num, step, process_time );
+   return push_data_to_rrd( rrd, sum, num, step, process_time, slope);
 }
Index: gmond/gmond.c
===================================================================
--- gmond/gmond.c	(revision 870)
+++ gmond/gmond.c	(working copy)
@@ -1870,8 +1870,9 @@
             type = apr_pstrdup(gmetric->pool, host_metric_type(cb->info->type));
         
             errors = Ganglia_gmetric_set(gmetric, cb->info->name, val, type,
-                        cb->info->units, !strcmp(cb->info->slope,"zero")? 0: 3,
+                        cb->info->units, cstr_to_slope( cb->info->slope),
                         cb->info->tmax, 0);
+
             if (errors) 
               {
                 err_msg("Error %d setting the modular data for %s\n", errors, cb->name);
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Ganglia-developers mailing list
Ganglia-developers@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/ganglia-developers

Reply via email to