Hi all,
The slope attribute, given to gmetric, can be one of zero, positive, negative
or both. "Zero" slope is for constant values, "both" slope is for gauge values
(i.e. the data should be plotted) and "positive" is for counters.
[Aside: do we have any actual use-cases for negative slope? I couldn't think
of any.]
If one follows the code as a metric travels from gmetric to gmond to gmetad,
the support for counter metrics seems to peter out. I've attached a patch
that completes this support, using RRDTool's internal support for counters:
for new metrics, RRDTool is told to create the rrd files with a COUNTER
data-source type (instead of the usual GAUGE). RRDTool will then plot the
rate of change (per second) for the counter; no changes are needed for the
web front-end. The COUNTER data-source type also understands 32- and 64-bit
overruns and will not be affected by them.
The attached patch is against trunk. Comments are welcome!
Cheers,
Paul.
Technical aside:
RRDTool supports two similar data-source types for counters: COUNTER and
DERIVE. I've gone for COUNTER, but there are pros and cons for both.
COUNTER is perhaps the more correct. It knows about 32-bit and 64-bit
overflows and will deal with these correctly. However, it assumes that an
overflow is the only reason a counter value can be less than the previous
value: if a counter is reset (instead of wrapping), one gets an incorrect,
large spike in the data. I believe this can be countered by setting a
suitable maximum for the data source.
DERIVE is obtained by drawing a line between the previous and current
measurements and plotting this line's slope (i.e. taking the first
derivative). It doesn't know about counters, so an overflow would appear as a
very large negative value on the graph. However, one can limit the data so
it's never negative (by setting min=0). This has the benefit of not producing
any outrageous spikes in the data, but the disadvantage that overflows are not
handled: in both cases (a counter reset or an overflow) there would be a gap
in the graph. Also, if the rate of change approaches the 32-bit limit, there
would be more counter overruns and these gaps would become more frequent.
This is discussed more within the rrdcreate(1) man page.
Index: gmetad/process_xml.c
===================================================================
--- gmetad/process_xml.c (revision 870)
+++ gmetad/process_xml.c (working copy)
@@ -5,12 +5,9 @@
#include <sys/time.h>
#include "expat.h"
#include "gmetad.h"
+#include "lib/ganglia.h"
+#include "rrd_helpers.h"
-
-
-extern int write_data_to_rrd( const char *source, const char *host,
- const char *metric, const char *sum, const char *num,
- unsigned int step, unsigned int time_polled);
extern int zero_out_summary(datum_t *key, datum_t *val, void *arg);
extern char* getfield(char *buf, short int index);
@@ -568,6 +565,7 @@
startElement_METRIC(void *data, const char *el, const char **attr)
{
xmldata_t *xmldata = (xmldata_t *)data;
+ ganglia_slope_t slope = GANGLIA_SLOPE_UNSPECIFIED;
struct xml_tag *xt;
struct type_tag *tt;
datum_t *hash_datum = NULL;
@@ -602,6 +600,8 @@
case TYPE_TAG:
type = attr[i+1];
break;
+ case SLOPE_TAG:
+ slope = cstr_to_slope(attr[i+1]);
default:
break;
}
@@ -629,7 +629,8 @@
xmldata->hostname, name);
xmldata->rval = write_data_to_rrd(xmldata->sourcename,
xmldata->hostname, name, metricval, NULL,
- xmldata->ds->step, xmldata->source.localtime);
+ xmldata->ds->step, xmldata->source.localtime,
+ slope);
}
metric->id = METRIC_NODE;
metric->report_start = metric_report_start;
@@ -935,7 +936,9 @@
xmldata->sourcename, name);
xmldata->rval = write_data_to_rrd(xmldata->sourcename, NULL, name,
- sum, num, xmldata->ds->step, xmldata->source.localtime);
+ sum, num, xmldata->ds->step,
+ xmldata->source.localtime,
+ cstr_to_slope(getfield(metric->strings, metric->slope)));
}
return xmldata->rval;
Index: gmetad/gmetad.c
===================================================================
--- gmetad/gmetad.c (revision 870)
+++ gmetad/gmetad.c (working copy)
@@ -12,6 +12,7 @@
#include <cmdline.h>
#include "daemon_init.h"
+#include "rrd_helpers.h"
/* Holds our data sources. */
hash_t *sources;
@@ -30,8 +31,6 @@
extern int parse_config_file ( char *config_file );
extern int number_of_datasources ( char *config_file );
extern struct type_tag* in_type_list (char *, unsigned int);
-extern int write_data_to_rrd( const char *source, const char *host, const char *metric,
- const char *sum, const char *num, unsigned int step, unsigned int time_polled);
struct gengetopt_args_info args_info;
@@ -244,7 +243,7 @@
/* err_msg("Writing Overall Summary for metric %s (%s)", name, sum); */
/* Save the data to a round robin database */
- rc = write_data_to_rrd( NULL, NULL, name, sum, num, 15, 0);
+ rc = write_data_to_rrd( NULL, NULL, name, sum, num, 15, 0, metric->slope);
if (rc)
{
err_msg("Unable to write meta data for metric %s to RRD", name);
Index: gmetad/rrd_helpers.c
===================================================================
--- gmetad/rrd_helpers.c (revision 870)
+++ gmetad/rrd_helpers.c (working copy)
@@ -11,6 +11,10 @@
#include <pthread.h>
#include <time.h>
+#include "lib/ganglia.h"
+
+#include "rrd_helpers.h"
+
#define PATHSIZE 4096
extern gmetad_config_t gmetad_config;
@@ -64,8 +68,10 @@
/* Warning: RRD_create will overwrite a RRdb if it already exists */
static int
-RRD_create( char *rrd, int summary, unsigned int step, unsigned int process_time)
+RRD_create( char *rrd, int summary, unsigned int step, unsigned int process_time,
+ ganglia_slope_t slope)
{
+ const char *data_source_type = "GAUGE";
char *argv[128];
int argc=0;
int heartbeat;
@@ -77,6 +83,19 @@
/* Our heartbeat is twice the step interval. */
heartbeat = 8*step;
+ switch( slope) {
+ case GANGLIA_SLOPE_POSITIVE:
+ data_source_type = "COUNTER";
+ break;
+
+ case GANGLIA_SLOPE_ZERO:
+ case GANGLIA_SLOPE_NEGATIVE:
+ case GANGLIA_SLOPE_BOTH:
+ case GANGLIA_SLOPE_UNSPECIFIED:
+ data_source_type = "GAUGE";
+ break;
+ }
+
argv[argc++] = "dummy";
argv[argc++] = rrd;
argv[argc++] = "--step";
@@ -85,10 +104,14 @@
argv[argc++] = "--start";
sprintf(start, "%u", process_time-1);
argv[argc++] = start;
- sprintf(sum,"DS:sum:GAUGE:%d:U:U", heartbeat);
+ sprintf(sum,"DS:sum:%s:%d:U:U",
+ data_source_type,
+ heartbeat);
argv[argc++] = sum;
if (summary) {
- sprintf(num,"DS:num:GAUGE:%d:U:U", heartbeat);
+ sprintf(num,"DS:num:%s:%d:U:U",
+ data_source_type,
+ heartbeat);
argv[argc++] = num;
}
@@ -125,7 +148,8 @@
host rrds only have "sum" (since num is always 1) */
static int
push_data_to_rrd( char *rrd, const char *sum, const char *num,
- unsigned int step, unsigned int process_time)
+ unsigned int step, unsigned int process_time,
+ ganglia_slope_t slope)
{
int rval;
int summary;
@@ -142,7 +166,7 @@
if( stat(rrd, &st) )
{
- rval = RRD_create( rrd, summary, step, process_time );
+ rval = RRD_create( rrd, summary, step, process_time, slope);
if( rval )
return rval;
}
@@ -152,7 +176,8 @@
/* Assumes num argument will be NULL for a host RRD. */
int
write_data_to_rrd ( const char *source, const char *host, const char *metric,
- const char *sum, const char *num, unsigned int step, unsigned int process_time )
+ const char *sum, const char *num, unsigned int step,
+ unsigned int process_time, ganglia_slope_t slope)
{
char rrd[ PATHSIZE ];
char *summary_dir = "__SummaryInfo__";
@@ -181,5 +206,5 @@
strncat(rrd, metric, PATHSIZE);
strncat(rrd, ".rrd", PATHSIZE);
- return push_data_to_rrd( rrd, sum, num, step, process_time );
+ return push_data_to_rrd( rrd, sum, num, step, process_time, slope);
}
Index: gmond/gmond.c
===================================================================
--- gmond/gmond.c (revision 870)
+++ gmond/gmond.c (working copy)
@@ -1870,8 +1870,9 @@
type = apr_pstrdup(gmetric->pool, host_metric_type(cb->info->type));
errors = Ganglia_gmetric_set(gmetric, cb->info->name, val, type,
- cb->info->units, !strcmp(cb->info->slope,"zero")? 0: 3,
+ cb->info->units, cstr_to_slope( cb->info->slope),
cb->info->tmax, 0);
+
if (errors)
{
err_msg("Error %d setting the modular data for %s\n", errors, cb->name);
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Ganglia-developers mailing list
Ganglia-developers@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/ganglia-developers