Re: [Ganglia-general] linux monitor implementation

2002-04-09 Thread matt massie
asaph-

this is a much better way of collecting the metrics on linux.  i like that 
your method eliminates 3 threads and all the mutex locking.  i'll 
try out the code and likely include it in the next release.

-matt

Today, Asaph Zemach wrote forth saying...

> Here is a drop-in replacement for linux.c that does not
> use the extra threads and gets rid of the now-unneeded 
> locking. It seems to work. I think it's a little cleaner
> and more maintainable (e.g. no forgotten locking) for the future.
> 
> Decide if you want to keep it.
> 
>   Asaph
> 
> 
> --
> #include 
> #include "ganglia.h"
> #include "metric_typedefs.h"
> 
> /*
> #include "set_metric_val.h"
> */
> 
> #define OSNAME "Linux"
> #define OSNAME_LEN strlen(OSNAME)
> 
> /* Never changes */
> char proc_cpuinfo[BUFFSIZE];
> char proc_sys_kernel_osrelease[BUFFSIZE];
> 
> typedef struct {
>   int last_read;
>   int thresh;
>   char *name;
>   char buffer[BUFFSIZE];
> } timely_file;
> 
> timely_file proc_stat= { 0, 15, "/proc/stat" };
> timely_file proc_loadavg = { 0, 15, "/proc/loadavg" };
> timely_file proc_meminfo = { 0, 30, "/proc/meminfo" };
> 
> char *update_file(timely_file *tf)
> {
>   int now,rval;
>   now = time(0);
>   if(now - tf->last_read > tf->thresh) {
> rval = slurpfile(tf->name, tf->buffer, BUFFSIZE);
> if(rval == SYNAPSE_FAILURE) {
>   err_msg("update_file() got an error from slurpfile() reading %s",
> tf->name);
> }
> else tf->last_read = now;
>   }
>   return tf->buffer;
> }
> 
> 
> 
> 
> 
> 
> /*
>  * This function is called only once by the gmond.  Use to 
>  * initialize data structures, etc or just return SYNAPSE_SUCCESS;
>  */
> g_val_t
> metric_init(void)
> {
>g_val_t rval;
> 
>rval.int32 = slurpfile("/proc/cpuinfo", proc_cpuinfo, BUFFSIZE);
>if ( rval.int32 == SYNAPSE_FAILURE )
>   {
>  err_msg("metric_init() got an error from slurpfile() /proc/cpuinfo");
>  return rval;
>   }  
> 
>rval.int32 = slurpfile( "/proc/sys/kernel/osrelease", 
>proc_sys_kernel_osrelease, BUFFSIZE);
>if ( rval.int32 == SYNAPSE_FAILURE )
>   {
>  err_msg("kernel_func() got an error from slurpfile()");
>  return rval;
>   }   
> 
>/* Get rid of pesky \n in osrelease */
>proc_sys_kernel_osrelease[rval.int32-1] = '\0';
> 
>rval.int32 = SYNAPSE_SUCCESS;
>return rval;
> }
> 
> /*
>  * 
>  */
> 
> g_val_t
> cpu_num_func ( void )
> {
>FILE *f;
>static int cpu_num = 0;
>char line[80];
>g_val_t val;
> 
>/* Only need to do this once */
>if (! cpu_num)
>   {
>  f = fopen("/proc/stat", "r");
>  while (fscanf(f, "%s", line) != EOF)
> if (strncmp(line, "cpu", 3) == 0)
>cpu_num++;
>  fclose(f);
>   }
>val.uint16 = cpu_num - 1;
>return val;
> }
> 
> g_val_t
> cpu_speed_func ( void )
> {
>char *p;
>static g_val_t val = {0};
> 
>if (! val.uint32 )
>   {
>  p = proc_cpuinfo;  
>  p = strstr( p, "cpu MHz" );
>  p = strchr( p, ':' );
>  p++;
>  p = skip_whitespace(p);
>  val.uint32 = (uint32_t)strtol( p, (char **)NULL , 10 );
>   }
>return val;
> }
> 
> g_val_t
> mem_total_func ( void )
> {
>char *p;
>g_val_t val;
> 
>p = strstr( update_file(&proc_meminfo), "MemTotal:");
>p = skip_token(p);
>val.uint32 = strtol( p, (char **)NULL, 10 );
> 
>return val;
> }
> 
> g_val_t
> swap_total_func ( void )
> {
>char *p;
>g_val_t val;
>  
>p = strstr( update_file(&proc_meminfo), "SwapTotal:" );
>p = skip_token(p);
>val.uint32 = strtol( p, (char **)NULL, 10 );  
> 
>return val;
> }
> 
> g_val_t
> boottime_func ( void )
> {
>char *p;
>g_val_t val;
> 
>p = update_file(&proc_stat); 
> 
>p = strstr ( p, "btime" );
>p = skip_token ( p );
>val.uint32 = strtod ( p, (char **)NULL );
> 
>return val;
> }
> 
> g_val_t
> sys_clock_func ( void )
> {
>g_val_t val;
> 
>val.uint32 = time(NULL);
>return val;
> }
> 
> g_val_t
> machine_type_func ( void )
> {
>g_val_t val;
>  
> #ifdef IA64
>snprintf(val.str, MAX_G_STRING_SIZE, "ia64");
> #endif
> #ifdef __i386__
>snprintf(val.str, MAX_G_STRING_SIZE, "x86");
> #endif
> #ifdef __alpha__
>snprintf(val.str, MAX_G_STRING_SIZE, "alpha");
> #endif
>return val;
> }
> 
> g_val_t
> os_name_func ( void )
> {
>g_val_t val;
> 
>snprintf(val.str, MAX_G_STRING_SIZE, "Linux");
>return val;
> }
> 
> g_val_t
> os_release_func ( void )
> {
>g_val_t val;
> 
>snprintf(val.str, MAX_G_STRING_SIZE, "%s", proc_sys_kernel_osrelease);
>return val;
> }
> 
> /*
>  * A helper function to return the total number of cpu jiffies
>  */
> unsigned long
> total_jiffies_func ( void )
> {
>char *p;
>unsigned long user_jiffies, nice_jiffies, system_jiffies, idle_jiffies;
>

Re: [Ganglia-general] linux monitor implementation

2002-04-09 Thread Asaph Zemach
Here is a drop-in replacement for linux.c that does not
use the extra threads and gets rid of the now-unneeded 
locking. It seems to work. I think it's a little cleaner
and more maintainable (e.g. no forgotten locking) for the future.

Decide if you want to keep it.

Asaph


--
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "ganglia.h"
#include "metric_typedefs.h"

/*
#include "set_metric_val.h"
*/

/* Operating-system name for this machine-specific module. */
#define OSNAME "Linux"
/*
 * Length of OSNAME without its terminating NUL.  Computed with sizeof so it
 * is a compile-time constant, and parenthesized so it can be used safely
 * inside larger expressions.
 */
#define OSNAME_LEN (sizeof(OSNAME) - 1)

/*
 * Never changes: in the code visible here, metric_init() fills both buffers
 * once through slurpfile(); cpu_speed_func() and os_release_func() read them
 * afterwards.
 */
char proc_cpuinfo[BUFFSIZE];
char proc_sys_kernel_osrelease[BUFFSIZE];

/*
 * A file cached in memory, together with the bookkeeping update_file()
 * uses to decide when the cached copy must be read again.
 */
typedef struct {
  int last_read;          /* time(0) value stored by update_file() after a successful read */
  int thresh;             /* update_file() re-reads once (now - last_read) exceeds this */
  char *name;             /* path passed to slurpfile() */
  char buffer[BUFFSIZE];  /* destination buffer passed to slurpfile() */
} timely_file;

timely_file proc_stat= { 0, 15, "/proc/stat" };
timely_file proc_loadavg = { 0, 15, "/proc/loadavg" };
timely_file proc_meminfo = { 0, 30, "/proc/meminfo" };

/*
 * Return the cached contents of tf, refreshing them first when more than
 * tf->thresh seconds have passed since the last successful read.
 *
 * A failed refresh is reported through err_msg() and leaves last_read
 * untouched, so the next call tries again.  The buffer is returned either
 * way.
 */
char *update_file(timely_file *tf)
{
  int current_time = time(0);
  int status;

  /* Cached copy is still recent enough: hand it back as is. */
  if (current_time - tf->last_read <= tf->thresh) {
    return tf->buffer;
  }

  status = slurpfile(tf->name, tf->buffer, BUFFSIZE);
  if (status != SYNAPSE_FAILURE) {
    tf->last_read = current_time;
  } else {
    err_msg("update_file() got an error from slurpfile() reading %s",
            tf->name);
  }

  return tf->buffer;
}






/*
 * This function is called only once by the gmond.  It loads the two files
 * that are read a single time, /proc/cpuinfo and
 * /proc/sys/kernel/osrelease, into their global buffers.
 *
 * Returns a g_val_t whose int32 member is SYNAPSE_SUCCESS when both reads
 * worked, or the SYNAPSE_FAILURE value returned by slurpfile() otherwise.
 */
g_val_t
metric_init(void)
{
   g_val_t rval;

   rval.int32 = slurpfile("/proc/cpuinfo", proc_cpuinfo, BUFFSIZE);
   if ( rval.int32 == SYNAPSE_FAILURE )
      {
         err_msg("metric_init() got an error from slurpfile() /proc/cpuinfo");
         return rval;
      }

   rval.int32 = slurpfile( "/proc/sys/kernel/osrelease",
                           proc_sys_kernel_osrelease, BUFFSIZE);
   if ( rval.int32 == SYNAPSE_FAILURE )
      {
         err_msg("metric_init() got an error from slurpfile() /proc/sys/kernel/osrelease");
         return rval;
      }

   /*
    * Get rid of pesky \n in osrelease.  The return value of slurpfile() is
    * used as the number of bytes stored (as the original code did), so make
    * sure the index stays inside the buffer and only drop the last byte when
    * it really is a newline.
    */
   if ( rval.int32 > 0 && rval.int32 <= BUFFSIZE
        && proc_sys_kernel_osrelease[rval.int32-1] == '\n' )
      {
         proc_sys_kernel_osrelease[rval.int32-1] = '\0';
      }

   rval.int32 = SYNAPSE_SUCCESS;
   return rval;
}

/*
 * Count the CPUs listed in /proc/stat.
 *
 * Every whitespace-separated token that starts with "cpu" is counted and
 * one is subtracted from the total (presumably for the aggregate "cpu"
 * line that precedes the per-CPU lines -- confirm against proc(5)).
 *
 * The count is cached in a static after the first successful scan.  If
 * /proc/stat cannot be opened, or no matching token is found, uint16 is
 * set to 0 and the next call scans again.
 */
g_val_t
cpu_num_func ( void )
{
   FILE *f;
   static int cpu_num = 0;
   char line[80];
   g_val_t val;

   /* Only need to do this once */
   if (! cpu_num)
      {
         f = fopen("/proc/stat", "r");
         if (! f)
            {
               val.uint16 = 0;
               return val;
            }

         /* The field width keeps an overlong token from overrunning line[] */
         while (fscanf(f, "%79s", line) == 1)
            {
               if (strncmp(line, "cpu", 3) == 0)
                  cpu_num++;
            }
         fclose(f);
      }

   /* Avoid wrapping to 65535 when nothing was counted */
   val.uint16 = (cpu_num > 0) ? cpu_num - 1 : 0;
   return val;
}

/*
 * Return the number that follows the first "cpu MHz" label in the cached
 * /proc/cpuinfo text, truncated to an integer by strtol().
 *
 * The result is kept in a static and reused once it is non-zero.  When the
 * label or its ':' separator is missing, the value stays 0 instead of
 * dereferencing a NULL search result.
 */
g_val_t
cpu_speed_func ( void )
{
   char *p;
   static g_val_t val = {0};

   if (! val.uint32 )
      {
         p = strstr( proc_cpuinfo, "cpu MHz" );
         if ( p )
            p = strchr( p, ':' );

         if ( p )
            {
               p++;
               p = skip_whitespace(p);
               val.uint32 = (uint32_t)strtol( p, (char **)NULL , 10 );
            }
      }
   return val;
}

/*
 * Return the number that follows the "MemTotal:" label in the cached
 * /proc/meminfo text.
 *
 * update_file() returns its buffer even when the read failed, so the label
 * may be absent; in that case uint32 is 0 rather than the result of parsing
 * through a NULL pointer.
 */
g_val_t
mem_total_func ( void )
{
   char *p;
   g_val_t val;

   val.uint32 = 0;

   p = strstr( update_file(&proc_meminfo), "MemTotal:");
   if ( p )
      {
         p = skip_token(p);
         val.uint32 = strtol( p, (char **)NULL, 10 );
      }

   return val;
}

/*
 * Return the number that follows the "SwapTotal:" label in the cached
 * /proc/meminfo text.
 *
 * update_file() returns its buffer even when the read failed, so the label
 * may be absent; in that case uint32 is 0 rather than the result of parsing
 * through a NULL pointer.
 */
g_val_t
swap_total_func ( void )
{
   char *p;
   g_val_t val;

   val.uint32 = 0;

   p = strstr( update_file(&proc_meminfo), "SwapTotal:" );
   if ( p )
      {
         p = skip_token(p);
         val.uint32 = strtol( p, (char **)NULL, 10 );
      }

   return val;
}

/*
 * Return the number that follows the "btime" label in the cached
 * /proc/stat text.
 *
 * The value is parsed as an unsigned decimal integer with strtoul() instead
 * of going through a double.  When the label is absent (for example after a
 * failed read of /proc/stat) uint32 is 0.
 */
g_val_t
boottime_func ( void )
{
   char *p;
   g_val_t val;

   val.uint32 = 0;

   p = update_file(&proc_stat);

   p = strstr ( p, "btime" );
   if ( p )
      {
         p = skip_token ( p );
         val.uint32 = strtoul ( p, (char **)NULL, 10 );
      }

   return val;
}

/*
 * Return the current value of time(NULL) in the uint32 member.
 */
g_val_t
sys_clock_func ( void )
{
   g_val_t clock_val;
   time_t now = time(NULL);

   clock_val.uint32 = now;
   return clock_val;
}

g_val_t
machine_type_func ( void )
{
   g_val_t val;
 
#ifdef IA64
   snprintf(val.str, MAX_G_STRING_SIZE, "ia64");
#endif
#ifdef __i386__
   snprintf(val.str, MAX_G_STRING_SIZE, "x86");
#endif
#ifdef __alpha__
   snprintf(val.str, MAX_G_STRING_SIZE, "alpha");
#endif
   return val;
}

/*
 * Return the operating-system name, "Linux", in the str member.
 */
g_val_t
os_name_func ( void )
{
   g_val_t name;

   snprintf(name.str, MAX_G_STRING_SIZE, "%s", "Linux");
   return name;
}

/*
 * Return a copy of the cached proc_sys_kernel_osrelease text in the str
 * member, limited to MAX_G_STRING_SIZE bytes by snprintf().
 */
g_val_t
os_release_func ( void )
{
   g_val_t release;

   snprintf(release.str, MAX_G_STRING_SIZE, "%s", proc_sys_kernel_osrelease);
   return release;
}

/*
 * A helper function to return the total number of cpu jiffies.
 *
 * Skips the first token of the cached /proc/stat text and adds up the four
 * numbers that follow it (user, nice, system and idle jiffies).  The
 * counters are parsed with strtoul() so they stay integers throughout
 * instead of passing through a double.
 */
unsigned long
total_jiffies_func ( void )
{
   enum { JIFFY_FIELDS = 4 };   /* user, nice, system, idle */
   char *p;
   unsigned long total = 0;
   int i;

   p = update_file(&proc_stat);
   p = skip_token(p);

   for (i = 0; i < JIFFY_FIELDS; i++)
      {
         p = skip_whitespace(p);
         /* strtoul() advances p past the digits it consumed */
         total += strtoul( p, &p, 10 );
      }

   return total;
}

g_val_t
cpu_user_func ( void )
{
   char *p;
   g_val_t val;
   static double last_user_jiffies,  user_jiffies, 
 last_total_jiffies, total_jiffies, diff;

   p = update_file(&proc_stat);
 
   p = skip_token(p);
   user_jiffies  = strtod( p , (char **)NULL );
   total_jiffies = total_jiffies_func();

Re: [Ganglia-general] linux monitor implementation

2002-04-08 Thread matt massie
Today, Asaph Zemach wrote forth saying...

> Hi,
> 
>   I've been looking over the gmond sources, and I was wondering
> why you saw the need to create the three threads:
> proc_stat_thr
> proc_loadavg_thr
> proc_meminfo_thr
> Was there some problem in having the monitor thread perform
> these actions?
> 
> I couldn't find an answer in the documentation. Sorry if
> I missed it.

Asaph-

Ganglia is built to be easily portable to other architectures.  If I were 
to lock it into being simply a Linux tool, then having the 
proc_stat_thr/proc_loadavg_thr/proc_meminfo_thr threads would 
not be necessary as I could merge them into the monitor thread.  However I 
wanted the monitor thread to work at a more abstract machine-independent 
level.  The monitor thread shouldn't care about the specifics of how the 
metrics are collected; it is only responsible for keeping track of value 
and time thresholds and making sure new data gets multicast.

When I was building the machine-specific file for Linux, I spun off the 
proc_stat_thr, proc_loadavg_thr, and proc_meminfo_thr to reduce the number 
of times that the /proc/stat, /proc/loadavg and /proc/meminfo files were 
opened.  Many of the metric functions read the same file, and I didn't 
want each one opening and closing the same file.  For example, load_one 
load_five, load_fifteen, proc_total and proc_run all come from the 
/proc/loadavg file.  To prevent those functions from opening/closing the 
/proc/loadavg file each time they were called.. they read the file from 
memory which is updated by the proc_loadavg_thr as necessary.

I hope this makes sense.  If not, feel free to email back.
-matt




[Ganglia-general] linux monitor implementation

2002-04-08 Thread Asaph Zemach
Hi,

  I've been looking over the gmond sources, and I was wondering
why you saw the need to create the three threads:
proc_stat_thr
proc_loadavg_thr
proc_meminfo_thr
Was there some problem in having the monitor thread perform
these actions?

I couldn't find an answer in the documentation. Sorry if
I missed it.

Thanks,
Asaph