Influenced by several sources including gexec_process.c, Steven's email below, and top.c from procps 2.0.7, I have written a process-gathering testbench that directly parses the /proc filesystem in Linux.

Take a look (and try it, it's only one file). I make some perhaps controversial decisions on how to report memory usage. When considering them, keep in mind how fork() and copy-on-write work. Also, check
/usr/src/linux/fs/proc/array.c to see how /proc/pid/statm is reported.

The processes are sorted by %CPU, and only the top 5 are reported by default. Hopefully this test will form the base for process reporting in gmond.

Fed.

/*
 * A simple test bench for reporting the top 5 processes by CPU
 * usage. Based on code from GNU's procps 2.0.7 package,
 * Copyright (C) 1996 Charles L. Blake, 1998 Michael K. Johnson,
 * and inherits the terms of the GNU Library General Public License.
 *
 * Compile with:
 * gcc -I. -g ps.c -o myps
 *
 * Copyright 2002 Federico Sacerdoti <[EMAIL PROTECTED]>
 * and SDSC.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>        /* for strstr, strlen, strcpy */
#include <pwd.h>
#include <sys/time.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/param.h>     /* for HZ */
#include <asm/page.h>      /* for PAGE_SHIFT */
#include <sys/stat.h>
#include <unistd.h>

/* useful macros */
#define bytetok(x)   (((x) + 512) >> 10)
#define pagetok(x)   ((x) << (PAGE_SHIFT - 10))

/* The number of processes to report. */
#define SHOWPROCS 5

/* Per-process sample, filled in by read_proc() and finished by main().
   NOTE: read_proc() converts all the memory fields from pages to KB
   with pagetok() before returning, so by the time anyone reads this
   struct the unsigned int fields below are KB, not page counts. */
struct process {
    char user[32];         /* Owner's login name, or decimal uid string */
    int pid;
    char name[64];         /* Command name from /proc/pid/stat, ')' stripped */
    float percent_cpu;     /* Scratch: jiffy count between read_proc() calls,
                              then %CPU once main() divides by HZ*delta */
    unsigned long utime;   /* User-mode jiffies from /proc/pid/stat */
    unsigned long stime;   /* Kernel-mode jiffies from /proc/pid/stat */
    unsigned long starttime; /* Jiffies after boot when the process started */
    float percent_mem;     /* resident as a percentage of total memory */
    unsigned int text;       /* Program text, bss memory size, in KB */
    unsigned int data;      /* Program heap+stack size (resident-shared), in KB */
    unsigned int shared;   /* Size of shared libs + IPC shm, in KB */
    unsigned int resident; /* Memory resident in core, in KB */
    unsigned int vm;       /* Total virtual memory allocated, in KB */
    unsigned int major_faults;   /* Page faults that required a read from disk */
};

/* This is junk of course, to be replaced soon */
/*
 * Map a numeric uid to its login name.  Falls back to the decimal uid
 * rendered as a string when no passwd entry exists.
 *
 * NOTE: the returned pointer refers either to getpwuid()'s static
 * storage or to a static local buffer, so it is only valid until the
 * next call and this function is not thread-safe (as the original
 * comment admitted, this is junk to be replaced).
 */
const char *uid_to_user(int uid)
{
   static char txt[64];
   struct passwd *p = getpwuid(uid);   /* no reason for p to be static */

   if (p)
      return p->pw_name;
   /* Bounded conversion: the old sprintf() had no overflow guard. */
   snprintf(txt, sizeof(txt), "%d", uid);
   return txt;
}

/* Returns the interval between calls to this function, in seconds. */
static float
get_elapsed_time(void)
{
   struct timeval t;
   static struct timeval oldtime;
   struct timezone timez;
   float elapsed_time;

   gettimeofday(&t, &timez);
   elapsed_time = (t.tv_sec - oldtime.tv_sec)
      + (float) (t.tv_usec - oldtime.tv_usec) / 1000000.0;
   oldtime.tv_sec  = t.tv_sec;
   oldtime.tv_usec = t.tv_usec;

   return elapsed_time;
}

/*
 * Return the machine's total physical memory in KB, parsed from the
 * "MemTotal:" line of /proc/meminfo.  Returns 0 on any error.
 */
static unsigned long
total_mem_kb(void)
{
   size_t len;
   FILE *meminfo;
   char buffer[2048];
   const char *p;
   unsigned long memtotal = 0;   /* was uninitialized: a failed sscanf
                                    used to return stack garbage */

   meminfo = fopen("/proc/meminfo", "r");
   if (!meminfo)
      return 0;

   len = fread(buffer, sizeof(char), sizeof(buffer)-1, meminfo);
   buffer[len] = '\0';
   fclose(meminfo);

   p = strstr(buffer, "MemTotal:");
   if (!p)
      return 0;

   if (sscanf(p, "MemTotal: %lu ", &memtotal) != 1)
      return 0;
   return memtotal;
}

/* A more efficient, thread-safe pid->uid->username mapper. */
static void
pid2name(int pid, char *name)
{
   struct stat sb;
   char buffer[32];
   struct passwd *pw;
   int rc, uid;

   /* To insure a NULL string is returned on error. */
   *name='\0';

   sprintf(buffer, "/proc/%d", pid);

   rc=stat(buffer, &sb);
   if (rc<0) return;
   uid=sb.st_uid;

   pw=getpwuid(uid);
   if (!pw)
      sprintf(name,"%d",uid);
   else
      strcpy(name, pw->pw_name);

   /* Cannot free this? free(pw); */
}

/*
 * qsort() comparator ordering processes by DESCENDING %CPU: returns
 * 1 if P belongs after Q, -1 if before, 0 if equal.
 *
 * BUG FIX: the second test previously repeated '<', so the function
 * could never return -1 and the sort order was effectively arbitrary
 * for unequal elements.
 */
int percent_cpu_sort (struct process *P, struct process *Q)
{
   if (P->percent_cpu < Q->percent_cpu) return 1;
   if (P->percent_cpu > Q->percent_cpu) return -1;
   return 0;
}

/*
 * Fill procs[index] with data for 'pid', parsed from /proc/pid/stat
 * and /proc/pid/statm.  On the first call for a slot, percent_cpu ends
 * up holding the process's total jiffies; on a repeat call it holds
 * the jiffies consumed since the previous call (main() later converts
 * that to a percentage).  All memory figures are converted to KB.
 */
static void read_proc(int pid, struct process *procs, int index)
{
   char filename[128];
   char line[512];
   FILE *fp;                 /* renamed: 'stat' shadowed stat(2) */
   struct process *proc;
   int n;

   if (!procs || index < 0) return;
   proc = &procs[index];

   sprintf(filename, "/proc/%d/stat", pid);
   fp = fopen(filename, "r");
   if (!fp) {
     printf("WARNING: process %d could not be found.",pid);
     return;
   }
   if (!fgets(line, sizeof(line), fp) || !*line) {
      fclose(fp);            /* BUG FIX: this stream used to leak here */
      return;
   }
   fclose(fp);

   /* NOTE: %s stops at whitespace, so command names containing spaces
      are truncated.  major_faults is an unsigned int, hence %u (the
      old %u/%lu mismatch was undefined behavior). */
   n = sscanf(line, "%d (%s %*c %*d %*d %*d %*d %*d "  /* pid .. tpgid */
      "%*lu %*lu %*lu %u %*lu "  /* flags .. cmajflt */
      "%lu %lu %*ld %*ld %*ld "  /* utime .. priority */
      "%*ld %*lu %*ld %lu ",     /* nice .. starttime */
      &proc->pid, proc->name,
      &proc->major_faults,
      &proc->utime, &proc->stime,
      &proc->starttime);
   if (n < 6) return;         /* malformed stat line: keep slot untouched */
   /* Remove the trailing ')' from the name */
   proc->name[strlen(proc->name)-1]='\0';

   /* Use the percent_cpu field to hold the number of new jiffies this
      process used since last time. */
   proc->percent_cpu = proc->utime + proc->stime - proc->percent_cpu;

   /* Get virtual memory statistics */
   sprintf(filename, "/proc/%d/statm", pid);
   fp = fopen(filename, "r");
   if (!fp) {
     printf("WARNING: process %d could not be found.",pid);
     return;
   }
   /* BUG FIX: this fgets previously read from the (already closed)
      stat stream, so the statm fields were never parsed. */
   if (!fgets(line, sizeof(line), fp) || !*line) {
      fclose(fp);
      return;
   }
   fclose(fp);

   /* The actual format is different than reported by the proc manpage.
      See /usr/src/linux/fs/proc/array.c.
      Size, Resident, Shared, Trs, Lrs, Drs, Dt.
      These struct fields are unsigned ints, hence %u. */
   sscanf(line, "%u %u %u %u %*u %*u %*u",
      &proc->vm, &proc->resident, &proc->shared, &proc->text);

   /* Data is defined as resident - shared. This may include text, and may not.
      Read-write heap pages are never shared. The stack is never shared. */
   proc->data = proc->resident - proc->shared;

   /* Convert pages to KB */
   proc->resident = pagetok(proc->resident);
   proc->text = pagetok(proc->text);
   proc->data = pagetok(proc->data);
   proc->shared = pagetok(proc->shared);
   proc->vm = pagetok(proc->vm);
}

/* Data is defined as resident - shared. This may include text, and may not.
Read-write heap pages are never shared. The stack is never shared.

Fork() calls will increase the number of shared pages because of copy-on-write
semantics. A page is "shared" if its reference count > 1. This figure therefore
counts more than just IPC shared memory pages.
*/

/*
 * Walk /proc twice, separated by a short sleep, to measure per-process
 * CPU jiffy deltas, then print the top SHOWPROCS processes by %CPU.
 */
int
main(void)
{
   int pid, npids=0;
   int i=0;
   struct process *procs;      /* One slot per running process */
   struct process *p;
   DIR *d;
   struct dirent *de;
   float delta;
   unsigned long memtotal;

   if (!(d = opendir ("/proc"))) {
     perror ("/proc"); exit (1);
   }
   /* First pass to see how many processes we have running: every
      numeric directory entry in /proc is a pid. */
   while ((de = readdir (d)))
      if ((pid = atoi (de->d_name)))
         npids++;
   procs = (struct process *) calloc(npids, sizeof(*procs));
   if (!procs) {               /* BUG FIX: allocation was unchecked */
      perror("calloc"); closedir(d); exit(1);
   }

   /* Second pass to populate our procs list */
   rewinddir(d);
   get_elapsed_time();         /* prime the interval timer */
   i=0;
   while ((de = readdir (d)))
      if ((pid = atoi (de->d_name))) {
         /* Ensure no extra pids have been created since we counted. */
         if (i>=npids) break;
         read_proc(pid, procs, i);
         i++;
      }

   /* Sleep a bit so we don't grab all the %cpu while measuring it.
      Also improves the quality of measurements by increasing delta. */
   usleep(800000);

   /* Third pass to calculate percent CPU */
   rewinddir(d);
   delta = get_elapsed_time();
   i=0;
   while ((de = readdir (d)))
      if ((pid = atoi (de->d_name))) {
         /* Ensure no extra pids have been created since we counted. */
         if (i>=npids) break;
         read_proc(pid, procs, i);
         i++;
      }
   closedir (d);

   /* Hoisted out of the loop below: total memory doesn't change. */
   memtotal = total_mem_kb();
   printf("Total Main Memory (from /proc/meminfo): %lu KB\n", memtotal);
   printf("Time Delta for %%CPU=%.4fs\n",delta);

   /* Fill in the remaining process fields. */
   for (i=0; i<npids; i++) {
      p=&procs[i];
      /* p->percent_cpu currently holds the jiffies this process used
         during the measurement interval; convert to a percentage. */
      p->percent_cpu = (p->percent_cpu * 100/HZ) / delta;
      if (p->percent_cpu > 99.9) p->percent_cpu=99.9;

      /* Guard against a failed /proc/meminfo read (memtotal==0). */
      p->percent_mem = memtotal
         ? 100.00 * (p->resident / (float) memtotal) : 0.0;
      if (p->percent_mem > 99.9) p->percent_mem=99.9;

      pid2name(p->pid, p->user);
   }

   /* Sort by %CPU.  The cast adapts percent_cpu_sort's typed
      signature to the comparator type qsort expects. */
   qsort(procs, npids, sizeof(*procs),
         (int (*)(const void *, const void *)) percent_cpu_sort);

   printf("The top %d processes, sorted by %%CPU:\n", SHOWPROCS);
   for (i=0; i<SHOWPROCS; i++) {
      if (i>=npids) break;
      p=&procs[i];

      printf("Process %d, %s (%s): %%CPU=%.1f %%MEM=%.1f. Memory: "
         "text=%d, data=%d, shared=%d, VMtotal=%d (KB).\n",
         p->pid, p->name, p->user, p->percent_cpu, p->percent_mem,
         p->text, p->data, p->shared, p->vm);
   }

   free(procs);                /* was leaked before */
   return 0;
}



On Tuesday, August 27, 2002, at 04:16 PM, Steven Wagner wrote:

I don't have code but I'm happy to dump core on the subject.

The one thing that various OSes all seem to have in common so far is they all have /proc or a similar interface for each running process. Although I haven't found a nice flat file sort of interface in the (info-rich) Linux procfs, I can tell you what top does on all supported platforms.

It walks /proc (or calls the equivalent system call, if /proc isn't available). In Linux's case it would go through all numeric directories in /proc and sum up each process's utime and stime jiffies, then run through all the running processes again and divide each one by the total to determine percentages.

I found the sample code in top 3.9beta5. I'll paste it at the end of this message.

In any case, Solaris, Tru64 and (apparently) IRIX actually have this information in the procinfo struct, which saves a couple walks through that particular hash. So this implementation is Linux-specific (I'm also leaving out the supporting functions although I think they're mostly self-explanatory)... there's another function which actually does the second walk but this should give you an idea.

Nice work on gmetad, BTW ... after 2.5.0 releases and it's deployed throughout my Linux cluster I'll "return to the fold" and ditch my hacked 0.1 version ... ;)

From $TOP_BUILD_ROOT/machine/m_linux.c:

/*
 * (Quoted from top 3.9beta5, machine/m_linux.c.)
 * Parse /proc/<pid>/stat into *proc: command name, one-letter state,
 * cumulative CPU time (utime+stime jiffies), priority, nice, vsize
 * and rss.  Relies on skip_ws(), skip_token() and proc_owner(), which
 * are defined elsewhere in m_linux.c.
 *
 * NOTE(review): the open()/read() results are unchecked — if the pid
 * vanishes between the directory walk and this call, 'len' is -1 and
 * buffer[len] indexes out of bounds.  Also assumes the current working
 * directory is /proc (the path built is "<pid>/stat").
 */
static void
read_one_proc_stat(pid_t pid, struct top_proc *proc)
{
    char buffer[4096], *p;

    /* grab the proc stat info in one go */
    {
        int fd, len;

        sprintf(buffer, "%d/stat", pid);

        fd = open(buffer, O_RDONLY);
        len = read(fd, buffer, sizeof(buffer)-1);
        close(fd);

        buffer[len] = '\0';
    }

    proc->uid = proc_owner(pid);

    /* parse out the status */

    /* The command name is parenthesized and may itself contain spaces
       or a ')' — hence strrchr to find the LAST closing paren. */
    p = buffer;
    p = strchr(p, '(')+1;                       /* skip pid */
    {
        char *q = strrchr(p, ')');
        int len = q-p;
        if (len >= sizeof(proc->name))
            len = sizeof(proc->name)-1;
        memcpy(proc->name, p, len);
        proc->name[len] = 0;
        p = q+1;
    }

    p = skip_ws(p);
    switch (*p++)
    {
      case 'R': proc->state = 1; break;
      case 'S': proc->state = 2; break;
      case 'D': proc->state = 3; break;
      case 'Z': proc->state = 4; break;
      case 'T': proc->state = 5; break;
      case 'W': proc->state = 6; break;
    }

    p = skip_token(p);                          /* skip ppid */
    p = skip_token(p);                          /* skip pgrp */
    p = skip_token(p);                          /* skip session */
    p = skip_token(p);                          /* skip tty */
    p = skip_token(p);                          /* skip tty pgrp */
    p = skip_token(p);                          /* skip flags */
    p = skip_token(p);                          /* skip min flt */
    p = skip_token(p);                          /* skip cmin flt */
    p = skip_token(p);                          /* skip maj flt */
    p = skip_token(p);                          /* skip cmaj flt */

    proc->time = strtoul(p, &p, 10);            /* utime */
    proc->time += strtoul(p, &p, 10);           /* stime */

    p = skip_token(p);                          /* skip cutime */
    p = skip_token(p);                          /* skip cstime */

    proc->pri = strtol(p, &p, 10);              /* priority */
    proc->nice = strtol(p, &p, 10);             /* nice */

    p = skip_token(p);                          /* skip timeout */
    p = skip_token(p);                          /* skip it_real_val */
    p = skip_token(p);                          /* skip start_time */

    proc->size = bytetok(strtoul(p, &p, 10));   /* vsize */
    proc->rss = pagetok(strtoul(p, &p, 10));    /* rss */
#if 0
    /* for the record, here are the rest of the fields */
    p = skip_token(p);                          /* skip rlim */
    p = skip_token(p);                          /* skip start_code */
    p = skip_token(p);                          /* skip end_code */
    p = skip_token(p);                          /* skip start_stack */
    p = skip_token(p);                          /* skip sp */
    p = skip_token(p);                          /* skip pc */
    p = skip_token(p);                          /* skip signal */
    p = skip_token(p);                          /* skip sigblocked */
    p = skip_token(p);                          /* skip sigignore */
    p = skip_token(p);                          /* skip sigcatch */
    p = skip_token(p);                          /* skip wchan */
#endif
}

Federico Sacerdoti wrote:
Hi,
I'm trying to write a test bench program that reports basic things about the top five processes, based on percent CPU usage (in Linux). Since it seems you are working on this problem, I thought I would ask you for some help. You recently mentioned some vengeance was brought to bear on the prickly problem of determining %CPU. That seems to be exactly what I need.
Do you have some code that would help shed some light on this matter?
Thanks,
Federico
On Wednesday, August 21, 2002, at 11:46 AM, Steven Wagner wrote:
Steven Wagner wrote:

matt massie wrote:

guys-

i just checked out our latest source on freebsd and solaris and it
wasn't happy. i was a little too naive about the way i stitched in the libdnet source. i've added the necessary files to make ganglia happy again on solaris, freebsd and likely other oses as well (since i added the
complete intf and eth support files, autoconf tests etc).

i added the mtu_func to solaris.c (it was a simple cut and paste from linux.c).

since i'm running the monitoring core on source forge's compile farm .. i'm not able to test them as i'd like. let me know what you guys find on FreeBSD, Solaris, et al.

Basically what it comes down to is that top (and, as far as I know, the Linux kernel) does a weighted average between the last calculated value and the current value, adjusting the weight according to how much time it's been since the last update. My code isn't doing this so it looks very boring.
[but don't hold the release up on my account]


Actually, top is sneakier than I thought. The code that I thought was doing CPU utilization was the per-process cycle counter, not the per-state cycle counter. The percentage calculation for CPU states was in another (non-Solaris-specific) function, and uses Crazy Number Magic[tm] to get the percentages it does.

Sheesh, I thought you'd just take the difference between your last measured value and the current measured value from each state counter and divide that by the sum of the difference from all the state counters, multiply by a hundred, maybe shave it to a couple significant figures and *bam* you've got the percentages for each state...

Anyway, diff enclosed. It has some code that, as they say, "has no direct application" at this time, but it will be used for some lightweight process work that I'm going to have to do in order to get the proc_run metric.

And in the spirit of 'fattest disk partition,' how about 'busiest process' stats? CPU/mem/owner + name + args? Just a thought...




-------------------------------------------------------
This sf.net email is sponsored by: OSDN - Tired of that same old
cell phone?  Get a new here for FREE!
https://www.inphonic.com/r.asp?r=sourceforge1&refcode1=vs3390
_______________________________________________
Ganglia-developers mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/ganglia-developers

Federico
Rocks Cluster Group, Camp X-Ray, SDSC, San Diego
GPG Fingerprint: 3C5E 47E7 BDF8 C14E ED92  92BB BA86 B2E6 0390 8845



Federico

Rocks Cluster Group, Camp X-Ray, SDSC, San Diego
GPG Fingerprint: 3C5E 47E7 BDF8 C14E ED92  92BB BA86 B2E6 0390 8845

Reply via email to