Module Name: src Committed By: matt Date: Sat Jan 29 17:35:25 UTC 2011
Modified Files: src/sys/kern: subr_evcnt.c src/sys/sys: sysctl.h Log Message: Add a sysctl to retrieve evcnts from the kernel. You can tell it to limit to a specific type and/or all or just evcnts with non-zero counts. To generate a diff of this commit: cvs rdiff -u -r1.8 -r1.9 src/sys/kern/subr_evcnt.c cvs rdiff -u -r1.191 -r1.192 src/sys/sys/sysctl.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/subr_evcnt.c diff -u src/sys/kern/subr_evcnt.c:1.8 src/sys/kern/subr_evcnt.c:1.9 --- src/sys/kern/subr_evcnt.c:1.8 Tue Jan 18 08:16:43 2011 +++ src/sys/kern/subr_evcnt.c Sat Jan 29 17:35:24 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: subr_evcnt.c,v 1.8 2011/01/18 08:16:43 matt Exp $ */ +/* $NetBSD: subr_evcnt.c,v 1.9 2011/01/29 17:35:24 matt Exp $ */ /* * Copyright (c) 1996, 2000 Christopher G. Demetriou @@ -77,17 +77,20 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: subr_evcnt.c,v 1.8 2011/01/18 08:16:43 matt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_evcnt.c,v 1.9 2011/01/29 17:35:24 matt Exp $"); #include <sys/param.h> -#include <sys/device.h> +#include <sys/evcnt.h> +#include <sys/kmem.h> #include <sys/mutex.h> +#include <sys/sysctl.h> #include <sys/systm.h> /* list of all events */ struct evcntlist allevents = TAILQ_HEAD_INITIALIZER(allevents); -static kmutex_t evmtx; +static kmutex_t evcnt_lock __cacheline_aligned; static bool init_done; +static uint32_t evcnt_generation; /* * We need a dummy object to stuff into the evcnt link set to @@ -108,7 +111,7 @@ KASSERT(!init_done); - mutex_init(&evmtx, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&evcnt_lock, MUTEX_DEFAULT, IPL_NONE); init_done = true; @@ -146,9 +149,9 @@ #endif ev->ev_namelen = len; - mutex_enter(&evmtx); + mutex_enter(&evcnt_lock); TAILQ_INSERT_TAIL(&allevents, ev, ev_list); - mutex_exit(&evmtx); + mutex_exit(&evcnt_lock); } /* @@ -186,7 +189,171 @@ evcnt_detach(struct evcnt *ev) { - mutex_enter(&evmtx); + mutex_enter(&evcnt_lock); TAILQ_REMOVE(&allevents, ev, ev_list); - mutex_exit(&evmtx); + evcnt_generation++; + mutex_exit(&evcnt_lock); +} + +struct xevcnt_sysctl { + struct evcnt_sysctl evs; + char ev_strings[2*EVCNT_STRING_MAX]; +}; + +static size_t +sysctl_fillevcnt(const struct evcnt *ev, struct xevcnt_sysctl *xevs, + size_t *copylenp) +{ + const size_t copylen = offsetof(struct evcnt_sysctl, ev_strings) + + ev->ev_grouplen + 1 + ev->ev_namelen + 1; + const 
size_t len = roundup2(copylen, sizeof(uint64_t)); + if (xevs != NULL) { + xevs->evs.ev_count = ev->ev_count; + xevs->evs.ev_addr = PTRTOUINT64(ev); + xevs->evs.ev_parent = PTRTOUINT64(ev->ev_parent); + xevs->evs.ev_type = ev->ev_type; + xevs->evs.ev_grouplen = ev->ev_grouplen; + xevs->evs.ev_namelen = ev->ev_namelen; + xevs->evs.ev_len = len / sizeof(uint64_t); + strcpy(xevs->evs.ev_strings, ev->ev_group); + strcpy(xevs->evs.ev_strings + ev->ev_grouplen + 1, ev->ev_name); + } + + *copylenp = copylen; + return len; +} + +static int +sysctl_doevcnt(SYSCTLFN_ARGS) +{ + struct xevcnt_sysctl *xevs0 = NULL, *xevs; + const struct evcnt *ev; + int error; + int retries; + size_t needed, len; + char *dp; + + if (namelen == 1 && name[0] == CTL_QUERY) + return (sysctl_query(SYSCTLFN_CALL(rnode))); + + if (namelen != 2) + return (EINVAL); + + /* + * We can filter on the type of evcnt. + */ + const int filter = name[0]; + if (filter != EVCNT_TYPE_ANY + && filter != EVCNT_TYPE_MISC + && filter != EVCNT_TYPE_INTR + && filter != EVCNT_TYPE_TRAP) + return (EINVAL); + + const u_int count = name[1]; + if (count != KERN_EVCNT_COUNT_ANY + && count != KERN_EVCNT_COUNT_NONZERO) + return (EINVAL); + + sysctl_unlock(); + + if (oldp != NULL && xevs0 == NULL) + xevs0 = kmem_alloc(sizeof(*xevs0), KM_SLEEP); + + retries = 100; + retry: + dp = oldp; + len = (oldp != NULL) ? *oldlenp : 0; + xevs = xevs0; + error = 0; + needed = 0; + + mutex_enter(&evcnt_lock); + TAILQ_FOREACH(ev, &allevents, ev_list) { + if (filter != EVCNT_TYPE_ANY && filter != ev->ev_type) + continue; + if (count == KERN_EVCNT_COUNT_NONZERO && ev->ev_count == 0) + continue; + + /* + * Prepare to copy. If xevs is NULL, fillevcnt will just + * how big the item is. 
+ */ + size_t copylen; + const size_t elem_size = sysctl_fillevcnt(ev, xevs, &copylen); + needed += elem_size; + + if (len < elem_size) { + xevs = NULL; + continue; + } + + KASSERT(xevs != NULL); + KASSERT(xevs->evs.ev_grouplen != 0); + KASSERT(xevs->evs.ev_namelen != 0); + KASSERT(xevs->evs.ev_strings[0] != 0); + + const uint32_t last_generation = evcnt_generation; + mutex_exit(&evcnt_lock); + + /* + * Only copy the actual number of bytes, not the rounded + * number. If we did the latter we'd have to zero them + * first or we'd leak random kernel memory. + */ + error = copyout(xevs, dp, copylen); + + mutex_enter(&evcnt_lock); + if (error) + break; + + if (__predict_false(last_generation != evcnt_generation)) { + /* + * This sysctl node is only for statistics. + * Retry; if the queue keeps changing, then + * bail out. + */ + if (--retries == 0) { + error = EAGAIN; + break; + } + mutex_exit(&evcnt_lock); + goto retry; + } + + /* + * Now we deal with the pointer/len since we aren't going to + * toss their values away. 
+ */ + dp += elem_size; + len -= elem_size; + } + mutex_exit(&evcnt_lock); + + if (xevs0 != NULL) + kmem_free(xevs0, sizeof(*xevs0)); + + sysctl_relock(); + + *oldlenp = needed; + if (oldp == NULL) + *oldlenp += 1024; + + return (error); +} + + + +SYSCTL_SETUP(sysctl_evcnt_setup, "sysctl kern.evcnt subtree setup") +{ + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "kern", NULL, + NULL, 0, NULL, 0, + CTL_KERN, CTL_EOL); + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_STRUCT, "evcnt", + SYSCTL_DESCR("Kernel evcnt information"), + sysctl_doevcnt, 0, NULL, 0, + CTL_KERN, KERN_EVCNT, CTL_EOL); } Index: src/sys/sys/sysctl.h diff -u src/sys/sys/sysctl.h:1.191 src/sys/sys/sysctl.h:1.192 --- src/sys/sys/sysctl.h:1.191 Fri Jan 28 18:44:45 2011 +++ src/sys/sys/sysctl.h Sat Jan 29 17:35:23 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: sysctl.h,v 1.191 2011/01/28 18:44:45 pooka Exp $ */ +/* $NetBSD: sysctl.h,v 1.192 2011/01/29 17:35:23 matt Exp $ */ /* * Copyright (c) 1989, 1993 @@ -259,7 +259,8 @@ #define KERN_ARND 81 /* void *buf, size_t siz random */ #define KERN_SYSVIPC 82 /* node: SysV IPC parameters */ #define KERN_BOOTTIME 83 /* struct: time kernel was booted */ -#define KERN_MAXID 84 /* number of valid kern ids */ +#define KERN_EVCNT 84 /* struct: evcnts */ +#define KERN_MAXID 85 /* number of valid kern ids */ #define CTL_KERN_NAMES { \ @@ -347,6 +348,7 @@ { "arandom", CTLTYPE_STRUCT }, \ { "sysvipc", CTLTYPE_STRUCT }, \ { "boottime", CTLTYPE_STRUCT }, \ + { "evcnt", CTLTYPE_STRUCT }, \ } /* @@ -782,6 +784,30 @@ #define KERN_FILESLOP 10 /* + * kern.evcnt returns an array of these structures, which are designed both to + * be immune to 32/64 bit emulation issues. Note that the struct here differs + * from the real struct evcnt but contains the same information in order to + * accomodate sysctl. 
+ */ +struct evcnt_sysctl { + uint64_t ev_count; /* current count */ + uint64_t ev_addr; /* kernel address of evcnt */ + uint64_t ev_parent; /* kernel address of parent */ + uint8_t ev_type; /* EVCNT_TRAP_* */ + uint8_t ev_grouplen; /* length of group with NUL */ + uint8_t ev_namelen; /* length of name with NUL */ + uint8_t ev_len; /* multiply by 8 */ + /* + * Now the group and name strings follow (both include the trailing + * NUL). ev_name start at &ev_strings[ev_grouplen+1] + */ + char ev_strings[0]; +}; + +#define KERN_EVCNT_COUNT_ANY 0 +#define KERN_EVCNT_COUNT_NONZERO 1 + +/* * CTL_HW identifiers */ #define HW_MACHINE 1 /* string: machine class */