Module Name:    src
Committed By:   matt
Date:           Sat Jan 29 17:35:25 UTC 2011

Modified Files:
        src/sys/kern: subr_evcnt.c
        src/sys/sys: sysctl.h

Log Message:
Add a sysctl to retrieve evcnts from the kernel.  Results can be limited to
a specific type, and to either all evcnts or just those with non-zero counts.


To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/sys/kern/subr_evcnt.c
cvs rdiff -u -r1.191 -r1.192 src/sys/sys/sysctl.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/subr_evcnt.c
diff -u src/sys/kern/subr_evcnt.c:1.8 src/sys/kern/subr_evcnt.c:1.9
--- src/sys/kern/subr_evcnt.c:1.8	Tue Jan 18 08:16:43 2011
+++ src/sys/kern/subr_evcnt.c	Sat Jan 29 17:35:24 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: subr_evcnt.c,v 1.8 2011/01/18 08:16:43 matt Exp $ */
+/* $NetBSD: subr_evcnt.c,v 1.9 2011/01/29 17:35:24 matt Exp $ */
 
 /*
  * Copyright (c) 1996, 2000 Christopher G. Demetriou
@@ -77,17 +77,20 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_evcnt.c,v 1.8 2011/01/18 08:16:43 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_evcnt.c,v 1.9 2011/01/29 17:35:24 matt Exp $");
 
 #include <sys/param.h>
-#include <sys/device.h>
+#include <sys/evcnt.h>
+#include <sys/kmem.h>
 #include <sys/mutex.h>
+#include <sys/sysctl.h>
 #include <sys/systm.h>
 
 /* list of all events */
 struct evcntlist allevents = TAILQ_HEAD_INITIALIZER(allevents);
-static kmutex_t evmtx;
+static kmutex_t evcnt_lock __cacheline_aligned;
 static bool init_done;
+static uint32_t evcnt_generation;
 
 /*
  * We need a dummy object to stuff into the evcnt link set to
@@ -108,7 +111,7 @@
 
 	KASSERT(!init_done);
 
-	mutex_init(&evmtx, MUTEX_DEFAULT, IPL_NONE);
+	mutex_init(&evcnt_lock, MUTEX_DEFAULT, IPL_NONE);
 
 	init_done = true;
 
@@ -146,9 +149,9 @@
 #endif
 	ev->ev_namelen = len;
 
-	mutex_enter(&evmtx);
+	mutex_enter(&evcnt_lock);
 	TAILQ_INSERT_TAIL(&allevents, ev, ev_list);
-	mutex_exit(&evmtx);
+	mutex_exit(&evcnt_lock);
 }
 
 /*
@@ -186,7 +189,171 @@
 evcnt_detach(struct evcnt *ev)
 {
 
-	mutex_enter(&evmtx);
+	mutex_enter(&evcnt_lock);
 	TAILQ_REMOVE(&allevents, ev, ev_list);
-	mutex_exit(&evmtx);
+	evcnt_generation++;
+	mutex_exit(&evcnt_lock);
+}
+
+struct xevcnt_sysctl {
+	struct evcnt_sysctl evs;
+	char ev_strings[2*EVCNT_STRING_MAX];
+};
+
+static size_t
+sysctl_fillevcnt(const struct evcnt *ev, struct xevcnt_sysctl *xevs,
+	size_t *copylenp)
+{
+	const size_t copylen = offsetof(struct evcnt_sysctl, ev_strings)
+	    + ev->ev_grouplen + 1 + ev->ev_namelen + 1;
+	const size_t len = roundup2(copylen, sizeof(uint64_t));
+	if (xevs != NULL) {
+		xevs->evs.ev_count = ev->ev_count;
+		xevs->evs.ev_addr = PTRTOUINT64(ev);
+		xevs->evs.ev_parent = PTRTOUINT64(ev->ev_parent);
+		xevs->evs.ev_type = ev->ev_type;
+		xevs->evs.ev_grouplen = ev->ev_grouplen;
+		xevs->evs.ev_namelen = ev->ev_namelen;
+		xevs->evs.ev_len = len / sizeof(uint64_t);
+		strcpy(xevs->evs.ev_strings, ev->ev_group);
+		strcpy(xevs->evs.ev_strings + ev->ev_grouplen + 1, ev->ev_name);
+	}
+
+	*copylenp = copylen;
+	return len;
+}
+
+static int
+sysctl_doevcnt(SYSCTLFN_ARGS)
+{       
+	struct xevcnt_sysctl *xevs0 = NULL, *xevs;
+	const struct evcnt *ev;
+	int error;
+	int retries;
+	size_t needed, len;
+	char *dp;
+ 
+        if (namelen == 1 && name[0] == CTL_QUERY)
+                return (sysctl_query(SYSCTLFN_CALL(rnode)));
+
+	if (namelen != 2)
+		return (EINVAL);
+
+	/*
+	 * We can filter on the type of evcnt.
+	 */
+	const int filter = name[0];
+	if (filter != EVCNT_TYPE_ANY
+	    && filter != EVCNT_TYPE_MISC
+	    && filter != EVCNT_TYPE_INTR
+	    && filter != EVCNT_TYPE_TRAP)
+		return (EINVAL);
+
+	const u_int count = name[1];
+	if (count != KERN_EVCNT_COUNT_ANY
+	    && count != KERN_EVCNT_COUNT_NONZERO)
+		return (EINVAL);
+
+	sysctl_unlock();
+
+	if (oldp != NULL && xevs0 == NULL)
+		xevs0 = kmem_alloc(sizeof(*xevs0), KM_SLEEP);
+
+	retries = 100;
+ retry:
+	dp = oldp;
+	len = (oldp != NULL) ? *oldlenp : 0;
+	xevs = xevs0;
+	error = 0;
+	needed = 0;
+
+	mutex_enter(&evcnt_lock);
+	TAILQ_FOREACH(ev, &allevents, ev_list) {
+		if (filter != EVCNT_TYPE_ANY && filter != ev->ev_type)
+			continue;
+		if (count == KERN_EVCNT_COUNT_NONZERO && ev->ev_count == 0)
+			continue;
+
+		/*
+		 * Prepare to copy.  If xevs is NULL, fillevcnt will just
+		 * return how big the item is.
+		 */
+		size_t copylen;
+		const size_t elem_size = sysctl_fillevcnt(ev, xevs, &copylen);
+		needed += elem_size;
+
+		if (len < elem_size) {
+			xevs = NULL;
+			continue;
+		}
+
+		KASSERT(xevs != NULL);
+		KASSERT(xevs->evs.ev_grouplen != 0);
+		KASSERT(xevs->evs.ev_namelen != 0);
+		KASSERT(xevs->evs.ev_strings[0] != 0);
+
+		const uint32_t last_generation = evcnt_generation;
+		mutex_exit(&evcnt_lock);
+
+		/*
+		 * Only copy the actual number of bytes, not the rounded
+		 * number.  If we did the latter we'd have to zero them
+		 * first or we'd leak random kernel memory.
+		 */
+		error = copyout(xevs, dp, copylen);
+
+		mutex_enter(&evcnt_lock);
+		if (error)
+			break;
+
+		if (__predict_false(last_generation != evcnt_generation)) {
+			/*
+			 * This sysctl node is only for statistics.
+			 * Retry; if the queue keeps changing, then
+			 * bail out.
+			 */
+			if (--retries == 0) {
+				error = EAGAIN;
+				break;
+			}
+			mutex_exit(&evcnt_lock);
+			goto retry;
+		}
+
+		/*
+		 * Now we deal with the pointer/len since we aren't going to
+		 * toss their values away.
+		 */
+		dp += elem_size;
+		len -= elem_size;
+	}
+	mutex_exit(&evcnt_lock);
+
+	if (xevs0 != NULL)
+		kmem_free(xevs0, sizeof(*xevs0));
+
+	sysctl_relock();
+
+	*oldlenp = needed;
+	if (oldp == NULL)
+		*oldlenp += 1024;
+
+	return (error);
+}
+
+
+
+SYSCTL_SETUP(sysctl_evcnt_setup, "sysctl kern.evcnt subtree setup")
+{
+	sysctl_createv(clog, 0, NULL, NULL,
+		       CTLFLAG_PERMANENT, 
+		       CTLTYPE_NODE, "kern", NULL,
+		       NULL, 0, NULL, 0,
+		       CTL_KERN, CTL_EOL);
+	sysctl_createv(clog, 0, NULL, NULL,
+		       CTLFLAG_PERMANENT,
+		       CTLTYPE_STRUCT, "evcnt",
+		       SYSCTL_DESCR("Kernel evcnt information"),
+		       sysctl_doevcnt, 0, NULL, 0,
+		       CTL_KERN, KERN_EVCNT, CTL_EOL);
 }

Index: src/sys/sys/sysctl.h
diff -u src/sys/sys/sysctl.h:1.191 src/sys/sys/sysctl.h:1.192
--- src/sys/sys/sysctl.h:1.191	Fri Jan 28 18:44:45 2011
+++ src/sys/sys/sysctl.h	Sat Jan 29 17:35:23 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: sysctl.h,v 1.191 2011/01/28 18:44:45 pooka Exp $	*/
+/*	$NetBSD: sysctl.h,v 1.192 2011/01/29 17:35:23 matt Exp $	*/
 
 /*
  * Copyright (c) 1989, 1993
@@ -259,7 +259,8 @@
 #define	KERN_ARND		81	/* void *buf, size_t siz random */
 #define	KERN_SYSVIPC		82	/* node: SysV IPC parameters */
 #define	KERN_BOOTTIME		83	/* struct: time kernel was booted */
-#define	KERN_MAXID		84	/* number of valid kern ids */
+#define	KERN_EVCNT		84	/* struct: evcnts */
+#define	KERN_MAXID		85	/* number of valid kern ids */
 
 
 #define	CTL_KERN_NAMES { \
@@ -347,6 +348,7 @@
 	{ "arandom", CTLTYPE_STRUCT }, \
 	{ "sysvipc", CTLTYPE_STRUCT }, \
 	{ "boottime", CTLTYPE_STRUCT }, \
+	{ "evcnt", CTLTYPE_STRUCT }, \
 }
 
 /*
@@ -782,6 +784,30 @@
 #define	KERN_FILESLOP		10
 
 /*
+ * kern.evcnt returns an array of these structures, which are designed to be
+ * immune to 32/64 bit emulation issues.  Note that the struct here differs
+ * from the real struct evcnt but contains the same information in order to
+ * accommodate sysctl.
+ */
+struct evcnt_sysctl {
+	uint64_t	ev_count;		/* current count */
+	uint64_t	ev_addr;		/* kernel address of evcnt */
+	uint64_t	ev_parent;		/* kernel address of parent */
+	uint8_t		ev_type;		/* EVCNT_TRAP_* */
+	uint8_t		ev_grouplen;		/* length of group with NUL */
+	uint8_t		ev_namelen;		/* length of name with NUL */
+	uint8_t		ev_len;			/* multiply by 8 */
+	/*
+	 * Now the group and name strings follow (both include the trailing
+	 * NUL).  ev_name starts at &ev_strings[ev_grouplen+1]
+	 */
+	char		ev_strings[0];
+};
+
+#define	KERN_EVCNT_COUNT_ANY		0
+#define	KERN_EVCNT_COUNT_NONZERO	1
+
+/*
  * CTL_HW identifiers
  */
 #define	HW_MACHINE	 1		/* string: machine class */

Reply via email to