Hello,
        
   This module sends a signal to one or several processes (in user
space) when a fork occurs in the kernel. It relays information about
forks (parent and child pid) to a user space application. The relay fork
module adds a hook in the do_fork() routine that can be used by other
modules and it uses the sysfs file system. The main directory is called
"/sys/relayfork" and it contains two files. One keeps the number of the
signal that is send when a fork occurred and the other is a list of
processes that are registered. Those files are readable and writable.
The tree structure is as follow:
         sys/
           |
           --> relayfork/
                  |
                  ---> processes
                  ---> signal

   If you want to register a process, just "echoing" its PID in the file
"processes". Example: 
        - Register process 123 
          echo 123 > /sys/relayfork/processes
        - Un-Register process 123 
          echo -123 > /sys/relayfork/processes

   By default the module sends the RT signal "33". We use a RT signal
because if a signal occurs when an another one is still in the queue it
is added. If it's not a RT signal, the signal is lost. The administrator
is free to change it.
        echo XX > /sys/relayfork/signal

   I compile a kernel 2.6.10 to see performances impact:

      # time sh -c 'make bzImage && make modules'

      with a vanilla kernel: real    8m10.797s
                             user    7m29.652s
                             sys     0m49.275s

      with kernel + patch  : real    8m11.137s
                             user    7m30.034s
                             sys     0m49.994s

      with kernel + patch + ELSA(*) : real     8m12.162s
                                      user     7m30.835s
                                      sys      0m50.082s
        
   (*) ELSA uses the patch in order to perform per-group of processes 
       accounting. It's available at http://elsa.sf.net

   You can compile it as a module or directly in the kernel.

   This patch is used by the Enhanced Linux System Accounting tool that
can be downloaded from http://elsa.sf.net

Regards,
Guillaume

$ diffstat linux-2.6.11-rc3-mm1-relayfork.patch
 include/linux/generic_hooks.h |    9
 init/Kconfig                  |   25 ++
 kernel/Makefile               |    1
 kernel/fork.c                 |   27 ++
 kernel/relay_fork.c           |  477 ++++++++....++++++++++++++
 5 files changed, 539 insertions(+)

Signed-off-by: Guillaume Thouvenin <[EMAIL PROTECTED]>


diff -uprN -X dontdiff linux-2.6.11-rc3-mm1/include/linux/generic_hooks.h 
linux-2.6.11-rc3-mm1-relayfork/include/linux/generic_hooks.h
--- linux-2.6.11-rc3-mm1/include/linux/generic_hooks.h  1970-01-01 
01:00:00.000000000 +0100
+++ linux-2.6.11-rc3-mm1-relayfork/include/linux/generic_hooks.h        
2005-02-07 10:43:41.000000000 +0100
@@ -0,0 +1,9 @@
+#ifndef GENERIC_HOOKS_H
+#define GENERIC_HOOKS_H
+
+struct do_fork_cb {
+       struct list_head list;
+       void (* handler)(struct task_struct *, struct task_struct *);
+};
+
+#endif
diff -uprN -X dontdiff linux-2.6.11-rc3-mm1/init/Kconfig 
linux-2.6.11-rc3-mm1-relayfork/init/Kconfig
--- linux-2.6.11-rc3-mm1/init/Kconfig   2005-02-07 10:16:38.000000000 +0100
+++ linux-2.6.11-rc3-mm1-relayfork/init/Kconfig 2005-02-07 10:45:06.000000000 
+0100
@@ -199,6 +199,31 @@ config KOBJECT_UEVENT
          Say Y, unless you are building a system requiring minimal memory
          consumption.
 
+config RELAY_FORK
+       tristate "Relay Fork Event"
+       depends on SYSFS
+       default n
+       ---help---
+         This module sends a signal to one or several processes when a fork
+         occurs in the kernel. It relays information about forks.
+
+         The relay fork module is configured by using the sysfs. The main
+         directory is called "/sys/relayfork" and it contains two files. One
+         keeps the number of the signal that is send when a fork occurred and
+         the other is a list of processes that are registered. Those files
+         are writable. The tree structure is as follow:
+               sys/
+               sys/relayfork/
+               sys/relayfork/processes
+               sys/relayfork/signal
+
+         If you want to register a process, just "echoing" its PID in the
+         file "processes". Example:
+               - Register process 123
+               echo 123 > /sys/relayfork/processes
+               - Un-Register process 123
+               echo -123 > /sys/relayfork/processes
+
 config IKCONFIG
        bool "Kernel .config support"
        ---help---
diff -uprN -X dontdiff linux-2.6.11-rc3-mm1/kernel/fork.c 
linux-2.6.11-rc3-mm1-relayfork/kernel/fork.c
--- linux-2.6.11-rc3-mm1/kernel/fork.c  2005-02-07 10:16:38.000000000 +0100
+++ linux-2.6.11-rc3-mm1-relayfork/kernel/fork.c        2005-02-07 
10:48:20.000000000 +0100
@@ -41,6 +41,7 @@
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
+#include <linux/generic_hooks.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -63,6 +64,26 @@ DEFINE_PER_CPU(unsigned long, process_co
 
 EXPORT_SYMBOL(tasklist_lock);
 
+static LIST_HEAD(do_fork_cb_list);
+static spinlock_t do_fork_cb_spinlock = SPIN_LOCK_UNLOCKED;
+
+void do_fork_cb_register(struct do_fork_cb *p)
+{
+       spin_lock(&do_fork_cb_spinlock);
+       list_add(&p->list, &do_fork_cb_list);
+       spin_unlock(&do_fork_cb_spinlock);
+}
+EXPORT_SYMBOL(do_fork_cb_register);
+
+void do_fork_cb_unregister(struct do_fork_cb *p)
+{
+       spin_lock(&do_fork_cb_spinlock);
+       list_del(&p->list);
+       spin_unlock(&do_fork_cb_spinlock);
+}
+EXPORT_SYMBOL(do_fork_cb_unregister);
+
+
 int nr_processes(void)
 {
        int cpu;
@@ -1209,6 +1230,7 @@ long do_fork(unsigned long clone_flags,
         */
        if (!IS_ERR(p)) {
                struct completion vfork;
+               struct do_fork_cb *cb;
 
                if (clone_flags & CLONE_VFORK) {
                        p->vfork_done = &vfork;
@@ -1238,6 +1260,11 @@ long do_fork(unsigned long clone_flags,
                        if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
                                ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | 
SIGTRAP);
                }
+
+               spin_lock(&do_fork_cb_spinlock);
+               list_for_each_entry(cb, &do_fork_cb_list,list)
+                       cb->handler(current, p);
+               spin_unlock(&do_fork_cb_spinlock);
        } else {
                free_pidmap(pid);
                pid = PTR_ERR(p);
diff -uprN -X dontdiff linux-2.6.11-rc3-mm1/kernel/Makefile 
linux-2.6.11-rc3-mm1-relayfork/kernel/Makefile
--- linux-2.6.11-rc3-mm1/kernel/Makefile        2005-02-07 10:16:38.000000000 
+0100
+++ linux-2.6.11-rc3-mm1-relayfork/kernel/Makefile      2005-02-07 
10:48:43.000000000 +0100
@@ -30,6 +30,7 @@ obj-$(CONFIG_SYSFS) += ksysfs.o
 obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_RELAY_FORK) += relay_fork.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <[EMAIL PROTECTED]>, the -fno-omit-frame-pointer is
diff -uprN -X dontdiff linux-2.6.11-rc3-mm1/kernel/relay_fork.c 
linux-2.6.11-rc3-mm1-relayfork/kernel/relay_fork.c
--- linux-2.6.11-rc3-mm1/kernel/relay_fork.c    1970-01-01 01:00:00.000000000 
+0100
+++ linux-2.6.11-rc3-mm1-relayfork/kernel/relay_fork.c  2005-02-07 
10:49:02.000000000 +0100
@@ -0,0 +1,477 @@
+/*
+ * Relay fork interface
+ *
+ * Author: Guillaume Thouvenin <[EMAIL PROTECTED]>
+ *
+ * This module is a relay for fork monitoring. You use the 
+ * /sys/relayfork/processes to register your process or to 
+ * display registered processes. 
+ * The interface send a signal (default is a the RT signal 33)
+ * to all registered processes when a fork occurs in the kernel. 
+ * The signal can be change by using /sys/relayfork/signal. 
+ *
+ * Conventions:
+ *     structures are prefixed by rfork_
+ *     functions  are prefixed by rfd_
+ *     variable  are prefixed by relayf_
+ */
+
+#include <linux/module.h>      /* for all modules      */
+#include <linux/kernel.h>      /* for KERN_ALERT       */
+#include <linux/init.h>                /* for the macros       */
+
+#include <linux/spinlock.h>    /* for spinlock         */
+#include <linux/sched.h>       /* for task_struct      */
+#include <linux/kobject.h>
+#include <linux/generic_hooks.h>
+
+#define RELAY_FORK_SIGNAL      33
+
+MODULE_AUTHOR("Guillaume Thouvenin <[EMAIL PROTECTED]>");
+MODULE_DESCRIPTION("sysfs interface to relay fork device");
+MODULE_LICENSE("GPL");
+
+/* available if the relay-fork patch is applied */
+extern void do_fork_cb_register(struct do_fork_cb *p);
+extern void do_fork_cb_unregister(struct do_fork_cb *p);
+
+/*******************
+ * local definition 
+ *******************/
+
+
+struct rfork_device {
+       struct kobject kobj;
+       struct list_head proclist;      /* registered processes  */
+       unsigned long signal;   /* signal to send to processes */
+};
+
+struct rfork_attribute {
+       struct attribute attr;
+        ssize_t(*show) (struct rfork_device * dev, char *buf);
+        ssize_t(*store) (struct rfork_device * dev, const char *buf,
+                         size_t count);
+};
+
+struct rfork_proclist {
+       struct list_head list;
+       unsigned long pid;
+
+};
+
+/*****************************
+ * header for local functions
+ *****************************/
+
+
+ssize_t rfd_attr_show(struct kobject *kobj, struct attribute *attr,
+                     char *buf);
+ssize_t rfd_attr_store(struct kobject *kobj, struct attribute *attr,
+                      const char *buf, size_t size);
+
+ssize_t rfd_proclist_show(struct rfork_device *dev, char *buf);
+ssize_t rfd_proclist_store(struct rfork_device *dev, const char *buf,
+                          size_t size);
+ssize_t rfd_signal_show(struct rfork_device *dev, char *buf);
+ssize_t rfd_signal_store(struct rfork_device *dev, const char *buf,
+                        size_t size);
+
+
+/*****************
+ * local variable 
+ *****************/
+
+static spinlock_t relayf_lock = SPIN_LOCK_UNLOCKED;    /* protect relayf */
+
+struct sysfs_ops relayf_sysfs_ops = {
+       .show = rfd_attr_show,
+       .store = rfd_attr_store,
+};
+
+struct kobj_type relayf_kobj_type = {
+       .sysfs_ops = &relayf_sysfs_ops,
+};
+
+static struct rfork_device relayf;
+
+static struct do_fork_cb fork_hdler;
+
+#define RELAYF_ATTR(_name,_mode,_show,_store)                          \
+struct rfork_attribute relayf_attr_##_name = {                         \
+        .attr = {.name  = __stringify(_name) , .mode   = _mode },              
\
+        .show   = _show,                                               \
+        .store  = _store,                                              \
+};
+/* give write access only to root, otherwise it's a security hole */
+static RELAYF_ATTR(processes, 0644, rfd_proclist_show, rfd_proclist_store);
+static RELAYF_ATTR(signal, 0644, rfd_signal_show, rfd_signal_store);
+
+
+/**************************
+ * body of local functions 
+ **************************/
+
+
+/**
+ * rfd_strtoi - convert a string to an unsigned long
+ * @name: the string to convert
+ *
+ * Convert a string to an unsigned long. When we found 
+ * a character that is not a number, we return immediatly.
+ * The first character can be '-' but if it is, we just 
+ * ignore it and return the positive value.
+ */
+static unsigned long rfd_strtoi(const char *name)
+{
+       unsigned long val = 0;
+
+       /* If it's a negative value we just forgot the '-' */
+       if (*name == '-')
+               name++;
+
+       for (;; name++) {
+               switch (*name) {
+               case '0'...'9':
+                       val = 10 * val + (*name - '0');
+                       break;
+               default:
+                       return val;
+               }
+       }
+}
+
+/**
+ * rfd_nbdigits - return the number of digits of an integer
+ * @i: the integer
+ *
+ * example:  For "0" it returns 1
+ *           For "324" it returns 3
+ *           For "-324" it returns 3
+ */
+
+static inline int rfd_nbdigits(int i)
+{
+       int res = 0;
+
+       if (i < 0)
+               i *= -1;
+
+       do {
+               i = i / 10;
+               res++;
+       } while (i != 0);
+
+       return res;
+}
+
+#define KOBJ_TO_RFORK(obj) container_of(obj, struct rfork_device, kobj)
+#define ATTR_TO_RFORK(obj) container_of(obj, struct rfork_attribute, attr)
+
+/**
+ * rfd_attr_show - method called when a file is read
+ * @kobj: the kobject
+ * @attr: the attribute
+ * @buf:  the buffer
+ *
+ * Translate the generic struct kobject and struct attribute 
+ * pointers to the appropriate pointer types, and calls the 
+ * associated methods.
+ */
+ssize_t rfd_attr_show(struct kobject * kobj, struct attribute * attr,
+                     char *buf)
+{
+       struct rfork_device *rf_dev = KOBJ_TO_RFORK(kobj);
+       struct rfork_attribute *rf_attr = ATTR_TO_RFORK(attr);
+       ssize_t ret = 0;
+
+       if (rf_attr->show)
+               ret = rf_attr->show(rf_dev, buf);
+
+       return ret;
+}
+
+/**
+ * rfd_attr_store - method called when a file is written
+ * @kobj: the kobject
+ * @attr: the attribute
+ * @buf:  the buffer
+ *
+ * Translate the generic struct kobject and struct attribute 
+ * pointers to the appropriate pointer types, and calls the 
+ * associated methods.
+ */
+ssize_t rfd_attr_store(struct kobject * kobj, struct attribute * attr,
+                      const char *buf, size_t size)
+{
+       struct rfork_device *rf_dev = KOBJ_TO_RFORK(kobj);
+       struct rfork_attribute *rf_attr = ATTR_TO_RFORK(attr);
+       ssize_t ret = 0;
+
+       if (rf_attr->store)
+               ret = rf_attr->store(rf_dev, buf, size);
+
+       return ret;
+}
+
+/**
+ * rfd_proclist_show - method called when processes attribute is read
+ * @dev: the relay fork device
+ * @buf: the buffer
+ * 
+ * The buffer is a buffer of size PAGE_SIZE (due to sysfs implementation).
+ * As the method is called only once for a read, the show() method should 
+ * fill the entire buffer. Buffer is filled with a list of processes that
+ * are registered. As the size of the buffer is limited, we also limit 
+ * how many processes are displayed.
+ * Method returns the number of bytes printed into the buffer. If a bad 
+ * value comes through, it returns an  error.
+ */
+ssize_t rfd_proclist_show(struct rfork_device * dev, char *buf)
+{
+       struct rfork_proclist *p;
+       struct list_head *pos;
+       struct list_head *next;
+       int char_print;
+       char tmp[PAGE_SIZE];
+
+       char_print = 0;
+
+       spin_lock_irq(&relayf_lock);
+       list_for_each_safe(pos, next, &dev->proclist) {
+               p = container_of(pos, struct rfork_proclist, list);
+               if (char_print + rfd_nbdigits(p->pid) + 1 < PAGE_SIZE - 1) {
+                       memset(tmp, 0, PAGE_SIZE);
+                       char_print += sprintf(tmp, "%lu ", p->pid);
+                       strcat(buf, tmp);
+               } else
+                       /* 
+                        * buf is full enough, we don't need to go through
+                        * the list
+                        */
+                       break;
+       }
+       spin_unlock_irq(&relayf_lock);
+       strncat(buf, "\n", 1);
+       char_print++;
+       return char_print;
+}
+
+/**
+ * rfd_proclist_store - method called when processes attribute is written
+ * @dev: the relay fork device
+ * @buf: the buffer
+ * @size:
+ * 
+ * As the method is called only once for a write, the store() method should 
+ * fill the entire buffer. Methods should return the number of bytes used 
+ * from the buffer. If a bad value comes through, we return an error.
+ */
+ssize_t rfd_proclist_store(struct rfork_device * dev, const char *buf,
+                          size_t size)
+{
+       struct rfork_proclist *p;
+       struct list_head *pos;  /* use as a loop counter */
+       struct list_head *next; /* temporary storage */
+       int pid;                /* process to add or remove */
+
+       pid = rfd_strtoi(buf);
+       if (pid == 0)
+               /* nothing to do */
+               return strlen(buf);
+
+       switch (*buf) {
+       case '-':
+               /* remove the process */
+               spin_lock_irq(&relayf_lock);
+               list_for_each_safe(pos, next, &dev->proclist) {
+                       p = container_of(pos, struct rfork_proclist, list);
+                       if (p->pid == pid) {
+                               list_del(&p->list);
+                               spin_unlock_irq(&relayf_lock);
+                               kfree(p);
+                               return strlen(buf);
+                       }
+               }
+               spin_unlock_irq(&relayf_lock);
+               break;
+       default:
+               /* add the process. We check if it is already in the list */
+               spin_lock_irq(&relayf_lock);
+               list_for_each_safe(pos, next, &dev->proclist) {
+                       p = container_of(pos, struct rfork_proclist, list);
+                       if (p->pid == pid) {
+                               spin_unlock_irq(&relayf_lock);
+                               return strlen(buf);
+                       }
+               }
+
+               /* check if it is a valid pid */
+               read_lock(&tasklist_lock);
+               if (!find_task_by_pid(pid)) {
+                       read_unlock(&tasklist_lock);
+                       spin_unlock_irq(&relayf_lock);
+                       return strlen(buf);
+               }
+               read_unlock(&tasklist_lock);
+               spin_unlock_irq(&relayf_lock);
+
+               p = kmalloc(sizeof(struct rfork_proclist), GFP_KERNEL);
+               if (!p)
+                       return -ENOMEM;
+
+               INIT_LIST_HEAD(&p->list);
+               p->pid = rfd_strtoi(buf);
+               spin_lock_irq(&relayf_lock);
+               list_add(&p->list, &dev->proclist);
+               spin_unlock_irq(&relayf_lock);
+       }
+
+       return strlen(buf);
+}
+
+/**
+ * rfd_signal_show - method called when signal attribute is read
+ * @dev: the relay fork device
+ * @buf: the buffer
+ *
+ * Buffer is filled by an integer which is the RT signal used by
+ * the relay fork interface to relay fork information to processes.
+ */
+ssize_t rfd_signal_show(struct rfork_device * dev, char *buf)
+{
+       return sprintf(buf, "%lu\n", dev->signal);
+}
+
+/**
+ * rfd_signal_store - method called when signal attribute is written
+ * @dev: the relay fork device
+ * @buf: the buffer
+ * @size:
+ *
+ * Read the string in the buffer and convert it to an unsigned long
+ * integer. This value is used to set the new RT signal. A RT signal 
+ * must be between SIGRTMIN and SIGRTMAX.
+ */
+ssize_t rfd_signal_store(struct rfork_device * dev, const char *buf,
+                        size_t size)
+{
+       unsigned long rtsignal = rfd_strtoi(buf);
+
+       spin_lock_irq(&relayf_lock);
+       if ((rtsignal > SIGRTMIN) && (rtsignal < SIGRTMAX))
+               dev->signal = rfd_strtoi(buf);
+       spin_unlock_irq(&relayf_lock);
+
+       return strlen(buf);
+}
+
+/**
+ * rfd_relay_fork_info - send information to all registered processes
+ * @parent: PID of the parent 
+ * @child: PID of the child
+ *
+ * Send information to all registered processes when a fork 
+ * occurs in the kernel. 
+ */
+void rfd_relay_fork_info(struct task_struct *parent, struct task_struct *child)
+{
+       struct list_head *pos;
+       struct list_head *next;
+       struct siginfo sinfo;
+       
+       /*pr_info("FORK: parent = %d, child = %d\n", parent, child); */
+
+       spin_lock_irq(&relayf_lock);
+       list_for_each_safe(pos, next, &relayf.proclist) {
+               struct task_struct *taskp;
+               struct rfork_proclist *p =
+                   container_of(pos, struct rfork_proclist, list);
+
+               read_lock(&tasklist_lock);
+               taskp = find_task_by_pid(p->pid);
+               if (taskp) {
+                       sinfo.si_signo = relayf.signal;
+                       sinfo.si_errno = 0;
+                       sinfo.si_code = SI_ASYNCIO;
+                       /*
+                        * should be the sender ID but in our case, it is set
+                        * to the pid of the process that initiates the fork.
+                        */
+                       sinfo.si_pid = parent->pid;
+                       /*
+                        * process created during the fork. This information
+                        *  is needed to keep jobs coherent.
+                        */
+                       sinfo.si_value.sival_int = child->pid;
+
+                       send_sig_info(relayf.signal, &sinfo, taskp);
+               } else {
+                       /* we can remove it from proclist */
+                       list_del(&p->list);
+                       kfree(p);
+               }
+               read_unlock(&tasklist_lock);
+       }
+       spin_unlock_irq(&relayf_lock);
+}
+
+/**
+ * Modules start and stop
+ **/
+
+/**
+ * rfd_init -
+ */
+static int __init rfd_init(void)
+{
+       int retval;
+
+       INIT_LIST_HEAD(&fork_hdler.list);
+       fork_hdler.handler = &rfd_relay_fork_info;
+
+       do_fork_cb_register(&fork_hdler);
+
+       /* Initialization of relayf */
+       kobject_set_name(&relayf.kobj, "relayfork");
+       relayf.kobj.ktype = &relayf_kobj_type;
+       INIT_LIST_HEAD(&relayf.proclist);
+       relayf.signal = RELAY_FORK_SIGNAL;
+
+       /* 
+        * Register the kobject. As our object doesn't have a parent
+        * or a dominant kset, a directory is created at the top-level
+        * of the sysfs partition.
+        */
+       retval = kobject_register(&relayf.kobj);
+       sysfs_create_file(&relayf.kobj, &relayf_attr_processes.attr);
+       sysfs_create_file(&relayf.kobj, &relayf_attr_signal.attr);
+
+       return retval;
+}
+
+/**
+ * rfd_cleanup - 
+ */
+static void __exit rfd_cleanup(void)
+{
+       struct list_head *pos;  /* use as a loop counter */
+       struct list_head *next; /* temporary storage */
+
+       do_fork_cb_register(&fork_hdler);
+
+       /* release entry that was dynamically allocated */
+       list_for_each_safe(pos, next, &relayf.proclist) {
+               struct rfork_proclist *p =
+                   container_of(pos, struct rfork_proclist, list);
+               list_del(&p->list);
+               kfree(p);
+       }
+
+       sysfs_remove_file(&relayf.kobj, &relayf_attr_processes.attr);
+       sysfs_remove_file(&relayf.kobj, &relayf_attr_signal.attr);
+       kobject_unregister(&relayf.kobj);
+}
+
+
+module_init(rfd_init);
+module_exit(rfd_cleanup);


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to