Kevent POSIX timer notifications.

A simple extension to POSIX timers that allows
timer expiration notifications to be delivered
through a kevent queue.

An example application, posix_timer.c, can be found
in the archive on the project homepage.

Signed-off-by: Evgeniy Polyakov <[EMAIL PROTECTED]>


diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index 8786e01..3768746 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -235,6 +235,7 @@ typedef struct siginfo {
 #define SIGEV_NONE     1       /* other notification: meaningless */
 #define SIGEV_THREAD   2       /* deliver via thread creation */
 #define SIGEV_THREAD_ID 4      /* deliver to thread */
+#define SIGEV_KEVENT   8       /* deliver through kevent queue */
 
 /*
  * This works because the alignment is ok on all current architectures
@@ -260,6 +261,8 @@ typedef struct sigevent {
                        void (*_function)(sigval_t);
                        void *_attribute;       /* really pthread_attr_t */
                } _sigev_thread;
+
+               int kevent_fd;
        } _sigev_un;
 } sigevent_t;
 
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a7dd38f..4b9deb4 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -4,6 +4,7 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/sched.h>
+#include <linux/kevent_storage.h>
 
 union cpu_time_count {
        cputime_t cpu;
@@ -49,6 +50,9 @@ struct k_itimer {
        sigval_t it_sigev_value;        /* value word of sigevent struct */
        struct task_struct *it_process; /* process to send signal to */
        struct sigqueue *sigq;          /* signal queue entry. */
+#ifdef CONFIG_KEVENT_TIMER
+       struct kevent_storage st;
+#endif
        union {
                struct {
                        struct hrtimer timer;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 5fe87de..5ec805e 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -48,6 +48,8 @@
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/module.h>
+#include <linux/kevent.h>
+#include <linux/file.h>
 
 /*
  * Management arrays for POSIX timers.  Timers are kept in slab memory
@@ -224,6 +226,100 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
        return 0;
 }
 
+#ifdef CONFIG_KEVENT_TIMER
+static int posix_kevent_enqueue(struct kevent *k)
+{
+       /*
+        * This cast is safe: the id union has no pointer member, but it is
+        * 64 bits wide, which is large enough for any known pointer size.
+        */
+       struct k_itimer *tmr = (struct k_itimer *)(unsigned 
long)k->event.id.raw_u64;
+       return kevent_storage_enqueue(&tmr->st, k);
+}
+static int posix_kevent_dequeue(struct kevent *k)
+{
+       struct k_itimer *tmr = (struct k_itimer *)(unsigned 
long)k->event.id.raw_u64;
+       kevent_storage_dequeue(&tmr->st, k);
+       return 0;
+}
+static int posix_kevent_callback(struct kevent *k)
+{
+       return 1;
+}
+static int posix_kevent_init(void)
+{
+       struct kevent_callbacks tc = {
+               .callback = &posix_kevent_callback,
+               .enqueue = &posix_kevent_enqueue,
+               .dequeue = &posix_kevent_dequeue,
+               .flags = KEVENT_CALLBACKS_KERNELONLY};
+
+       return kevent_add_callbacks(&tc, KEVENT_POSIX_TIMER);
+}
+
+extern struct file_operations kevent_user_fops;
+
+static int posix_kevent_init_timer(struct k_itimer *tmr, int fd)
+{
+       struct ukevent uk;
+       struct file *file;
+       struct kevent_user *u;
+       int err;
+
+       file = fget(fd);
+       if (!file) {
+               err = -EBADF;
+               goto err_out;
+       }
+
+       if (file->f_op != &kevent_user_fops) {
+               err = -EINVAL;
+               goto err_out_fput;
+       }
+
+       u = file->private_data;
+
+       memset(&uk, 0, sizeof(struct ukevent));
+
+       uk.event = KEVENT_MASK_ALL;
+       uk.type = KEVENT_POSIX_TIMER;
+       uk.id.raw_u64 = (unsigned long)(tmr); /* Just cast to something unique 
*/
+       uk.req_flags = KEVENT_REQ_ONESHOT | KEVENT_REQ_ALWAYS_QUEUE;
+       uk.ptr = tmr->it_sigev_value.sival_ptr;
+
+       err = kevent_user_add_ukevent(&uk, u);
+       if (err)
+               goto err_out_fput;
+
+       fput(file);
+
+       return 0;
+
+err_out_fput:
+       fput(file);
+err_out:
+       return err;
+}
+
+static void posix_kevent_fini_timer(struct k_itimer *tmr)
+{
+       kevent_storage_fini(&tmr->st);
+}
+#else
+static int posix_kevent_init_timer(struct k_itimer *tmr, int fd)
+{
+       return -ENOSYS;
+}
+static int posix_kevent_init(void)
+{
+       return 0;
+}
+static void posix_kevent_fini_timer(struct k_itimer *tmr)
+{
+}
+#endif
+
+
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
  */
@@ -241,6 +337,11 @@ static __init int init_posix_timers(void)
        register_posix_clock(CLOCK_REALTIME, &clock_realtime);
        register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
 
+       if (posix_kevent_init()) {
+               printk(KERN_ERR "Failed to initialize kevent posix timers.\n");
+               BUG();
+       }
+
        posix_timers_cache = kmem_cache_create("posix_timers_cache",
                                        sizeof (struct k_itimer), 0, 0, NULL, 
NULL);
        idr_init(&posix_timers_id);
@@ -343,23 +444,29 @@ static int posix_timer_fn(struct hrtimer *timer)
 
        timr = container_of(timer, struct k_itimer, it.real.timer);
        spin_lock_irqsave(&timr->it_lock, flags);
+       
+       if (timr->it_sigev_notify == SIGEV_KEVENT) {
+#ifdef CONFIG_KEVENT_TIMER
+               kevent_storage_ready(&timr->st, NULL, KEVENT_MASK_ALL);
+#endif
+       } else {
+               if (timr->it.real.interval.tv64 != 0)
+                       si_private = ++timr->it_requeue_pending;
 
-       if (timr->it.real.interval.tv64 != 0)
-               si_private = ++timr->it_requeue_pending;
-
-       if (posix_timer_event(timr, si_private)) {
-               /*
-                * signal was not sent because of sig_ignor
-                * we will not get a call back to restart it AND
-                * it should be restarted.
-                */
-               if (timr->it.real.interval.tv64 != 0) {
-                       timr->it_overrun +=
-                               hrtimer_forward(timer,
-                                               timer->base->softirq_time,
-                                               timr->it.real.interval);
-                       ret = HRTIMER_RESTART;
-                       ++timr->it_requeue_pending;
+               if (posix_timer_event(timr, si_private)) {
+                       /*
+                        * signal was not sent because of sig_ignor
+                        * we will not get a call back to restart it AND
+                        * it should be restarted.
+                        */
+                       if (timr->it.real.interval.tv64 != 0) {
+                               timr->it_overrun +=
+                                       hrtimer_forward(timer,
+                                                       
timer->base->softirq_time,
+                                                       timr->it.real.interval);
+                               ret = HRTIMER_RESTART;
+                               ++timr->it_requeue_pending;
+                       }
                }
        }
 
@@ -407,6 +514,9 @@ static struct k_itimer * alloc_posix_timer(void)
                kmem_cache_free(posix_timers_cache, tmr);
                tmr = NULL;
        }
+#ifdef CONFIG_KEVENT_TIMER
+       kevent_storage_init(tmr, &tmr->st);
+#endif
        return tmr;
 }
 
@@ -424,6 +534,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
        if (unlikely(tmr->it_process) &&
            tmr->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
                put_task_struct(tmr->it_process);
+       posix_kevent_fini_timer(tmr);
        kmem_cache_free(posix_timers_cache, tmr);
 }
 
@@ -496,40 +607,52 @@ sys_timer_create(const clockid_t which_clock,
                new_timer->it_sigev_signo = event.sigev_signo;
                new_timer->it_sigev_value = event.sigev_value;
 
-               read_lock(&tasklist_lock);
-               if ((process = good_sigevent(&event))) {
-                       /*
-                        * We may be setting up this process for another
-                        * thread.  It may be exiting.  To catch this
-                        * case the we check the PF_EXITING flag.  If
-                        * the flag is not set, the siglock will catch
-                        * him before it is too late (in exit_itimers).
-                        *
-                        * The exec case is a bit more invloved but easy
-                        * to code.  If the process is in our thread
-                        * group (and it must be or we would not allow
-                        * it here) and is doing an exec, it will cause
-                        * us to be killed.  In this case it will wait
-                        * for us to die which means we can finish this
-                        * linkage with our last gasp. I.e. no code :)
-                        */
+               if (event.sigev_notify == SIGEV_KEVENT) {
+                       error = posix_kevent_init_timer(new_timer, 
event._sigev_un.kevent_fd);
+                       if (error)
+                               goto out;
+
+                       process = current->group_leader;
                        spin_lock_irqsave(&process->sighand->siglock, flags);
-                       if (!(process->flags & PF_EXITING)) {
-                               new_timer->it_process = process;
-                               list_add(&new_timer->list,
-                                        &process->signal->posix_timers);
-                               
spin_unlock_irqrestore(&process->sighand->siglock, flags);
-                               if (new_timer->it_sigev_notify == 
(SIGEV_SIGNAL|SIGEV_THREAD_ID))
-                                       get_task_struct(process);
-                       } else {
-                               
spin_unlock_irqrestore(&process->sighand->siglock, flags);
-                               process = NULL;
+                       new_timer->it_process = process;
+                       list_add(&new_timer->list, 
&process->signal->posix_timers);
+                       spin_unlock_irqrestore(&process->sighand->siglock, 
flags);
+               } else {
+                       read_lock(&tasklist_lock);
+                       if ((process = good_sigevent(&event))) {
+                               /*
+                                * We may be setting up this process for another
+                                * thread.  It may be exiting.  To catch this
+                                * case the we check the PF_EXITING flag.  If
+                                * the flag is not set, the siglock will catch
+                                * him before it is too late (in exit_itimers).
+                                *
+                                * The exec case is a bit more invloved but easy
+                                * to code.  If the process is in our thread
+                                * group (and it must be or we would not allow
+                                * it here) and is doing an exec, it will cause
+                                * us to be killed.  In this case it will wait
+                                * for us to die which means we can finish this
+                                * linkage with our last gasp. I.e. no code :)
+                                */
+                               spin_lock_irqsave(&process->sighand->siglock, 
flags);
+                               if (!(process->flags & PF_EXITING)) {
+                                       new_timer->it_process = process;
+                                       list_add(&new_timer->list,
+                                                
&process->signal->posix_timers);
+                                       
spin_unlock_irqrestore(&process->sighand->siglock, flags);
+                                       if (new_timer->it_sigev_notify == 
(SIGEV_SIGNAL|SIGEV_THREAD_ID))
+                                               get_task_struct(process);
+                               } else {
+                                       
spin_unlock_irqrestore(&process->sighand->siglock, flags);
+                                       process = NULL;
+                               }
+                       }
+                       read_unlock(&tasklist_lock);
+                       if (!process) {
+                               error = -EINVAL;
+                               goto out;
                        }
-               }
-               read_unlock(&tasklist_lock);
-               if (!process) {
-                       error = -EINVAL;
-                       goto out;
                }
        } else {
                new_timer->it_sigev_notify = SIGEV_SIGNAL;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to