Add epoll compat_ code to kernel/compat.c. IA64 and ARM-OABI currently use
their own versions of the epoll compat_ code, and they could probably be wired
to the new common code. The patch is against 2.6.20.


Signed-off-by: Davide Libenzi <davidel@xmailserver.org>


- Davide



diff -Nru linux-2.6.20/fs/eventpoll.c linux-2.6.20.mod/fs/eventpoll.c
--- linux-2.6.20/fs/eventpoll.c 2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20.mod/fs/eventpoll.c     2007-02-11 12:03:50.000000000 -0800
@@ -544,8 +544,7 @@
  * file descriptors inside the interest set.  It represents
  * the kernel part of the user space epoll_ctl(2).
  */
-asmlinkage long
-sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
+asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 {
        int error;
        struct file *file, *tfile;
@@ -707,8 +706,8 @@
  * part of the user space epoll_pwait(2).
  */
 asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
-               int maxevents, int timeout, const sigset_t __user *sigmask,
-               size_t sigsetsize)
+                               int maxevents, int timeout, const sigset_t __user *sigmask,
+                               size_t sigsetsize)
 {
        int error;
        sigset_t ksigmask, sigsaved;
diff -Nru linux-2.6.20/include/linux/compat.h linux-2.6.20.mod/include/linux/compat.h
--- linux-2.6.20/include/linux/compat.h 2007-02-09 16:14:20.000000000 -0800
+++ linux-2.6.20.mod/include/linux/compat.h     2007-02-11 12:03:50.000000000 -0800
@@ -234,5 +234,22 @@
                compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
                const compat_ulong_t __user *new_nodes);
 
+/*
+ * epoll (fs/eventpoll.c) compat bits follow ...
+ */
+struct compat_epoll_event {
+       u32 events;     /* copied to/from epoll_event.events */
+       u32 data[2];    /* the 64-bit epoll_event.data as two 32-bit words */
+};
+
+asmlinkage long compat_sys_epoll_ctl(int epfd, int op, int fd,
+                                    struct compat_epoll_event __user *event);
+asmlinkage long compat_sys_epoll_wait(int epfd, struct compat_epoll_event __user *events,
+                                     int maxevents, int timeout);
+asmlinkage long compat_sys_epoll_pwait(int epfd, struct compat_epoll_event __user *events,
+                                      int maxevents, int timeout,
+                                      const compat_sigset_t __user *sigmask,
+                                      compat_size_t sigsetsize); /* sizeof(compat_sigset_t) */
+
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
diff -Nru linux-2.6.20/kernel/compat.c linux-2.6.20.mod/kernel/compat.c
--- linux-2.6.20/kernel/compat.c        2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20.mod/kernel/compat.c    2007-02-11 12:13:08.000000000 -0800
@@ -23,6 +23,7 @@
 #include <linux/timex.h>
 #include <linux/migrate.h>
 #include <linux/posix-timers.h>
+#include <linux/eventpoll.h>
 
 #include <asm/uaccess.h>
 
@@ -1016,3 +1017,157 @@
        return sys_migrate_pages(pid, nr_bits + 1, old, new);
 }
 #endif
+
+
+#ifdef CONFIG_EPOLL
+
+/*
+ * epoll (fs/eventpoll.c) compat functions follow ...
+ *
+ * EPOLL_NEED_EVENT_COMPAT() is a compile-time constant expression.
+ * We need the compat layer over the epoll_event structure only if the offset
+ * of the __u64 data member is not 4 (the size of the events member that
+ * precedes the data one).
+ */
+#define EPOLL_NEED_EVENT_COMPAT() (offsetof(struct epoll_event, data) != 4)
+
+
+asmlinkage long compat_sys_epoll_ctl(int epfd, int op, int fd,
+                                    struct compat_epoll_event __user *event)
+{
+       long ret;
+
+       /*
+        * If compat is not needed, this simply maps to a jump to sys_epoll_ctl(),
+        * with the "else" code being dropped by GCC. EPOLL_CTL_DEL also goes direct.
+        */
+       if (!EPOLL_NEED_EVENT_COMPAT() || op == EPOLL_CTL_DEL)
+               ret = sys_epoll_ctl(epfd, op, fd, (struct epoll_event __user *) event);
+       else {
+               struct compat_epoll_event user; /* local copy of the caller's event */
+               struct epoll_event __user *kernel; /* same event, native layout */
+               union {
+                       u64 q;
+                       u32 d[2];
+               } mux; /* joins the two u32 data words into one u64 */
+
+               /*
+                * Rebuild the event in the native layout for sys_epoll_ctl().
+                * "event" may be NULL for EPOLL_CTL_DEL, but that case is handled
+                * above, so here we know "event" should not be NULL.
+                */
+               if (copy_from_user(&user, event, sizeof(user)))
+                       return -EFAULT;
+               kernel = compat_alloc_user_space(sizeof(struct epoll_event));
+               ret = __put_user(user.events, &kernel->events);
+               mux.d[0] = user.data[0];
+               mux.d[1] = user.data[1];
+               if ((ret |= __put_user(mux.q, &kernel->data)) == 0)
+                       ret = sys_epoll_ctl(epfd, op, fd, kernel);
+       }
+
+       return ret;
+}
+
+
+asmlinkage long compat_sys_epoll_wait(int epfd, struct compat_epoll_event __user *events,
+                                     int maxevents, int timeout)
+{
+       long ret;
+
+       /*
+        * The compat_sys_epoll_pwait() function calls this one. We do need a
+        * compat function for sys_epoll_pwait() due to the sigset_t size, but
+        * not every architecture needs a compat layer over sys_epoll_wait().
+        * With the compile-time test below, a compat_sys_epoll_wait() call that
+        * needs no translation maps directly to sys_epoll_wait(), avoiding the
+        * double buffer copy for events (that might blow cache and kill
+        * performance). GCC removes the unused code (the condition being known
+        * at compile-time) and issues a simple jump to sys_epoll_wait().
+        */
+       if (EPOLL_NEED_EVENT_COMPAT()) {
+               struct epoll_event __user *kbuf; /* native-layout result buffer */
+               struct epoll_event ev;
+               long err, i;
+               union {
+                       u64 q;
+                       u32 d[2];
+               } mux; /* splits the u64 data into two u32 words */
+
+               if (maxevents <= 0 || maxevents > (INT_MAX / sizeof(struct epoll_event)))
+                       return -EINVAL; /* keeps the size computed below within INT_MAX */
+               kbuf = compat_alloc_user_space(sizeof(struct epoll_event) * maxevents);
+               ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout);
+               err = 0;
+               for (i = 0; i < ret; i++) { /* repack each returned event */
+                       err |= __get_user(ev.events, &kbuf[i].events);
+                       err |= __get_user(ev.data, &kbuf[i].data);
+                       err |= put_user(ev.events, &events->events);
+                       mux.q = ev.data;
+                       err |= put_user(mux.d[0], &events->data[0]);
+                       err |= put_user(mux.d[1], &events->data[1]);
+                       events++;
+               }
+               ret = err ? -EFAULT: ret; /* any failed user access => -EFAULT */
+       } else
+               ret = sys_epoll_wait(epfd, (struct epoll_event __user *) events,
+                                    maxevents, timeout);
+
+       return ret;
+}
+
+
+#ifdef TIF_RESTORE_SIGMASK
+
+asmlinkage long compat_sys_epoll_pwait(int epfd, struct compat_epoll_event __user *events,
+                                      int maxevents, int timeout,
+                                      const compat_sigset_t __user *sigmask,
+                                      compat_size_t sigsetsize)
+{
+       int error;
+       compat_sigset_t ss32;
+       sigset_t ksigmask, sigsaved;
+
+       /*
+        * If the caller wants a certain signal mask to be set during the wait,
+        * we convert it from the compat layout and apply it here.
+        */
+       if (sigmask) {
+               if (sigsetsize != sizeof(compat_sigset_t))
+                       return -EINVAL;
+               if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+                       return -EFAULT;
+               sigset_from_compat(&ksigmask, &ss32);
+               sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+               sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); /* sigsaved is restored below */
+       }
+
+       /* Compile-time switch: translate events only if the layouts differ */
+       if (EPOLL_NEED_EVENT_COMPAT())
+               error = compat_sys_epoll_wait(epfd, events, maxevents, timeout);
+       else
+               error = sys_epoll_wait(epfd, (struct epoll_event __user *) events,
+                                      maxevents, timeout);
+
+       /*
+        * If we changed the signal mask, we need to restore the original one.
+        * In case we got a signal while waiting, we do not restore the signal
+        * mask yet: we let do_signal() deliver the signal on the way back to
+        * userspace, before the signal mask is restored.
+        */
+       if (sigmask) {
+               if (error == -EINTR) {
+                       memcpy(&current->saved_sigmask, &sigsaved,
+                              sizeof(sigsaved));
+                       set_thread_flag(TIF_RESTORE_SIGMASK);
+               } else
+                       sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+       }
+
+       return error;
+}
+
+#endif /* #ifdef TIF_RESTORE_SIGMASK */
+
+#endif /* #ifdef CONFIG_EPOLL */
+
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to