Before this change, epoll_wait and epoll_pwait spuriously return with -EINTR on ptrace attach.
By analogy with poll syscall family, epoll_[p]wait should be interruptible by signals regardless of SA_RESTART, therefore, this change makes them return -ERESTARTNOHAND if timeout has expired. If timeout has not expired, and if interrupted by harmless SIG_DFL'ed signals such as SIGWINCH and other signal-like activity such as ptrace attach, epoll_[p]wait should restart with decreased timeout. To that end, set up restart block and return -ERESTART_RESTARTBLOCK. In order to define a "sigset_t ksigmask" member, I had to add <asm/signal.h> to linux/thread_info.h, which threw some build errors about undefined smp barriers and such. Removing <linux/seqlock.h> include from linux/time.h solved it. Run-tested. Signed-off-by: Denys Vlasenko <dvlas...@redhat.com> CC: Oleg Nesterov <o...@redhat.com> --- fs/eventpoll.c | 146 ++++++++++++++++++++++++++------------------ include/linux/thread_info.h | 10 +++ include/linux/time.h | 1 - 3 files changed, 98 insertions(+), 59 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0cff4434..1df35aa 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1911,16 +1911,16 @@ error_return: return error; } -/* - * Implement the event wait interface for the eventpoll file. It is the kernel - * part of the user space epoll_wait(2). - */ -SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, - int, maxevents, int, timeout) +static long do_restart_epoll_pwait(struct restart_block *restart_block); + +static long do_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout, const sigset_t *sigmask) { int error; + sigset_t ksigmask, ksigsaved; struct fd f; struct eventpoll *ep; + struct timespec start; /* The maximum number of event must be greater than zero */ if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) @@ -1944,19 +1944,97 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, goto error_fput; /* + * If the caller wants a certain signal mask to be set during the wait, + * we apply it here. + */ + if (sigmask) { + ksigmask = *sigmask; + sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &ksigsaved); + } + + /* * At this point it is safe to assume that the "private_data" contains * our own data structure. */ ep = f.file->private_data; + if (timeout > 0) + ktime_get_ts(&start); + /* Time to fish for events ... */ error = ep_poll(ep, events, maxevents, timeout); + if (error == -EINTR) { + error = -ERESTARTNOHAND; + if (timeout > 0) { + struct restart_block *restart_block; + struct timespec end; + ktime_t rem; + s64 rem_msec; + + ktime_get_ts(&end); + rem = ktime_sub(timespec_to_ktime(end), timespec_to_ktime(start)); + rem_msec = ktime_to_ms(rem); + if (rem_msec > 0) { + restart_block = ¤t_thread_info()->restart_block; + restart_block->fn = do_restart_epoll_pwait; + restart_block->epoll.epfd = epfd; + restart_block->epoll.events = events; + restart_block->epoll.maxevents = maxevents; + restart_block->epoll.timeout = rem_msec; + restart_block->epoll.has_ksigmask = (sigmask != NULL); + restart_block->epoll.ksigmask = ksigmask; + error = -ERESTART_RESTARTBLOCK; + } + } + /* + * We've got a signal while waiting, we do not restore the + * signal mask yet, and we allow do_signal() to deliver the signal on + * the way back to userspace, before the signal mask is restored. + */ + if (sigmask) { + memcpy(¤t->saved_sigmask, &ksigsaved, + sizeof(ksigsaved)); + set_restore_sigmask(); + } + } + else + /* + * If we changed the signal mask, we need to restore the original one. + */ + if (sigmask) { + sigprocmask(SIG_SETMASK, &ksigsaved, NULL); + } + error_fput: fdput(f); return error; } +static long do_restart_epoll_pwait(struct restart_block *restart_block) +{ + int epfd = restart_block->epoll.epfd; + struct epoll_event __user *events = restart_block->epoll.events; + int maxevents = restart_block->epoll.maxevents; + int timeout = restart_block->epoll.timeout; + sigset_t *ksigmask = restart_block->epoll.has_ksigmask + ? &restart_block->epoll.ksigmask + : NULL; + + return do_epoll_pwait(epfd, events, maxevents, timeout, ksigmask); +} + +/* + * Implement the event wait interface for the eventpoll file. It is the kernel + * part of the user space epoll_wait(2). + */ +SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, + int, maxevents, int, timeout) +{ + return do_epoll_pwait(epfd, events, maxevents, timeout, NULL); +} + /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). @@ -1965,40 +2043,16 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) { - int error; - sigset_t ksigmask, sigsaved; + sigset_t ksigmask; - /* - * If the caller wants a certain signal mask to be set during the wait, - * we apply it here. - */ if (sigmask) { if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) return -EFAULT; - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - error = sys_epoll_wait(epfd, events, maxevents, timeout); - - /* - * If we changed the signal mask, we need to restore the original one. - * In case we've got a signal while waiting, we do not restore the - * signal mask yet, and we allow do_signal() to deliver the signal on - * the way back to userspace, before the signal mask is restored. - */ - if (sigmask) { - if (error == -EINTR) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } else - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - } - - return error; + return do_epoll_pwait(epfd, events, maxevents, timeout, sigmask ? &ksigmask : NULL); } #ifdef CONFIG_COMPAT @@ -2008,42 +2062,18 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { - long err; compat_sigset_t csigmask; - sigset_t ksigmask, sigsaved; + sigset_t ksigmask; - /* - * If the caller wants a certain signal mask to be set during the wait, - * we apply it here. - */ if (sigmask) { if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; if (copy_from_user(&csigmask, sigmask, sizeof(csigmask))) return -EFAULT; sigset_from_compat(&ksigmask, &csigmask); - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - err = sys_epoll_wait(epfd, events, maxevents, timeout); - - /* - * If we changed the signal mask, we need to restore the original one. - * In case we've got a signal while waiting, we do not restore the - * signal mask yet, and we allow do_signal() to deliver the signal on - * the way back to userspace, before the signal mask is restored. - */ - if (sigmask) { - if (err == -EINTR) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } else - sigprocmask(SIG_SETMASK, &sigsaved, NULL); } - return err; + return do_epoll_pwait(epfd, events, maxevents, timeout, sigmask ? &ksigmask : NULL); } #endif diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e7e0473..1a7e0c6 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -8,6 +8,7 @@ #define _LINUX_THREAD_INFO_H #include <linux/types.h> +#include <asm/signal.h> #include <linux/bug.h> struct timespec; @@ -45,6 +46,15 @@ struct restart_block { unsigned long tv_sec; unsigned long tv_nsec; } poll; + /* For epoll_[p]wait */ + struct { + int epfd; + int maxevents; + int timeout; + int has_ksigmask; + struct epoll_event __user *events; + sigset_t ksigmask; + } epoll; }; }; diff --git a/include/linux/time.h b/include/linux/time.h index d5d229b..f0c1152 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -2,7 +2,6 @@ #define _LINUX_TIME_H # include <linux/cache.h> -# include <linux/seqlock.h> # include <linux/math64.h> #include <uapi/linux/time.h> -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/