From: Willem de Bruijn <[email protected]>

Add syscall epoll_pwait2, an epoll_pwait variant with nsec resolution
that replaces the int (millisecond) timeout with a struct timespec. It
is otherwise equivalent to epoll_pwait.

    int epoll_pwait2(int fd, struct epoll_event *events,
                     int maxevents,
                     const struct timespec *timeout,
                     const sigset_t *sigset);

The underlying hrtimer is already programmed with nsec resolution.
pselect and ppoll also set nsec resolution timeout with timespec.

The sigset_t in epoll_pwait has a compat variant. epoll_pwait2 needs
the same.

For timespec, only support this new interface on 2038-aware platforms
that define struct __kernel_timespec, so there is no
CONFIG_COMPAT_32BIT_TIME variant.

Changes
  v3:
  - rewrite: add epoll_pwait2 syscall instead of epoll_create1 flag
  v2:
  - cast to s64: avoid overflow on 32-bit platforms (Shuo Chen)
  - minor commit message rewording

Signed-off-by: Willem de Bruijn <[email protected]>

---

This version applies cleanly to linux-next-20201117.
---
 arch/alpha/kernel/syscalls/syscall.tbl      |   1 +
 arch/arm/tools/syscall.tbl                  |   1 +
 arch/arm64/include/asm/unistd.h             |   2 +-
 arch/arm64/include/asm/unistd32.h           |   2 +
 arch/ia64/kernel/syscalls/syscall.tbl       |   1 +
 arch/m68k/kernel/syscalls/syscall.tbl       |   1 +
 arch/microblaze/kernel/syscalls/syscall.tbl |   1 +
 arch/mips/kernel/syscalls/syscall_n32.tbl   |   1 +
 arch/mips/kernel/syscalls/syscall_n64.tbl   |   1 +
 arch/mips/kernel/syscalls/syscall_o32.tbl   |   1 +
 arch/parisc/kernel/syscalls/syscall.tbl     |   1 +
 arch/powerpc/kernel/syscalls/syscall.tbl    |   1 +
 arch/s390/kernel/syscalls/syscall.tbl       |   1 +
 arch/sh/kernel/syscalls/syscall.tbl         |   1 +
 arch/sparc/kernel/syscalls/syscall.tbl      |   1 +
 arch/x86/entry/syscalls/syscall_32.tbl      |   1 +
 arch/x86/entry/syscalls/syscall_64.tbl      |   1 +
 arch/xtensa/kernel/syscalls/syscall.tbl     |   1 +
 fs/eventpoll.c                              | 106 ++++++++++++++++----
 include/linux/compat.h                      |   6 ++
 include/linux/syscalls.h                    |   5 +
 include/uapi/asm-generic/unistd.h           |   4 +-
 kernel/sys_ni.c                             |   2 +
 23 files changed, 123 insertions(+), 20 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index c5cc5bfa2062..e0a599bfb0e1 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -481,3 +481,4 @@
 549    common  faccessat2                      sys_faccessat2
 550    common  process_madvise                 sys_process_madvise
 551    common  watch_mount                     sys_watch_mount
+552    common  epoll_pwait2                    sys_epoll_pwait2
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 47325b3b661a..dbde88a855b6 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -455,3 +455,4 @@
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  watch_mount                     sys_watch_mount
+443    common  epoll_pwait2                    sys_epoll_pwait2
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 949788f5ba40..d1f7d35f986e 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
 #define __ARM_NR_compat_set_tls                (__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END            (__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls           443
+#define __NR_compat_syscalls           444
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index c71c3fe0b6cd..b84e24a7e2c0 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -893,6 +893,8 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SYSCALL(__NR_watch_mount, sys_watch_mount)
 #define __NR_memfd_secret 442
 __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
+#define __NR_epoll_pwait2 443
+__SYSCALL(__NR_epoll_pwait2, sys_epoll_pwait2)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 033244462350..c8809959636f 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -362,3 +362,4 @@
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  watch_mount                     sys_watch_mount
+443    common  epoll_pwait2                    sys_epoll_pwait2
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index efd3ecb3cdfc..dde585616bf8 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -441,3 +441,4 @@
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  watch_mount                     sys_watch_mount
+443    common  epoll_pwait2                    sys_epoll_pwait2
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 67ae5a5e4d21..4c09f27fedd0 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -447,3 +447,4 @@
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  watch_mount                     sys_watch_mount
+443    common  epoll_pwait2                    sys_epoll_pwait2
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index c59bc6acc47a..00921244242d 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -380,3 +380,4 @@
 439    n32     faccessat2                      sys_faccessat2
 440    n32     process_madvise                 sys_process_madvise
 441    n32     watch_mount                     sys_watch_mount
+443    n32     epoll_pwait2                    sys_epoll_pwait2
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 50bbb9517052..b7920f76358d 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -356,3 +356,4 @@
 439    n64     faccessat2                      sys_faccessat2
 440    n64     process_madvise                 sys_process_madvise
 441    n64     watch_mount                     sys_watch_mount
+443    n64     epoll_pwait2                    sys_epoll_pwait2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 1d5644e221b4..2ab5ed500113 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -429,3 +429,4 @@
 439    o32     faccessat2                      sys_faccessat2
 440    o32     process_madvise                 sys_process_madvise
 441    o32     watch_mount                     sys_watch_mount
+443    o32     epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 4492cc2fce23..9caf7d969846 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -439,3 +439,4 @@
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  watch_mount                     sys_watch_mount
+443    common  epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index ee122bfc2ddc..dce635420226 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -531,3 +531,4 @@
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  watch_mount                     sys_watch_mount
+443    common  epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index b467b57d66d1..dedfa6db9f8d 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -444,3 +444,4 @@
 439  common    faccessat2              sys_faccessat2                  sys_faccessat2
 440  common    process_madvise         sys_process_madvise             sys_process_madvise
 441    common  watch_mount             sys_watch_mount                 sys_watch_mount
+443  common    epoll_pwait2            sys_epoll_pwait2                sys_epoll_pwait2
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index f9c7ca5cb969..f7a383ae2c48 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -444,3 +444,4 @@
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  watch_mount                     sys_watch_mount
+443    common  epoll_pwait2                    sys_epoll_pwait2
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 341fd4ba053b..a6cdf450d0f4 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -487,3 +487,4 @@
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  watch_mount                     sys_watch_mount
+443    common  epoll_pwait2                    sys_epoll_pwait2
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 109e6681b8fa..9a4e8ec207fc 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -447,3 +447,4 @@
 440    i386    process_madvise         sys_process_madvise
 441    i386    watch_mount             sys_watch_mount
 442    i386    memfd_secret            sys_memfd_secret
+443    i386    epoll_pwait2            sys_epoll_pwait2                compat_sys_epoll_pwait2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 742cf17d7725..e2cb5f87e760 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,6 +364,7 @@
 440    common  process_madvise         sys_process_madvise
 441    common  watch_mount             sys_watch_mount
 442    common  memfd_secret            sys_memfd_secret
+443    common  epoll_pwait2            sys_epoll_pwait2
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 6225d87be81f..1c8ae5fd026e 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -412,3 +412,4 @@
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  watch_mount                     sys_watch_mount
+443    common  epoll_pwait2                    sys_epoll_pwait2
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 297aeb0ee9d1..0f0543e1cc6f 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1714,13 +1714,11 @@ static int ep_send_events(struct eventpoll *ep,
        return res;
 }
 
-static inline struct timespec64 ep_set_mstimeout(long ms)
+static inline struct timespec64 ep_set_nstimeout(s64 timeout)
 {
-       struct timespec64 now, ts = {
-               .tv_sec = ms / MSEC_PER_SEC,
-               .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
-       };
+       struct timespec64 now, ts;
 
+       ts = ns_to_timespec64(timeout);
        ktime_get_ts64(&now);
        return timespec64_add_safe(now, ts);
 }
@@ -1734,7 +1732,7 @@ static inline struct timespec64 ep_set_mstimeout(long ms)
  *          stored.
  * @maxevents: Size (in terms of number of events) of the caller event buffer.
  * @timeout: Maximum timeout for the ready events fetch operation, in
- *           milliseconds. If the @timeout is zero, the function will not block,
+ *           nanoseconds. If the @timeout is zero, the function will not block,
  *           while if the @timeout is less than zero, the function will block
  *           until at least one event has been retrieved (or an error
  *           occurred).
@@ -1743,7 +1741,7 @@ static inline struct timespec64 ep_set_mstimeout(long ms)
  *          error code, in case of error.
  */
 static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
-                  int maxevents, long timeout)
+                  int maxevents, s64 timeout)
 {
        int res, eavail, timed_out = 0;
        u64 slack = 0;
@@ -1753,7 +1751,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
        lockdep_assert_irqs_enabled();
 
        if (timeout > 0) {
-               struct timespec64 end_time = ep_set_mstimeout(timeout);
+               struct timespec64 end_time = ep_set_nstimeout(timeout);
 
                slack = select_estimate_accuracy(&end_time);
                to = &expires;
@@ -2177,7 +2175,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
  * part of the user space epoll_wait(2).
  */
 static int do_epoll_wait(int epfd, struct epoll_event __user *events,
-                        int maxevents, int timeout)
+                        int maxevents, s64 timeout)
 {
        int error;
        struct fd f;
@@ -2218,19 +2216,27 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events,
        return error;
 }
 
+static s64 timeout_to_ns(int timeout)
+{
+       if (timeout <= 0)
+               return timeout;
+       else
+               return timeout * (s64)NSEC_PER_MSEC;
+}
+
 SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
                int, maxevents, int, timeout)
 {
-       return do_epoll_wait(epfd, events, maxevents, timeout);
+       return do_epoll_wait(epfd, events, maxevents, timeout_to_ns(timeout));
 }
 
 /*
  * Implement the event wait interface for the eventpoll file. It is the kernel
  * part of the user space epoll_pwait(2).
  */
-SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
-               int, maxevents, int, timeout, const sigset_t __user *, sigmask,
-               size_t, sigsetsize)
+static int do_epoll_pwait(int epfd, struct epoll_event __user *events,
+                         int maxevents, s64 timeout,
+                         const sigset_t __user *sigmask, size_t sigsetsize)
 {
        int error;
 
@@ -2248,12 +2254,40 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
        return error;
 }
 
+SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
+               int, maxevents, int, timeout, const sigset_t __user *, sigmask,
+               size_t, sigsetsize)
+{
+       return do_epoll_pwait(epfd, events, maxevents, timeout_to_ns(timeout),
+                             sigmask, sigsetsize);
+}
+
+SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events,
+               int, maxevents, const struct __kernel_timespec __user *, timeout,
+               const sigset_t __user *, sigmask, size_t, sigsetsize)
+{
+       struct timespec64 ts;
+       s64 timeout_ns;
+
+       if (timeout) {
+               if (get_timespec64(&ts, timeout))
+                       return -EFAULT;
+               if (!timespec64_valid(&ts))
+                       return -EINVAL;
+               timeout_ns = timespec64_to_ns(&ts);
+       } else {
+               timeout_ns = -1;
+       }
+
+       return do_epoll_pwait(epfd, events, maxevents, timeout_ns,
+                             sigmask, sigsetsize);
+}
+
 #ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
-                       struct epoll_event __user *, events,
-                       int, maxevents, int, timeout,
-                       const compat_sigset_t __user *, sigmask,
-                       compat_size_t, sigsetsize)
+static int do_compat_epoll_pwait(int epfd, struct epoll_event __user *events,
+                                int maxevents, s64 timeout,
+                                const compat_sigset_t __user *sigmask,
+                                compat_size_t sigsetsize)
 {
        long err;
 
@@ -2270,6 +2304,42 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 
        return err;
 }
+
+COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
+                      struct epoll_event __user *, events,
+                      int, maxevents, int, timeout,
+                      const compat_sigset_t __user *, sigmask,
+                      compat_size_t, sigsetsize)
+{
+       return do_compat_epoll_pwait(epfd, events, maxevents,
+                                    timeout_to_ns(timeout),
+                                    sigmask, sigsetsize);
+}
+
+COMPAT_SYSCALL_DEFINE6(epoll_pwait2, int, epfd,
+                      struct epoll_event __user *, events,
+                      int, maxevents,
+                      const struct __kernel_timespec __user *, timeout,
+                      const compat_sigset_t __user *, sigmask,
+                      compat_size_t, sigsetsize)
+{
+       struct timespec64 ts;
+       s64 timeout_ns;
+
+       if (timeout) {
+               if (get_timespec64(&ts, timeout))
+                       return -EFAULT;
+               if (!timespec64_valid(&ts))
+                       return -EINVAL;
+               timeout_ns = timespec64_to_ns(&ts);
+       } else {
+               timeout_ns = -1;
+       }
+
+       return do_compat_epoll_pwait(epfd, events, maxevents, timeout_ns,
+                                    sigmask, sigsetsize);
+}
+
 #endif
 
 static int __init eventpoll_init(void)
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 14d514233e1d..76dc28a2e7f1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -505,6 +505,12 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
                        int maxevents, int timeout,
                        const compat_sigset_t __user *sigmask,
                        compat_size_t sigsetsize);
+asmlinkage long compat_sys_epoll_pwait2(int epfd,
+                       struct epoll_event __user *events,
+                       int maxevents,
+                       const struct __kernel_timespec __user *timeout,
+                       const compat_sigset_t __user *sigmask,
+                       compat_size_t sigsetsize);
 
 /* fs/fcntl.c */
 asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e87a96ace85b..2ba45fb7874c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -362,6 +362,11 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
                                int maxevents, int timeout,
                                const sigset_t __user *sigmask,
                                size_t sigsetsize);
+asmlinkage long sys_epoll_pwait2(int epfd, struct epoll_event __user *events,
+                                int maxevents,
+                                const struct __kernel_timespec __user *timeout,
+                                const sigset_t __user *sigmask,
+                                size_t sigsetsize);
 
 /* fs/fcntl.c */
 asmlinkage long sys_dup(unsigned int fildes);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 3bc087d14535..aa883388700e 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -865,9 +865,11 @@ __SYSCALL(__NR_watch_mount, sys_watch_mount)
 #define __NR_memfd_secret 442
 __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
 #endif
+#define __NR_epoll_pwait2 443
+__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 
 #undef __NR_syscalls
-#define __NR_syscalls 443
+#define __NR_syscalls 444
 
 /*
  * 32 bit systems traditionally used different
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 805fd7a668be..869aa6b5bf34 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -68,6 +68,8 @@ COND_SYSCALL(epoll_create1);
 COND_SYSCALL(epoll_ctl);
 COND_SYSCALL(epoll_pwait);
 COND_SYSCALL_COMPAT(epoll_pwait);
+COND_SYSCALL(epoll_pwait2);
+COND_SYSCALL_COMPAT(epoll_pwait2);
 
 /* fs/fcntl.c */
 
-- 
2.29.2.454.gaff20da3a2-goog

Reply via email to