Add a new SS_EXPEDITE flag for the pidfd_send_signal() syscall. When it
is passed together with SIGKILL, the kernel attempts, on a best-effort
basis, to expedite memory reclaim of the victim process. The flag is
rejected with -EINVAL for any signal other than SIGKILL and with -EPERM
for callers that lack CAP_SYS_NICE.

Signed-off-by: Suren Baghdasaryan <sur...@google.com>
---
 include/linux/sched/signal.h |  3 ++-
 include/linux/signal.h       | 11 ++++++++++-
 ipc/mqueue.c                 |  2 +-
 kernel/signal.c              | 37 ++++++++++++++++++++++++++++--------
 kernel/time/itimer.c         |  2 +-
 5 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index e412c092c1e8..8a227633a058 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -327,7 +327,8 @@ extern int send_sig_info(int, struct kernel_siginfo *, struct task_struct *);
 extern void force_sigsegv(int sig, struct task_struct *p);
 extern int force_sig_info(int, struct kernel_siginfo *, struct task_struct *);
 extern int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp);
-extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid);
+extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid,
+                               bool expedite);
 extern int kill_pid_info_as_cred(int, struct kernel_siginfo *, struct pid *,
                                const struct cred *);
 extern int kill_pgrp(struct pid *pid, int sig, int priv);
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 9702016734b1..34b7852aa4a0 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -446,8 +446,17 @@ int __save_altstack(stack_t __user *, unsigned long);
 } while (0);
 
 #ifdef CONFIG_PROC_FS
+
+/*
+ * SS_FLAGS values used in pidfd_send_signal:
+ *
+ * SS_EXPEDITE indicates desire to expedite the operation.
+ */
+#define SS_EXPEDITE    0x00000001
+
 struct seq_file;
 extern void render_sigset_t(struct seq_file *, const char *, sigset_t *);
-#endif
+
+#endif /* CONFIG_PROC_FS */
 
 #endif /* _LINUX_SIGNAL_H */
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index aea30530c472..27c66296e08e 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -720,7 +720,7 @@ static void __do_notify(struct mqueue_inode_info *info)
                        rcu_read_unlock();
 
                        kill_pid_info(info->notify.sigev_signo,
-                                     &sig_i, info->notify_owner);
+                                     &sig_i, info->notify_owner, false);
                        break;
                case SIGEV_THREAD:
                        set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
diff --git a/kernel/signal.c b/kernel/signal.c
index f98448cf2def..02ed4332d17c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -43,6 +43,7 @@
 #include <linux/compiler.h>
 #include <linux/posix-timers.h>
 #include <linux/livepatch.h>
+#include <linux/oom.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
@@ -1394,7 +1395,8 @@ int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp)
        return success ? 0 : retval;
 }
 
-int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
+int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid,
+                                 bool expedite)
 {
        int error = -ESRCH;
        struct task_struct *p;
@@ -1402,8 +1404,17 @@ int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
        for (;;) {
                rcu_read_lock();
                p = pid_task(pid, PIDTYPE_PID);
-               if (p)
+               if (p) {
                        error = group_send_sig_info(sig, info, p, PIDTYPE_TGID);
+
+                       /*
+                        * Ignore expedite_reclaim return value, it is best
+                        * effort only.
+                        */
+                       if (!error && expedite)
+                               expedite_reclaim(p);
+               }
+
                rcu_read_unlock();
                if (likely(!p || error != -ESRCH))
                        return error;
@@ -1420,7 +1431,7 @@ static int kill_proc_info(int sig, struct kernel_siginfo *info, pid_t pid)
 {
        int error;
        rcu_read_lock();
-       error = kill_pid_info(sig, info, find_vpid(pid));
+       error = kill_pid_info(sig, info, find_vpid(pid), false);
        rcu_read_unlock();
        return error;
 }
@@ -1487,7 +1498,7 @@ static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid)
 
        if (pid > 0) {
                rcu_read_lock();
-               ret = kill_pid_info(sig, info, find_vpid(pid));
+               ret = kill_pid_info(sig, info, find_vpid(pid), false);
                rcu_read_unlock();
                return ret;
        }
@@ -1704,7 +1715,7 @@ EXPORT_SYMBOL(kill_pgrp);
 
 int kill_pid(struct pid *pid, int sig, int priv)
 {
-       return kill_pid_info(sig, __si_special(priv), pid);
+       return kill_pid_info(sig, __si_special(priv), pid, false);
 }
 EXPORT_SYMBOL(kill_pid);
 
@@ -3577,10 +3588,20 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
        struct pid *pid;
        kernel_siginfo_t kinfo;
 
-       /* Enforce flags be set to 0 until we add an extension. */
-       if (flags)
+       /* Enforce no unknown flags. */
+       if (flags & ~SS_EXPEDITE)
                return -EINVAL;
 
+       if (flags & SS_EXPEDITE) {
+               /* Enforce SS_EXPEDITE to be used with SIGKILL only. */
+               if (sig != SIGKILL)
+                       return -EINVAL;
+
+               /* Limit expedited killing to privileged users only. */
+               if (!capable(CAP_SYS_NICE))
+                       return -EPERM;
+       }
+
        f = fdget_raw(pidfd);
        if (!f.file)
                return -EBADF;
@@ -3614,7 +3635,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
                prepare_kill_siginfo(sig, &kinfo);
        }
 
-       ret = kill_pid_info(sig, &kinfo, pid);
+       ret = kill_pid_info(sig, &kinfo, pid, (flags & SS_EXPEDITE) != 0);
 
 err:
        fdput(f);
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 02068b2d5862..c926483cdb53 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -140,7 +140,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
        struct pid *leader_pid = sig->pids[PIDTYPE_TGID];
 
        trace_itimer_expire(ITIMER_REAL, leader_pid, 0);
-       kill_pid_info(SIGALRM, SEND_SIG_PRIV, leader_pid);
+       kill_pid_info(SIGALRM, SEND_SIG_PRIV, leader_pid, false);
 
        return HRTIMER_NORESTART;
 }
-- 
2.21.0.392.gf8f6787159e-goog

Reply via email to