On 5/15/23 5:54 PM, Linus Torvalds wrote:
> On Mon, May 15, 2023 at 3:23 PM Mike Christie
> <[email protected]> wrote:
>>
>> The vhost layer really doesn't want any signals and wants to work like 
>> kthreads
>> for that case. To make it really simple can we do something like this where 
>> it
>> separates user and io worker behavior where the major diff is how they handle
>> signals and exit. I also included a fix for the freeze case:
> 
> I don't love the SIGKILL special case, but I also don't find this
> deeply offensive. So if this is what it takes, I'm ok with it.
> 
> I wonder if we could make that special case simply check for "is
> SIGKILL blocked" instead? No normal case will cause that, and it means

Yeah, it's doable. Updated below.

> that a PF_USER_WORKER thread could decide per-thread what it wants to
> do wrt SIGKILL.
> 
> Christian? And I guess we should Cc: Oleg too, since the signal parts
> are an area he's familiar with and has worked on.. Eric Biederman has
> already been on the list and has also been involved
> 
> Oleg: see
> 
>   
> https://lore.kernel.org/lkml/[email protected]/
> 
> for the context here.

Oleg and Christian,


Below is an updated patch that doesn't check for PF_USER_WORKER in the
signal.c code and instead will check for if we have blocked the signal.




diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 537cbf9a2ade..e0f5ac90a228 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -29,7 +29,6 @@ struct kernel_clone_args {
        u32 io_thread:1;
        u32 user_worker:1;
        u32 no_files:1;
-       u32 ignore_signals:1;
        unsigned long stack;
        unsigned long stack_size;
        unsigned long tls;
diff --git a/kernel/fork.c b/kernel/fork.c
index ed4e01daccaa..fd2970b598b2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2336,8 +2336,15 @@ __latent_entropy struct task_struct *copy_process(
        p->flags &= ~PF_KTHREAD;
        if (args->kthread)
                p->flags |= PF_KTHREAD;
-       if (args->user_worker)
+       if (args->user_worker) {
+               /*
+                * User worker are similar to io_threads but they do not
+                * support signals and cleanup is driven via another kernel
+                * interface so even SIGKILL is blocked.
+                */
                p->flags |= PF_USER_WORKER;
+               siginitsetinv(&p->blocked, 0);
+       }
        if (args->io_thread) {
                /*
                 * Mark us an IO worker, and block any signal that isn't
@@ -2517,8 +2524,8 @@ __latent_entropy struct task_struct *copy_process(
        if (retval)
                goto bad_fork_cleanup_io;
 
-       if (args->ignore_signals)
-               ignore_signals(p);
+       if (args->user_worker)
+               p->flags |= PF_NOFREEZE;
 
        stackleak_task_init(p);
 
@@ -2860,7 +2867,6 @@ struct task_struct *create_io_thread(int (*fn)(void *), 
void *arg, int node)
                .fn             = fn,
                .fn_arg         = arg,
                .io_thread      = 1,
-               .user_worker    = 1,
        };
 
        return copy_process(NULL, 0, node, &args);
diff --git a/kernel/signal.c b/kernel/signal.c
index 8f6330f0e9ca..bc7e26072437 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -995,6 +995,19 @@ static inline bool wants_signal(int sig, struct 
task_struct *p)
        return task_curr(p) || !task_sigpending(p);
 }
 
+static void try_set_pending_sigkill(struct task_struct *t)
+{
+       /*
+        * User workers don't support signals and their exit is driven through
+        * their kernel layer, so by default block even SIGKILL.
+        */
+       if (sigismember(&t->blocked, SIGKILL))
+               return;
+
+       sigaddset(&t->pending.signal, SIGKILL);
+       signal_wake_up(t, 1);
+}
+
 static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
 {
        struct signal_struct *signal = p->signal;
@@ -1055,8 +1068,7 @@ static void complete_signal(int sig, struct task_struct 
*p, enum pid_type type)
                        t = p;
                        do {
                                task_clear_jobctl_pending(t, 
JOBCTL_PENDING_MASK);
-                               sigaddset(&t->pending.signal, SIGKILL);
-                               signal_wake_up(t, 1);
+                               try_set_pending_sigkill(t);
                        } while_each_thread(p, t);
                        return;
                }
@@ -1373,8 +1385,7 @@ int zap_other_threads(struct task_struct *p)
                /* Don't bother with already dead threads */
                if (t->exit_state)
                        continue;
-               sigaddset(&t->pending.signal, SIGKILL);
-               signal_wake_up(t, 1);
+               try_set_pending_sigkill(t);
        }
 
        return count;
diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c
index b7cbd66f889e..2d8d3ebaec4d 100644
--- a/kernel/vhost_task.c
+++ b/kernel/vhost_task.c
@@ -75,13 +75,13 @@ struct vhost_task *vhost_task_create(int (*fn)(void *), 
void *arg,
                                     const char *name)
 {
        struct kernel_clone_args args = {
-               .flags          = CLONE_FS | CLONE_UNTRACED | CLONE_VM,
+               .flags          = CLONE_FS | CLONE_UNTRACED | CLONE_VM |
+                                 CLONE_THREAD | CLONE_SIGHAND,
                .exit_signal    = 0,
                .fn             = vhost_task_fn,
                .name           = name,
                .user_worker    = 1,
                .no_files       = 1,
-               .ignore_signals = 1,
        };
        struct vhost_task *vtsk;
        struct task_struct *tsk;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d257916f39e5..255a2147e5c1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1207,12 +1207,12 @@ void reclaim_throttle(pg_data_t *pgdat, enum 
vmscan_throttle_state reason)
        DEFINE_WAIT(wait);
 
        /*
-        * Do not throttle user workers, kthreads other than kswapd or
+        * Do not throttle IO/user workers, kthreads other than kswapd or
         * workqueues. They may be required for reclaim to make
         * forward progress (e.g. journalling workqueues or kthreads).
         */
        if (!current_is_kswapd() &&
-           current->flags & (PF_USER_WORKER|PF_KTHREAD)) {
+           current->flags & (PF_USER_WORKER|PF_IO_WORKER|PF_KTHREAD)) {
                cond_resched();
                return;
        }


_______________________________________________
Virtualization mailing list
[email protected]
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Reply via email to