RE: [[next]PATCH v2] cobalt/thread: move inband work descriptions off the stack

Chen, Hongzhan via Xenomai Mon, 14 Jun 2021 19:21:50 -0700


>-----Original Message-----
>From: Jan Kiszka <[email protected]> 
>Sent: Thursday, June 10, 2021 11:42 PM
>To: Chen, Hongzhan <[email protected]>; [email protected]
>Subject: Re: [[next]PATCH v2] cobalt/thread: move inband work descriptions off 
>the stack
>
>On 10.06.21 04:14, Hongzhan Chen via Xenomai wrote:
>> Unlike the I-pipe, Dovetail does not copy the work descriptor but
>> merely hands over the request to the common irq_work() mechanism. We
>> must guarantee that such descriptor lives in a portion of memory which
>> won't go stale until the handler has run, which by design can only
>> happen once the calling out-of-band context unwinds.
>> 
>> Therefore, we have to add one or more "signal slots" per possible
>> cause of in-band signal into this thread's control block.
>> 
>> For SIGDEBUG, except SIGDEBUG_WATCHDOG all other SIGDEBUG_* cause
>> including SIGDEBUG_MIGRATE_SIGNAL, SIGDEBUG_MIGRATE_SYSCALL,
>> SIGDEBUG_MIGRATE_FAULT, SIGDEBUG_MIGRATE_PRIOINV, SIGDEBUG_NOMLOCK,
>> SIGDEBUG_RESCNT_IMBALANCE, SIGDEBUG_LOCK_BREAK, SIGDEBUG_MUTEX_SLEEP
>> are synchronous and mutually exclusive due to the oob->in-band transition.
>> But SIGDEBUG_WATCHDOG is triggered asynchronously by the oob timer.
>> 
>> All SIGSHADOW_* events need their own slot: SIGSHADOW_ACTION_HARDEN and
>> SIGSHADOW_ACTION_HOME can be raised by remote threads asynchronously to
>> the target thread. SIGSHADOW_ACTION_BACKTRACE comes in addition to
>> SIGDEBUG_MIGRATE_*. For the latter reason, SIGSHADOW_ACTION_BACKTRACE
>> cannot pile up though.
>> 
>> Including SIGTERM, we have totally 6 slots.
>> 
>> Signed-off-by: Hongzhan Chen <[email protected]>
>> 
>> diff --git a/include/cobalt/kernel/thread.h b/include/cobalt/kernel/thread.h
>> index c92037bfe..0c5eacda7 100644
>> --- a/include/cobalt/kernel/thread.h
>> +++ b/include/cobalt/kernel/thread.h
>> @@ -23,6 +23,7 @@
>>  #include <linux/sched.h>
>>  #include <linux/sched/rt.h>
>>  #include <pipeline/thread.h>
>> +#include <pipeline/inband_work.h>
>>  #include <cobalt/kernel/list.h>
>>  #include <cobalt/kernel/stat.h>
>>  #include <cobalt/kernel/timer.h>
>> @@ -42,6 +43,14 @@
>>  #define XNTHREAD_BLOCK_BITS   
>> (XNSUSP|XNPEND|XNDELAY|XNDORMANT|XNRELAX|XNHELD|XNDBGSTOP)
>>  #define XNTHREAD_MODE_BITS    (XNRRB|XNWARN|XNTRAPLB)
>>  
>> +#define XNTHREAD_SIGNALS_NUM           6
>
>Better put at the end.
>
>> +#define XNTHREAD_SIGDEBUG_NONWATCHDOG  0
>> +#define XNTHREAD_SIGDEBUG_WATCHDOD     1
>> +#define XNTHREAD_SIGSHADOW_HARDEN      2
>> +#define XNTHREAD_SIGSHADOW_BACKTRACE   3
>> +#define XNTHREAD_SIGSHADOW_HOME        4
>> +#define XNTHREAD_SIGTERM               5
>
>How about
>
>XNSIGSLOT_...
>XNSIGSLOT_NUM  6
>
>?
>
>> +
>>  struct xnthread;
>>  struct xnsched;
>>  struct xnselector;
>> @@ -50,6 +59,13 @@ struct xnsched_tpslot;
>>  struct xnthread_personality;
>>  struct completion;
>>  
>> +struct lostage_signal {
>> +    struct pipeline_inband_work inband_work; /* Must be first. */
>> +    struct task_struct *task;
>> +    int signo, sigval;
>> +    struct lostage_signal *self; /* Revisit: I-pipe requirement */
>> +};
>> +
>>  struct xnthread_init_attr {
>>      struct xnthread_personality *personality;
>>      cpumask_t affinity;
>> @@ -199,6 +215,29 @@ struct xnthread {
>>      const char *exe_path;   /* Executable path */
>>      u32 proghash;           /* Hash value for exe_path */
>>  #endif
>> +    /*
>> +     * Each slot handle different cause of signal
>> +     * slot 0:
>> +     *      SIGDEBUG_MIGRATE_SIGNAL
>> +     *      SIGDEBUG_MIGRATE_SYSCALL
>> +     *      SIGDEBUG_MIGRATE_FAULT
>> +     *      SIGDEBUG_MIGRATE_PRIOINV
>> +     *      SIGDEBUG_NOMLOCK
>> +     *      SIGDEBUG_RESCNT_IMBALANCE
>> +     *      SIGDEBUG_LOCK_BREAK
>> +     *      SIGDEBUG_MUTEX_SLEEP
>> +     * slot 1:
>> +     *      SIGDEBUG_WATCHDOG
>> +     * slot 2:
>> +     *      SIGSHADOW_ACTION_HARDEN
>> +     * slot 3:
>> +     *      SIGSHADOW_ACTION_BACKTRACE
>> +     * slot 4:
>> +     *      SIGSHADOW_ACTION_HOME
>> +     * slot 5:
>> +     *      SIGTERM
>> +     */
>> +    struct lostage_signal sigarray[XNTHREAD_SIGNALS_NUM];
>>  };
>>  
>>  static inline int xnthread_get_state(const struct xnthread *thread)
>> diff --git a/kernel/cobalt/thread.c b/kernel/cobalt/thread.c
>> index d3a827eaa..d180aa85c 100644
>> --- a/kernel/cobalt/thread.c
>> +++ b/kernel/cobalt/thread.c
>> @@ -2083,12 +2083,6 @@ void xnthread_relax(int notify, int reason)
>>  }
>>  EXPORT_SYMBOL_GPL(xnthread_relax);
>>  
>> -struct lostage_signal {
>> -    struct pipeline_inband_work inband_work; /* Must be first. */
>> -    struct task_struct *task;
>> -    int signo, sigval;
>> -};
>> -
>>  static inline void do_kthread_signal(struct task_struct *p,
>>                                   struct xnthread *thread,
>>                                   struct lostage_signal *rq)
>> @@ -2112,7 +2106,7 @@ static void lostage_task_signal(struct 
>> pipeline_inband_work *inband_work)
>>      thread = xnthread_from_task(p);
>>      if (thread && !xnthread_test_state(thread, XNUSER)) {
>>              do_kthread_signal(p, thread, rq);
>> -            return;
>> +            goto out;
>>      }
>>  
>>      signo = rq->signo;
>> @@ -2127,6 +2121,8 @@ static void lostage_task_signal(struct 
>> pipeline_inband_work *inband_work)
>>              send_sig_info(signo, &si, p);
>>      } else
>>              send_sig(signo, p, 1);
>> +out:
>> +    memset(rq->self, 0, sizeof(*rq));
>
>"rq->self->self = NULL" would suffice, I suppose - but it reads even
>more weirdly.
>
>I missed that on 96d70658a589: We should document what the exact I-pipe
>requirement is here. And that is that the lostage handler will be called
>with a pointer to a copy of that work item under I-pipe, not the
>original one. That is only referenced by self. Had to think about that
>again myself.
>
>>  }
>>  
>>  static int force_wakeup(struct xnthread *thread) /* nklock locked, irqs off 
>> */
>> @@ -2288,19 +2284,77 @@ void xnthread_demote(struct xnthread *thread)
>>  }
>>  EXPORT_SYMBOL_GPL(xnthread_demote);
>>  
>> +static int get_slot_index_from_sig(int sig, int arg)
>> +{
>> +    int action;
>> +
>> +    switch (sig) {
>> +    case SIGDEBUG:
>> +            switch (arg) {
>> +            case SIGDEBUG_WATCHDOG:
>> +                    return XNTHREAD_SIGDEBUG_WATCHDOD;
>> +            case SIGDEBUG_MIGRATE_SIGNAL:
>> +            case SIGDEBUG_MIGRATE_SYSCALL:
>> +            case SIGDEBUG_MIGRATE_FAULT:
>> +            case SIGDEBUG_MIGRATE_PRIOINV:
>> +            case SIGDEBUG_NOMLOCK:
>> +            case SIGDEBUG_RESCNT_IMBALANCE:
>> +            case SIGDEBUG_LOCK_BREAK:
>> +            case SIGDEBUG_MUTEX_SLEEP:
>> +                    return XNTHREAD_SIGDEBUG_NONWATCHDOG;
>> +            }
>> +            break;
>> +    case SIGSHADOW:
>> +            action = sigshadow_action(arg);
>> +            switch (action) {
>> +            case SIGSHADOW_ACTION_HARDEN:
>> +                    return XNTHREAD_SIGSHADOW_HARDEN;
>> +            case SIGSHADOW_ACTION_BACKTRACE:
>> +                    return XNTHREAD_SIGSHADOW_BACKTRACE;
>> +            case SIGSHADOW_ACTION_HOME:
>> +                    return XNTHREAD_SIGSHADOW_HOME;
>> +            }
>> +            break;
>> +    case SIGTERM:
>> +            return XNTHREAD_SIGTERM;
>> +    }
>> +
>> +    return -1;
>> +}
>> +
>>  void xnthread_signal(struct xnthread *thread, int sig, int arg)
>>  {
>> -    struct lostage_signal sigwork = {
>> -            .inband_work = PIPELINE_INBAND_WORK_INITIALIZER(sigwork,
>> -                                    lostage_task_signal),
>> -            .task = xnthread_host_task(thread),
>> -            .signo = sig,
>> -            .sigval = sig == SIGDEBUG ? arg | sigdebug_marker : arg,
>> -    };
>> +    struct lostage_signal *sigwork;
>> +    int slot;
>> +
>> +    slot = get_slot_index_from_sig(sig, arg);
>> +
>> +    if (WARN_ON(slot < 0))
>> +            return;
>> +
>> +    sigwork = &thread->sigarray[slot];
>> +
>> +    /* To avoid invalidating the already submitted event
>> +     * if previous submitted has not been handled as we expected
>> +     * because setting sigwork->self zero is final step
>> +     * in handling sig.
>> +     */
>> +    if (WARN_ON((sigwork->self != NULL) &&
>
>So, we are still assuming that multiple signals for the same slot should
>be a bug? I'm not sure it is. I'm also not sure that sender and lostage
>handler will always be on the same CPU. Look at
>__xnthread_set_schedparam. It's "xnthread_signal(thread, SIGSHADOW,
>SIGSHADOW_ACTION_HOME)" in xnthread_suspend() can run on any CPU,
>targeting any thread on some other CPU, and that possibly multiple times.
>
>I'm afraid we need nklock around enqueue and dequeue so that we neither
>lose a signal nor warn needlessly.


I thought there might be multiple signals pile up here but actually this warn 
never
happen according to my test with smokey sigdebug test item after applying the 
patch.

>
>> +                            (sigwork->self == sigwork))) {
>
>What case is this part of the test excluding?

Because we have not initted them after xnthread is allocated, some data might 
not be empty I guess at the first time of using
according to my test . That was why I said I found duplicate signal case  in 
previous thread when I did not add it but actually it was not
multiple signals case causing return.
If signal can be queued , it is quite different than what we had discussed.  
Actually I do not know in which case multiple signals
would pile up here after splitting into slots. At least , I cannot reproduce 
multiple singles pile up issue with sigdebug test item.
If we consider queue to avoid losing signals for multiple signal in same slot , 
how can we decide the size of queue for each slot?
Sorry for late response, I was taking AL.


>
>> +            return;
>> +    }
>> +
>> +    sigwork->inband_work = (struct pipeline_inband_work)
>> +                    PIPELINE_INBAND_WORK_INITIALIZER(*sigwork,
>> +                                    lostage_task_signal);
>> +    sigwork->task = xnthread_host_task(thread);
>> +    sigwork->signo = sig;
>> +    sigwork->sigval = sig == SIGDEBUG ? arg | sigdebug_marker : arg;
>> +    sigwork->self = sigwork; /* Revisit: I-pipe requirement */
>>  
>> -    trace_cobalt_lostage_request("signal", sigwork.task);
>> +    trace_cobalt_lostage_request("signal", sigwork->task);
>>  
>> -    pipeline_post_inband_work(&sigwork);
>> +    pipeline_post_inband_work(sigwork);
>>  }
>>  EXPORT_SYMBOL_GPL(xnthread_signal);
>>  
>> 
>
>Jan
>
>-- 
>Siemens AG, T RDA IOT
>Corporate Competence Center Embedded Linux
>
>
>
>
>

RE: [[next]PATCH v2] cobalt/thread: move inband work descriptions off the stack

Reply via email to