On 07/27/2015 12:55 AM, Hidehiro Kawai wrote:
> panic_event() called as a panic notifier tries to flush queued
> messages, but it can't handle them if the kernel panic happens
> while processing a message.  What happens depends on when the
> kernel panics.

Sorry this took so long, I've been traveling.

I have queued the patches before this one.  They all look good and
necessary.

I'm not so sure about this patch.  It looks like the only thing that is
a real issue is #2 below.
It's not so important to avoid dropping messages.

Can this be simplified somehow to work around the issue at panic time if
intf->curr_msg is set and smi_info->waiting_msg is not?

Thank you,

-corey

>
> Here is the summary of message sending process.
>
>     smi_send()
>      smi_add_send_msg()
> (1)   intf->curr_msg = msg
>      sender()
> (2)   smi_info->waiting_msg = msg
>
>     <asynchronously called>
>     check_start_timer_thread()
>      start_next_msg()
>       smi_info->curr_msg = smi_info->waiting_msg
> (3)   smi_info->waiting_msg = NULL
> (4)   smi_info->handlers->start_transaction()
>
>     <asynchronously called>
>     smi_event_handler()
> (5)  handle_transaction_done()
>       smi_info->curr_msg = NULL
>       deliver_recv_msg()
>        ipmi_smi_msg_received()
>         intf->curr_msg = NULL
>
> If the kernel panics before (1), the requested message will be
> lost.  But it can't be helped.
>
> If the kernel panics before (2), new message sent by
> send_panic_events() is queued to intf->xmit_msgs because
> intf->curr_msg is non-NULL.  But the new message will be never
> sent because no one sends intf->curr_msg.  As the result, the
> kernel hangs up.
>
> If the kernel panics before (3), intf->curr_msg will be sent by
> set_run_to_completion().  It's no problem.
>
> If the kernel panics before (4), intf->curr_msg will be lost.
> However, messages on intf->xmit_msgs will be handled.
>
> If the kernel panics before (5), we try to continue running the
> state machine.  It may successfully complete.
>
> If the kernel panics after (5), we will miss the response message
> handling, but it's not much problem in the panic context.
>
> This patch tries to handle messages in intf->curr_msg and
> intf->xmit_msgs only once without losing them.  To achieve this,
> this patch does that:
>   - if a message is in intf->curr_msg or intf->xmit_msgs and
>     start_transaction() for the message hasn't been done yet,
>     resend it
>   - if start_transaction() for a message has been called,
>     just continue to run the state machine
>   - if the transaction has been completed, do nothing
>
> >From the perspective of implementation, these are done by keeping
> smi_info->waiting_msg until start_transaction() is completed and
> by keeping new flag IPMI_MSG_RESEND_ON_PANIC just before starting
> the state machine.
>
> Signed-off-by: Hidehiro Kawai <hidehiro.kawai...@hitachi.com>
> ---
>  drivers/char/ipmi/ipmi_msghandler.c |   36 
> +++++++++++++++++++++++++++++++++++
>  drivers/char/ipmi/ipmi_si_intf.c    |    5 ++++-
>  include/linux/ipmi_smi.h            |    5 +++++
>  3 files changed, 45 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/char/ipmi/ipmi_msghandler.c 
> b/drivers/char/ipmi/ipmi_msghandler.c
> index 5a2d9fe..3dcd814 100644
> --- a/drivers/char/ipmi/ipmi_msghandler.c
> +++ b/drivers/char/ipmi/ipmi_msghandler.c
> @@ -1493,6 +1493,8 @@ static struct ipmi_smi_msg *smi_add_send_msg(ipmi_smi_t 
> intf,
>                                            struct ipmi_smi_msg *smi_msg,
>                                            int priority)
>  {
> +     smi_msg->flags |= IPMI_MSG_RESEND_ON_PANIC;
> +
>       if (intf->curr_msg) {
>               if (priority > 0)
>                       list_add_tail(&smi_msg->link, &intf->hp_xmit_msgs);
> @@ -4223,6 +4225,7 @@ struct ipmi_smi_msg *ipmi_alloc_smi_msg(void)
>               rv->done = free_smi_msg;
>               rv->user_data = NULL;
>               atomic_inc(&smi_msg_inuse_count);
> +             rv->flags = 0;
>       }
>       return rv;
>  }
> @@ -4531,7 +4534,40 @@ static int panic_event(struct notifier_block *this,
>                       spin_unlock(&intf->waiting_rcv_msgs_lock);
>  
>               intf->run_to_completion = 1;
> +restart:
>               intf->handlers->set_run_to_completion(intf->send_info, 1);
> +
> +             if (intf->curr_msg) {
> +                     /*
> +                      * This can happen if the kernel panics before
> +                      * setting msg to smi_info->waiting_msg or while
> +                      * processing a response.  For the former case, we
> +                      * resend the message by re-queueing it.  For the
> +                      * latter case, we simply ignore it because handling
> +                      * response is not much meaningful in the panic
> +                      * context.
> +                      */
> +
> +                     /*
> +                      * Since we want to send the current message first,
> +                      * re-queue it into the high-prioritized queue.
> +                      */
> +                     if (intf->curr_msg->flags & IPMI_MSG_RESEND_ON_PANIC)
> +                             list_add(&intf->curr_msg->link,
> +                                      &intf->hp_xmit_msgs);
> +
> +                     intf->curr_msg = NULL;
> +             }
> +
> +             if (!list_empty(&intf->hp_xmit_msgs) ||
> +                 !list_empty(&intf->xmit_msgs)) {
> +                     /*
> +                      * This can happen if the kernel panics while
> +                      * processing a response.  Kick the queue and restart.
> +                      */
> +                     smi_recv_tasklet((unsigned long)intf);
> +                     goto restart;
> +             }
>       }
>  
>  #ifdef CONFIG_IPMI_PANIC_EVENT
> diff --git a/drivers/char/ipmi/ipmi_si_intf.c 
> b/drivers/char/ipmi/ipmi_si_intf.c
> index 814b7b7..c5c7806 100644
> --- a/drivers/char/ipmi/ipmi_si_intf.c
> +++ b/drivers/char/ipmi/ipmi_si_intf.c
> @@ -383,7 +383,6 @@ static enum si_sm_result start_next_msg(struct smi_info 
> *smi_info)
>               int err;
>  
>               smi_info->curr_msg = smi_info->waiting_msg;
> -             smi_info->waiting_msg = NULL;
>               debug_timestamp("Start2");
>               err = atomic_notifier_call_chain(&xaction_notifier_list,
>                               0, smi_info);
> @@ -401,6 +400,7 @@ static enum si_sm_result start_next_msg(struct smi_info 
> *smi_info)
>               rv = SI_SM_CALL_WITHOUT_DELAY;
>       }
>   out:
> +     smi_info->waiting_msg = NULL;
>       return rv;
>  }
>  
> @@ -804,6 +804,9 @@ static enum si_sm_result smi_event_handler(struct 
> smi_info *smi_info,
>  {
>       enum si_sm_result si_sm_result;
>  
> +     if (smi_info->curr_msg)
> +             smi_info->curr_msg->flags &= ~(IPMI_MSG_RESEND_ON_PANIC);
> +
>   restart:
>       /*
>        * There used to be a loop here that waited a little while
> diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h
> index ba57fb1..1200872 100644
> --- a/include/linux/ipmi_smi.h
> +++ b/include/linux/ipmi_smi.h
> @@ -47,6 +47,9 @@
>  /* Structure for the low-level drivers. */
>  typedef struct ipmi_smi *ipmi_smi_t;
>  
> +/* Flags for flags member of struct ipmi_smi_msg */
> +#define IPMI_MSG_RESEND_ON_PANIC     1 /* If set, resend in panic_event() */
> +
>  /*
>   * Messages to/from the lower layer.  The smi interface will take one
>   * of these to send. After the send has occurred and a response has
> @@ -75,6 +78,8 @@ struct ipmi_smi_msg {
>       /* Will be called when the system is done with the message
>          (presumably to free it). */
>       void (*done)(struct ipmi_smi_msg *msg);
> +
> +     int flags;
>  };
>  
>  struct ipmi_smi_handlers {
>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to