Previously, the driver would keep fetching events and received messages until the BMC said it was done. To avoid problems with BMCs that never say they are done, add a limit of 10 fetches at a time.
In addition, an SI interface can report an attn state from the hardware, which is supposed to cause a flag fetch to see whether the driver needs to fetch events, fetch messages, or do a few other things. If the attn bit gets stuck, a similar problem occurs. So allow a waiting message through in between flag fetches so the driver itself doesn't get stuck. This is a more general fix than the previous fix for the specific bad BMC, and it should fix the broader issue of a BMC that won't stop saying it has data. This behavior has been present since the beginning of the driver. It's not a bug per se, but it accounts for bugs in BMCs. Reported-by: Matt Fleming <[email protected]> Closes: https://lore.kernel.org/lkml/[email protected]/ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: [email protected] Signed-off-by: Corey Minyard <[email protected]> --- I have added this problem as a capability in the openipmi library simulator so I can reproduce the issue and make sure everything works properly. drivers/char/ipmi/ipmi_si_intf.c | 54 +++++++++++++++++++++++++------- drivers/char/ipmi/ipmi_ssif.c | 23 ++++++++++++-- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 08c208cc64c5..7c3c463e08da 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -168,6 +168,10 @@ struct smi_info { OEM2_DATA_AVAIL) unsigned char msg_flags; + /* When requesting events and messages, don't do it forever. */ + unsigned int num_requests_in_a_row; + bool last_was_flag_fetch; + /* Does the BMC have an event buffer? 
*/ bool has_event_buffer; @@ -410,7 +414,10 @@ static void start_getting_msg_queue(struct smi_info *smi_info) start_new_msg(smi_info, smi_info->curr_msg->data, smi_info->curr_msg->data_size); - smi_info->si_state = SI_GETTING_MESSAGES; + if (smi_info->si_state != SI_GETTING_MESSAGES) { + smi_info->num_requests_in_a_row = 0; + smi_info->si_state = SI_GETTING_MESSAGES; + } } static void start_getting_events(struct smi_info *smi_info) @@ -421,7 +428,10 @@ static void start_getting_events(struct smi_info *smi_info) start_new_msg(smi_info, smi_info->curr_msg->data, smi_info->curr_msg->data_size); - smi_info->si_state = SI_GETTING_EVENTS; + if (smi_info->si_state != SI_GETTING_EVENTS) { + smi_info->num_requests_in_a_row = 0; + smi_info->si_state = SI_GETTING_EVENTS; + } } /* @@ -595,6 +605,7 @@ static void handle_transaction_done(struct smi_info *smi_info) smi_info->si_state = SI_NORMAL; } else { smi_info->msg_flags = msg[3]; + smi_info->last_was_flag_fetch = true; handle_flags(smi_info); } break; @@ -646,6 +657,11 @@ static void handle_transaction_done(struct smi_info *smi_info) } else { smi_inc_stat(smi_info, events); + smi_info->num_requests_in_a_row++; + if (smi_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + smi_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; + /* * Do this before we deliver the message * because delivering the message releases the @@ -684,6 +700,11 @@ static void handle_transaction_done(struct smi_info *smi_info) } else { smi_inc_stat(smi_info, incoming_messages); + smi_info->num_requests_in_a_row++; + if (smi_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. 
*/ + smi_info->msg_flags &= ~RECEIVE_MSG_AVAIL; + /* * Do this before we deliver the message * because delivering the message releases the @@ -825,6 +846,26 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, goto out; } + /* + * If we are currently idle, or if the last thing that was + * done was a flag fetch and there is a message pending, try + * to start the next message. + * + * We do the waiting message check to avoid a stuck flag + * completely wedging the driver. Let a message through + * in between flag operations if that happens. + */ + if (si_sm_result == SI_SM_IDLE || + (si_sm_result == SI_SM_ATTN && smi_info->waiting_msg && + smi_info->last_was_flag_fetch)) { + smi_info->last_was_flag_fetch = false; + smi_inc_stat(smi_info, idles); + + si_sm_result = start_next_msg(smi_info); + if (si_sm_result != SI_SM_IDLE) + goto restart; + } + /* * We prefer handling attn over new messages. But don't do * this if there is not yet an upper layer to handle anything. @@ -852,15 +893,6 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, } } - /* If we are currently idle, try to start the next message. */ - if (si_sm_result == SI_SM_IDLE) { - smi_inc_stat(smi_info, idles); - - si_sm_result = start_next_msg(smi_info); - if (si_sm_result != SI_SM_IDLE) - goto restart; - } - if ((si_sm_result == SI_SM_IDLE) && (atomic_read(&smi_info->req_events))) { /* diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index b49500a1bd36..f3798f4e6a63 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -225,6 +225,9 @@ struct ssif_info { bool has_event_buffer; bool supports_alert; + /* When requesting events and messages, don't do it forever. */ + unsigned int num_requests_in_a_row; + /* * Used to tell what we should do with alerts. If we are * waiting on a response, read the data immediately. 
@@ -413,7 +416,10 @@ static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags) } ssif_info->curr_msg = msg; - ssif_info->ssif_state = SSIF_GETTING_EVENTS; + if (ssif_info->ssif_state != SSIF_GETTING_EVENTS) { + ssif_info->num_requests_in_a_row = 0; + ssif_info->ssif_state = SSIF_GETTING_EVENTS; + } ipmi_ssif_unlock_cond(ssif_info, flags); msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2); @@ -436,7 +442,10 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info, } ssif_info->curr_msg = msg; - ssif_info->ssif_state = SSIF_GETTING_MESSAGES; + if (ssif_info->ssif_state != SSIF_GETTING_MESSAGES) { + ssif_info->num_requests_in_a_row = 0; + ssif_info->ssif_state = SSIF_GETTING_MESSAGES; + } ipmi_ssif_unlock_cond(ssif_info, flags); msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2); @@ -843,6 +852,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; handle_flags(ssif_info, flags); } else { + ssif_info->num_requests_in_a_row++; + if (ssif_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; + handle_flags(ssif_info, flags); ssif_inc_stat(ssif_info, events); deliver_recv_msg(ssif_info, msg); @@ -876,6 +890,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL; handle_flags(ssif_info, flags); } else { + ssif_info->num_requests_in_a_row++; + if (ssif_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL; + ssif_inc_stat(ssif_info, incoming_messages); handle_flags(ssif_info, flags); deliver_recv_msg(ssif_info, msg); -- 2.43.0 _______________________________________________ Openipmi-developer mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/openipmi-developer
