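When a reset is caused by an hbm protocol mismatch or timeout,
we might end up in an endless reset loop and the hbm protocol
will never sync.

Put hbm into the idle state before the hw reset and ignore hbm
messages that arrive while it is idle, so that resets do not nest.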

Cc: <sta...@vger.kernel.org> 
Signed-off-by: Tomas Winkler <tomas.wink...@intel.com>
Signed-off-by: Alexander Usyskin <alexander.usys...@intel.com>
---
 drivers/misc/mei/hbm.c       | 19 +++++++++++++++++++
 drivers/misc/mei/hbm.h       |  1 +
 drivers/misc/mei/init.c      | 12 ++++++++----
 drivers/misc/mei/interrupt.c | 25 +++++++++++++++----------
 4 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index 8109b9a..836f92d 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -126,6 +126,17 @@ static bool is_treat_specially_client(struct mei_cl *cl,
        return false;
 }
 
+/**
+ * mei_hbm_idle - put hbm into idle state and cancel the init clients timer
+ *
+ * @dev: the device structure
+ */
+void mei_hbm_idle(struct mei_device *dev)
+{
+       dev->init_clients_timer = 0;
+       dev->hbm_state = MEI_HBM_IDLE;
+}
+
 int mei_hbm_start_wait(struct mei_device *dev)
 {
        int ret;
@@ -583,6 +594,14 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
        mei_read_slots(dev, dev->rd_msg_buf, hdr->length);
        mei_msg = (struct mei_bus_message *)dev->rd_msg_buf;
 
+       /* ignore spurious message and prevent reset nesting
+        * hbm is put to idle during system reset
+        */
+       if (dev->hbm_state == MEI_HBM_IDLE) {
+               dev_dbg(&dev->pdev->dev, "hbm: state is idle ignore spurious messages\n");
+               return 0;
+       }
+
        switch (mei_msg->hbm_cmd) {
        case HOST_START_RES_CMD:
                dev_dbg(&dev->pdev->dev, "hbm: start: response message received.\n");
diff --git a/drivers/misc/mei/hbm.h b/drivers/misc/mei/hbm.h
index f2540ff..5f92188 100644
--- a/drivers/misc/mei/hbm.h
+++ b/drivers/misc/mei/hbm.h
@@ -49,6 +49,7 @@ static inline void mei_hbm_hdr(struct mei_msg_hdr *hdr, size_t length)
        hdr->reserved = 0;
 }
 
+void mei_hbm_idle(struct mei_device *dev);
 int mei_hbm_start_req(struct mei_device *dev);
 int mei_hbm_start_wait(struct mei_device *dev);
 int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl);
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index 71cd209..2af08bf 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -129,14 +129,19 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled)
                dev_warn(&dev->pdev->dev, "unexpected reset: dev_state = %s\n",
                         mei_dev_state_str(dev->dev_state));
 
+       /* we're already in reset, so cancel the init timer;
+        * if the reset was called due to an hbm protocol error
+        * we need to do this before hw start
+        * so the hbm watchdog won't kick in
+        */
+       mei_hbm_idle(dev);
+
        ret = mei_hw_reset(dev, interrupts_enabled);
        if (ret) {
                dev_err(&dev->pdev->dev, "hw reset failed disabling the device\n");
                interrupts_enabled = false;
-               dev->dev_state = MEI_DEV_DISABLED;
        }
 
-       dev->hbm_state = MEI_HBM_IDLE;
 
        if (dev->dev_state != MEI_DEV_INITIALIZING &&
            dev->dev_state != MEI_DEV_POWER_UP) {
@@ -161,8 +166,6 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled)
                memset(&dev->wr_ext_msg, 0, sizeof(dev->wr_ext_msg));
        }
 
-       /* we're already in reset, cancel the init timer */
-       dev->init_clients_timer = 0;
 
        dev->me_clients_num = 0;
        dev->rd_msg_hdr = 0;
@@ -170,6 +173,7 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled)
 
        if (!interrupts_enabled) {
                dev_dbg(&dev->pdev->dev, "intr not enabled end of reset\n");
+               dev->dev_state = MEI_DEV_DISABLED;
                return;
        }
 
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index bbb61be..206dbe9 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -537,7 +537,6 @@ EXPORT_SYMBOL_GPL(mei_irq_write_handler);
  *
  * @work: pointer to the work_struct structure
  *
- * NOTE: This function is called by timer interrupt work
  */
 void mei_timer(struct work_struct *work)
 {
@@ -552,18 +551,24 @@ void mei_timer(struct work_struct *work)
 
 
        mutex_lock(&dev->device_lock);
-       if (dev->dev_state != MEI_DEV_ENABLED) {
-               if (dev->dev_state == MEI_DEV_INIT_CLIENTS) {
-                       if (dev->init_clients_timer) {
-                               if (--dev->init_clients_timer == 0) {
-                                       dev_err(&dev->pdev->dev, "reset: init clients timeout hbm_state = %d.\n",
-                                               dev->hbm_state);
-                                       mei_reset(dev, 1);
-                               }
+
+       /* Catch interrupt stalls during HBM init handshake */
+       if (dev->dev_state == MEI_DEV_INIT_CLIENTS &&
+           dev->hbm_state != MEI_HBM_IDLE) {
+
+               if (dev->init_clients_timer) {
+                       if (--dev->init_clients_timer == 0) {
+                               dev_err(&dev->pdev->dev, "timer: init clients timeout hbm_state = %d.\n",
+                                       dev->hbm_state);
+                               mei_reset(dev, 1);
+                               goto out;
                        }
                }
-               goto out;
        }
+
+       if (dev->dev_state != MEI_DEV_ENABLED)
+               goto out;
+
        /*** connect/disconnect timeouts ***/
        list_for_each_entry_safe(cl_pos, cl_next, &dev->file_list, link) {
                if (cl_pos->timer_count) {
-- 
1.8.4.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to