From: Graeme Foot <graeme.foot@touchcut.com>
Date: Wed, 23 Sep 2020 09:49:17 +1200

Slaves can fail to read their SII information, leading to
zero vendor id, product code and alias information.

This patch retries scanning the slave from scratch if there are
any problems reading the SII information.

diff -r 33b922ec1871 master/fsm_slave_scan.c
--- a/master/fsm_slave_scan.c	Fri Aug 18 12:30:16 2017 +0200
+++ b/master/fsm_slave_scan.c	Wed Sep 23 09:58:40 2020 +1200
@@ -41,6 +41,15 @@
 
 #include "fsm_slave_scan.h"
 
+/** Time to wait before slave scan retry [ms].
+ *
+ * Used to calculate time based on the jiffies counter.
+ *
+ * \attention Must be more than 10 to avoid problems on kernels that run with
+ * a timer interrupt frequency of 100 Hz.
+ */
+#define SCAN_RETRY_TIME 100
+
 /*****************************************************************************/
 
 void ec_fsm_slave_scan_state_start(ec_fsm_slave_scan_t *);
@@ -64,6 +73,8 @@
 
 void ec_fsm_slave_scan_state_end(ec_fsm_slave_scan_t *);
 void ec_fsm_slave_scan_state_error(ec_fsm_slave_scan_t *);
+void ec_fsm_slave_scan_state_retry(ec_fsm_slave_scan_t *, ec_datagram_t *);
+void ec_fsm_slave_scan_state_retry_wait(ec_fsm_slave_scan_t *, ec_datagram_t *);
 
 void ec_fsm_slave_scan_enter_datalink(ec_fsm_slave_scan_t *);
 #ifdef EC_REGALIAS
@@ -114,6 +125,7 @@
         )
 {
     fsm->slave = slave;
+    fsm->scan_retries = EC_FSM_RETRIES;
     fsm->state = ec_fsm_slave_scan_state_start;
 }
 
@@ -576,13 +588,20 @@
         return;
 
     if (!ec_fsm_sii_success(&fsm->fsm_sii)) {
-        fsm->slave->error_flag = 1;
-        fsm->state = ec_fsm_slave_scan_state_error;
-        EC_SLAVE_ERR(slave, "Failed to determine SII content size:"
-                " Reading word offset 0x%04x failed. Assuming %u words.\n",
-                fsm->sii_offset, EC_FIRST_SII_CATEGORY_OFFSET);
-        slave->sii_nwords = EC_FIRST_SII_CATEGORY_OFFSET;
-        goto alloc_sii;
+        if (fsm->scan_retries--) {
+            EC_SLAVE_ERR(slave, "Failed to determine SII content size"
+                    " Retrying.\n");
+            fsm->state = ec_fsm_slave_scan_state_retry;
+            return;
+        } else {
+            fsm->slave->error_flag = 1;
+            fsm->state = ec_fsm_slave_scan_state_error;
+            EC_SLAVE_ERR(slave, "Failed to determine SII content size:"
+                    " Reading word offset 0x%04x failed. Assuming %u words.\n",
+                    fsm->sii_offset, EC_FIRST_SII_CATEGORY_OFFSET);
+            slave->sii_nwords = EC_FIRST_SII_CATEGORY_OFFSET;
+            goto alloc_sii;
+        }
     }
 
     cat_type = EC_READ_U16(fsm->fsm_sii.value);
@@ -591,11 +610,19 @@
     if (cat_type != 0xFFFF) { // not the last category
         off_t next_offset = 2UL + fsm->sii_offset + cat_size;
         if (next_offset >= EC_MAX_SII_SIZE) {
-            EC_SLAVE_WARN(slave, "SII size exceeds %u words"
-                    " (0xffff limiter missing?).\n", EC_MAX_SII_SIZE);
-            // cut off category data...
-            slave->sii_nwords = EC_FIRST_SII_CATEGORY_OFFSET;
-            goto alloc_sii;
+            if (fsm->scan_retries--) {
+                EC_SLAVE_WARN(slave, "SII size exceeds %u words"
+                        " (0xffff limiter missing?). Retrying.\n",
+                        EC_MAX_SII_SIZE);
+                fsm->state = ec_fsm_slave_scan_state_retry;
+                return;
+            } else {
+                EC_SLAVE_WARN(slave, "SII size exceeds %u words"
+                        " (0xffff limiter missing?).\n", EC_MAX_SII_SIZE);
+                // cut off category data...
+                slave->sii_nwords = EC_FIRST_SII_CATEGORY_OFFSET;
+                goto alloc_sii;
+            }
         }
         fsm->sii_offset = next_offset;
         ec_fsm_sii_read(&fsm->fsm_sii, slave, fsm->sii_offset,
@@ -645,9 +672,13 @@
     if (ec_fsm_sii_exec(&fsm->fsm_sii)) return;
 
     if (!ec_fsm_sii_success(&fsm->fsm_sii)) {
-        fsm->slave->error_flag = 1;
-        fsm->state = ec_fsm_slave_scan_state_error;
         EC_SLAVE_ERR(slave, "Failed to fetch SII contents.\n");
+        if (fsm->scan_retries--) {
+            fsm->state = ec_fsm_slave_scan_state_retry;
+        } else {
+          fsm->slave->error_flag = 1;
+          fsm->state = ec_fsm_slave_scan_state_error;
+        }
         return;
     }
 
@@ -702,6 +733,20 @@
     slave->sii.mailbox_protocols =
         EC_READ_U16(slave->sii_words + 0x001C);
 
+    // check for invalid vendor id and product code
+    if ( (slave->sii.vendor_id == 0) ||
+         (slave->sii.product_code == 0) ) {
+        EC_SLAVE_ERR(slave, "Failed to determine product and vendor id."
+                " SII returned a zero value.\n");
+        if (fsm->scan_retries--) {
+            fsm->state = ec_fsm_slave_scan_state_retry;
+        } else {
+            fsm->slave->error_flag = 1;
+            fsm->state = ec_fsm_slave_scan_state_error;
+        }
+        return;
+    }
+
     if (slave->sii_nwords == EC_FIRST_SII_CATEGORY_OFFSET) {
         // sii does not contain category data
         fsm->state = ec_fsm_slave_scan_state_end;
@@ -992,6 +1037,41 @@
     fsm->state = ec_fsm_slave_scan_state_end;
 }
 
+/*****************************************************************************/
+
+/** Slave scan state: RETRY. Records when the retry delay begins and moves on
+ * to RETRY WAIT. NOTE(review): fsm->state takes only fsm; confirm signature.
+ */
+void ec_fsm_slave_scan_state_retry(
+        ec_fsm_slave_scan_t *fsm, /**< Slave scan state machine. */
+        ec_datagram_t *datagram /**< Datagram; not used by this state. */
+        )
+{
+    ec_slave_t *slave = fsm->slave;
+
+    fsm->state = ec_fsm_slave_scan_state_retry_wait;
+    fsm->scan_jiffies_start = jiffies; // reference point for the wait state
+    EC_SLAVE_WARN(slave, "Retrying slave scan.\n");
+}
+
+/*****************************************************************************/
+
+/** Slave scan state: RETRY WAIT. Idles until SCAN_RETRY_TIME ms have elapsed
+ * since the RETRY state ran, then restarts the scan from the START state.
+ */
+void ec_fsm_slave_scan_state_retry_wait(
+        ec_fsm_slave_scan_t *fsm, /**< Slave scan state machine. */
+        ec_datagram_t *datagram /**< Datagram; not used by this state. */
+        )
+{
+    unsigned long elapsed_jiffies = jiffies - fsm->scan_jiffies_start;
+
+    if (elapsed_jiffies * 1000 / HZ < SCAN_RETRY_TIME) {
+        return; // delay still running, keep polling
+    }
+    fsm->state = ec_fsm_slave_scan_state_start;
+}
+
 /******************************************************************************
  * Common state functions
  *****************************************************************************/
diff -r 33b922ec1871 master/fsm_slave_scan.h
--- a/master/fsm_slave_scan.h	Fri Aug 18 12:30:16 2017 +0200
+++ b/master/fsm_slave_scan.h	Wed Sep 23 09:58:40 2020 +1200
@@ -60,6 +60,8 @@
                                                machine to use. */
     ec_fsm_pdo_t *fsm_pdo; /**< PDO configuration state machine to use. */
     unsigned int retries; /**< Retries on datagram timeout. */
+    unsigned int scan_retries; /**< Retries on scan read error. */
+    unsigned long scan_jiffies_start; /**< scan retry start timestamp. */
 
     void (*state)(ec_fsm_slave_scan_t *); /**< State function. */
     uint16_t sii_offset; /**< SII offset in words. */
