Re: [etherlab-dev] EoE in OP mode

Geller, Nir Tue, 24 Jan 2017 00:55:54 -0800

Hi,

Indeed, I'm using RT_PREEMPT.


After some trial and error I was able to get EoE working while the master is in 
OP mode.
In the end, I drive the EoE FSM not from the RT interrupt context but from the 
EtherCAT-OP thread.

I'm not sure about the implications of this design.
Should I be aware of something with regard to this choice?

Any comments are appreciated.

I'm not sure if it's the right way to do it, but EoE seems to be working 
properly. I'm currently stress testing the system.

My patch is included below for your evaluation.

Thanks a lot,

Nir.


diff -ur etherlab_patchset_20160804/master/ethernet.c 
etherlab_eoe_op/master/ethernet.c
--- etherlab_patchset_20160804/master/ethernet.c       2017-01-23 
17:00:50.657116495 +0200
+++ etherlab_eoe_op/master/ethernet.c           2017-01-23 17:47:04.728268793 
+0200
@@ -442,10 +442,18 @@
  */
void ec_eoe_queue(ec_eoe_t *eoe /**< EoE handler */)
{
-   if (eoe->queue_datagram) {
-       ec_master_queue_datagram_ext(eoe->slave->master, &eoe->datagram);
-       eoe->queue_datagram = 0;
-   }
+    if (eoe->queue_datagram) {
+
+        if(EC_OPERATION == eoe->slave->master->phase)
+        {
+            ec_master_queue_datagram(eoe->slave->master, &eoe->datagram);
+        }
+        else
+        {
+            ec_master_queue_datagram_ext(eoe->slave->master, &eoe->datagram);
+        }
+        eoe->queue_datagram = 0;
+    }
}
 /*****************************************************************************/
@@ -580,6 +588,7 @@
     size_t rec_size, data_size;
     uint8_t *data, frame_type, last_fragment, time_appended, mbox_prot;
     uint8_t fragment_offset, fragment_number;
+             unsigned int skb_alloc_size = 0;
#if EOE_DEBUG_LEVEL >= 2
     uint8_t frame_number;
#endif
@@ -647,8 +656,8 @@
#endif
 #if EOE_DEBUG_LEVEL >= 2
-    EC_SLAVE_DBG(eoe->slave, 0, "EoE %s RX fragment %u%s, offset %u,"
-            " frame %u%s, %zu octets\n", eoe->dev->name, fragment_number,
+    EC_SLAVE_DBG(eoe->slave, 0, "EoE %s RX fragment %u%s, fragment_offset %u,"
+            " frame_number %u%s, %zu octets\n", eoe->dev->name, 
fragment_number,
            last_fragment ? "" : "+", fragment_offset, frame_number,
            time_appended ? ", + timestamp" : "",
            time_appended ? rec_size - 8 : rec_size - 4);
@@ -675,7 +684,18 @@
         }
         // new socket buffer
-        if (!(eoe->rx_skb = dev_alloc_skb(fragment_offset * 32))) {
+
+             if(fragment_offset)
+             {
+                             skb_alloc_size = fragment_offset * 32;
+             }
+             else
+             {
+                             EC_SLAVE_DBG(eoe->slave, 0, 
"ec_eoe_state_rx_fetch: fragment_offset=0 !!!! allocating skb size 1500!!!!\n");
+                             skb_alloc_size = 1500;
+             }
+
+        if (!(eoe->rx_skb = dev_alloc_skb(skb_alloc_size))) {
             if (printk_ratelimit())
                 EC_SLAVE_WARN(eoe->slave, "EoE RX low on mem,"
                         " frame dropped.\n");
@@ -685,9 +705,10 @@
         }
         eoe->rx_skb_offset = 0;
-        eoe->rx_skb_size = fragment_offset * 32;
+        eoe->rx_skb_size = skb_alloc_size;
         eoe->rx_expected_fragment = 0;
     }
+             /* if this is NOT the first fragment */
     else {
         if (!eoe->rx_skb) {
             eoe->stats.rx_dropped++;
@@ -696,8 +717,11 @@
         }
         offset = fragment_offset * 32;
-        if (offset != eoe->rx_skb_offset ||
-            offset + data_size > eoe->rx_skb_size ||
+             // motivation for offset != eoe->rx_skb_offset is unclear, 
because offset is rounded to 32 bits chuncks
+             // and eoe->rx_skb_offset is byte accurate
+             /*if (offset != eoe->rx_skb_offset ||
+                 offset + data_size > eoe->rx_skb_size ||*/
+        if(offset + data_size > eoe->rx_skb_size ||
             fragment_number != eoe->rx_expected_fragment) {
             dev_kfree_skb(eoe->rx_skb);
             eoe->rx_skb = NULL;
@@ -838,6 +862,9 @@
             eoe->tries--; // try again
             eoe->queue_datagram = 1;
         } else {
+            dev_kfree_skb(eoe->tx_frame->skb);
+            kfree(eoe->tx_frame);
+            eoe->tx_frame = NULL;
             eoe->stats.tx_errors++;
#if EOE_DEBUG_LEVEL >= 1
             EC_SLAVE_WARN(eoe->slave, "Failed to receive send"
@@ -854,6 +881,9 @@
             eoe->tries--; // try again
             eoe->queue_datagram = 1;
         } else {
+            dev_kfree_skb(eoe->tx_frame->skb);
+            kfree(eoe->tx_frame);
+            eoe->tx_frame = NULL;
             eoe->stats.tx_errors++;
#if EOE_DEBUG_LEVEL >= 1
             EC_SLAVE_WARN(eoe->slave, "No sending response"
diff -ur etherlab_patchset_20160804/master/master.c 
etherlab_eoe_op/master/master.c
--- etherlab_patchset_20160804/master/master.c           2017-01-23 
17:00:41.873182432 +0200
+++ etherlab_eoe_op/master/master.c               2017-01-23 17:51:39.978196560 
+0200
@@ -106,6 +106,8 @@
void ec_master_clear_device_stats(ec_master_t *);
void ec_master_update_device_stats(ec_master_t *);
+void ecatmc_eoe_fsm(void* priv_data);
+
/*****************************************************************************/
 /** Static variables initializer.
@@ -1816,6 +1818,9 @@
             ec_master_exec_slave_fsms(master);
+            /* Drive the EoE state machine */
+            ecatmc_eoe_fsm((void*)master);
+
             ec_lock_up(&master->master_sem);
         }
@@ -1958,6 +1963,48 @@
 /*****************************************************************************/
+void ecatmc_eoe_fsm(void* priv_data)
+{
+    ec_master_t *master = (ec_master_t *) priv_data;
+    ec_eoe_t *eoe;
+    unsigned int none_open, sth_to_send, all_idle;
+
+             none_open = 1;
+
+        list_for_each_entry(eoe, &master->eoe_handlers, list) {
+            if (ec_eoe_is_open(eoe)) {
+                none_open = 0;
+                break;
+            }
+        }
+        if (none_open)
+            return;
+
+        // actual EoE processing
+        sth_to_send = 0;
+        list_for_each_entry(eoe, &master->eoe_handlers, list) {
+            if ((eoe->slave->current_state == EC_SLAVE_STATE_PREOP) ||
+                (eoe->slave->current_state == EC_SLAVE_STATE_SAFEOP) ||
+                (eoe->slave->current_state == EC_SLAVE_STATE_OP)) {
+                ec_eoe_run(eoe);
+                if (eoe->queue_datagram) {
+                    sth_to_send = 1;
+                }
+                if (!ec_eoe_is_idle(eoe)) {
+                    all_idle = 0;
+                }
+            }
+        }
+
+        if (sth_to_send) {
+            list_for_each_entry(eoe, &master->eoe_handlers, list) {
+                ec_eoe_queue(eoe);
+            }
+             }
+}
+
+/*****************************************************************************/
+
/** Attaches the slave configurations to the slaves.
  */
void ec_master_attach_slave_configs(
@@ -2614,11 +2661,6 @@
     master->receive_cb = master->app_receive_cb;
     master->cb_data = master->app_cb_data;
-#ifdef EC_EOE
-    if (eoe_was_running) {
-        ec_master_eoe_start(master);
-    }
-#endif
     ret = ec_master_thread_start(master, ec_master_operation_thread,
                 "EtherCAT-OP");
     if (ret < 0) {
@@ -2705,10 +2747,6 @@
     }
     ec_master_thread_stop(master);
-#ifdef EC_EOE
-    eoe_was_running = master->eoe_thread != NULL;
-    ec_master_eoe_stop(master);
-#endif
     master->send_cb = ec_master_internal_send_cb;
     master->receive_cb = ec_master_internal_receive_cb;
@@ -2749,9 +2787,7 @@
     master->active = 0;
 #ifdef EC_EOE
-    if (eoe_was_running) {
-        ec_master_eoe_start(master);
-    }
+    ec_master_eoe_start(master);
#endif
     if (ec_master_thread_start(master, ec_master_idle_thread,
                 "EtherCAT-IDLE")) {



From: Gavin Lambert [mailto:[email protected]]
Sent: Thursday, January 19, 2017 1:32 AM
To: Geller, Nir; [email protected]<mailto:[email protected]>; 
Slutsker, Rasty
Subject: RE: EoE in OP mode

Note that the patchset has only really been tested with RT_PREEMPT or otherwise 
standard user mode usage.

In particular, there are some patches that change locks and callbacks in ways 
that I don't think are entirely compatible with RTAI / Xenomai; there have 
previously been reported problems using those with this patchset.

As I was neither the author of those patches nor do I use Xenomai (or EoE) 
myself, I don't really know what needs to be done to resolve the issues (except 
just dropping them and possibly breaking the scenario they were originally 
authored to fix); additionally, I don't have much time at the moment to work on 
EtherCAT.  I welcome assistance in correcting this situation. :)


As far as I understand, ec_master_send/receive are only ever supposed to be 
invoked on one thread at a time; when you're using the userspace library this 
is enforced by a Linux lock in the corresponding ioctl, but this doesn't apply 
or is insufficient when using a kernel-mode application or RTAI/Xenomai.  In 
those, you need to register callbacks and use your own appropriate locking 
mechanism to ensure that the send/receive are not called concurrently.

In particular note that both the send callback and the receive callback are 
permitted to do nothing if called in a context where they can't wait on a lock 
but something else is concurrently busy doing the same thing.  So if you're 
calling send/receive from an interrupt thread, you will need to keep track of 
this and force the EoE thread callback to block until the interrupt is done, 
and also to make the interrupt thread avoid send/receive without blocking if 
the EoE thread is already in the middle of it.  Alternately you could probably 
make the interrupt handler responsible to do both of these things and have the 
EoE callbacks always do nothing, which might be better for your application 
performance.  (Though like I said, I haven't looked at the code much in this 
area so take these suggestions with a grain of salt; I could have something 
incorrect.)

From: etherlab-dev [mailto:[email protected]] On Behalf Of 
Geller, Nir
Sent: Wednesday, 18 January 2017 23:38
To: [email protected]<mailto:[email protected]>; Slutsker, 
Rasty <[email protected]<mailto:[email protected]>>
Subject: [etherlab-dev] EoE in OP mode


Hi,



I recently upgraded ethercat master to Gavin Patchset 20160804, adding to that, 
patch 0061.



EoE seems to be working fine while the master is idle, with heavy SDO traffic 
in parallel.



When the master is active our realtime application invokes  
ecrt_master_receive(master);  and  ecrt_master_send(master);  from interrupt 
context, and NOT from

ec_master_operation_thread() thread context.


The problem comes up when the master is active.



Just as I issue



ifconfig eoe0a1 up



I get a bunch of UNMATCHED DATAGRAMS in the kernel log, and the master is 
released.


[   73.324525] EtherCAT DEBUG 0: UNMATCHED datagram:
[   73.324528] EtherCAT DEBUG: 0D 83 01 00 10 09 08 80 00 00 68 5A 4A 84 9C 9B
[   73.324539] EtherCAT DEBUG: 84 11 01 00
[   73.324544] EtherCAT DEBUG 0: UNMATCHED datagram:
[   73.324547] EtherCAT DEBUG: 04 84 01 00 90 09 08 80 00 00 B0 3D 4C 84 9C 9B
[   73.324557] EtherCAT DEBUG: 84 11 01 00
[   73.324562] EtherCAT DEBUG 0: UNMATCHED datagram:
[   73.324565] EtherCAT DEBUG: 0C 85 00 00 00 00 10 80 00 00 00 00 70 FF FF FF
[   73.324575] EtherCAT DEBUG: 50 52 70 FF FF FF 00 00 31 00 03 00
[   73.324584] EtherCAT DEBUG 0: UNMATCHED datagram:
[   73.324587] EtherCAT DEBUG: 07 86 01 00 30 01 02 00 00 00 08 00 01 00
[   73.324838] EtherCAT 0: fsm->slaves_responding[fsm->dev_idx]=1
[   73.324843] EtherCAT 0: 0 slave(s) responding on main device.
[   73.324846] EtherCAT 0: datagram->working_counter=0     
<-------------------------  In wireshark capture WC is 1 !!!!
[   73.324850] EtherCAT 0: datagram->state=4
[   73.324853] EtherCAT 0: datagram->device_index=0
[   73.324856] EtherCAT 0: datagram->device_origin=0
[   73.324860] EtherCAT 0: datagram->index=134
[   73.324863] EtherCAT 0: datagram->type=7
[   73.324866] EtherCAT DEBUG 0: Rescanning the bus



This happens due to a timeout. When the EoE thread invokes



master->receive_cb(master->cb_data); which leads to an invocation of 
ecrt_master_receive(master); it somehow messes up



master->devices[EC_DEVICE_MAIN].cycles_poll



which leads to a negative time delta in the calculation 
master->devices[EC_DEVICE_MAIN].cycles_poll - datagram->cycles_sent.



Attempting to bypass that in the EoE thread, I commented out  
master->receive_cb(master->cb_data);  and  master->send_cb(master->cb_data);

and once I invoke

ifconfig eoe0a1 up



I get an explosion of


[  123.529911] EtherCAT WARNING 0-main-0: Failed to receive mbox check datagram 
for eoe0a1.
[  123.529918] EtherCAT WARNING 0-main-0: Failed to receive mbox check datagram 
for eoe0a1.
[  123.529925] EtherCAT WARNING 0-main-0: Failed to receive mbox check datagram 
for eoe0a1.
[  123.529932] EtherCAT WARNING 0-main-0: Failed to receive mbox check datagram 
for eoe0a1.




If I comment out only master->receive_cb(master->cb_data);



I get no errors in dmesg, but then of course, EoE is not functional, and the EoE 
thread starts consuming more and more CPU time.



I understand that an invoke of master->send_cb(master->cb_data); leads to

ec_master_internal_send_cb     -->     ecrt_master_send_ext(master);



which pulls datagrams from master->ext_datagram_queue and pushes them forward 
with ec_master_queue_datagram(), and then

invokes ecrt_master_send(master); which will lead to a collision with 
ecrt_master_send() in the interrupt context.



So instead of invoking master->send_cb(master->cb_data); I tried only to pass 
datagrams from master->ext_datagram_queue,

but it caused a kernel panic.





So, if I want EoE to work when the master is active, how should I pass 
datagrams from EoE thread to the master?

Should I change the ethernet.c state machine?



Thanks a lot,



Nir.

etherlab_eoe_op.patch
Description: etherlab_eoe_op.patch

_______________________________________________
etherlab-dev mailing list
[email protected]
http://lists.etherlab.org/mailman/listinfo/etherlab-dev

Re: [etherlab-dev] EoE in OP mode

Reply via email to