The switch is unmanaged, so I can't set anything. But I will try another switch 
and see if it makes a difference. The change I made was simply to set the pcie
variable based on the capability and then to comment out the trigger function
invocation in the send function. So, it goes through the whole loop but it 
doesn't try to restart transmissions. 

I made some progress detecting stalls through the stall_check function in 
rge_chip.c. I simply monitor xmt_ok and if it hasn't changed four times in a
row, I restart the chip. The original watchdog is triggered by the send_recycle 
function, which will only be called after half the tx buffers are used up. It
takes a while to generate that many packets. A tx stall recovery now takes
20 secs, which is fast enough to maintain the TCP connections. 4 secs are used
to detect the stall and the rest to recover. I will focus on speeding up
the latter next.

These are the diffs from the original snv_127 driver.

r...@safe:/storage/local# diff -r usr/src/uts/common/io/rge rge
diff -r usr/src/uts/common/io/rge/rge_chip.c rge/rge_chip.c
45c45
< static uint32_t rge_watchdog_count    = 1 << 16;
---
> static uint32_t rge_watchdog_count    = 4;
706,719c706,707
<       switch (chip->mac_ver) {
<       case MAC_VER_8168:
<       case MAC_VER_8168B_B:
<       case MAC_VER_8168B_C:
<       case MAC_VER_8168C:
<       case MAC_VER_8101E:
<       case MAC_VER_8101E_B:
<               chip->is_pcie = B_TRUE;
<               break;
<
<       default:
<               chip->is_pcie = B_FALSE;
<               break;
<       }
---
>         chip->is_pcie =
>             pci_lcap_locate(rgep->cfg_handle, PCI_CAP_ID_PCI_E, &val16) == 
> DDI_SUCCESS;
1452a1441,1442
>       rge_hw_stats_t *bstp;
>       uint64_t val;
1453a1444
>       boolean_t stall_detected = B_FALSE;
1473,1475d1463
<       dogval = rge_atomic_shl32(&rgep->watchdog, 1);
<       if (dogval < rge_watchdog_count)
<               return (B_FALSE);
1477,1478c1465,1492
<       RGE_REPORT((rgep, "Tx stall detected, watchdog code 0x%x", dogval));
<       return (B_TRUE);
---
>       /*dogval = rge_atomic_shl32(&rgep->watchdog, 1);*/
>       dogval = rgep->watchdog;
>       if (dogval >= rge_watchdog_count) {
>               stall_detected = B_TRUE;
>               RGE_REPORT((rgep, "Tx stall detected, watchdog code #1 0x%x", 
> dogval));
>       }
>       else if (rgep->chipid.is_pcie) {
>                 rge_hw_stats_dump(rgep);
>                 bstp = rgep->hw_stats;
>               val = RGE_BSWAP_64(bstp->xmt_ok);
>               if (rgep->stats.prev_xmt_ok == val && rgep->tx_free != 
> RGE_SEND_SLOTS) {
>                       rgep->watchdog += 1;
>                       if (rgep->watchdog > 3)
>                               RGE_REPORT((rgep, "Tx stall detected #2, 
> watchdog code 0x%x 0x%lx 0x%x", rgep->watchdog, val, rgep->tx_free));
>                       rgep->resched_needed = B_TRUE;
>               }
>               else {
>                       if (rgep->watchdog != 0)
>                       {
>                               if (rgep->watchdog > 3)
>                                       RGE_REPORT((rgep, "Tx stall cancelled 
> #2, watchdog code 0x%x 0x%lx 0x%x", rgep->watchdog, val, rgep->tx_free));
>                               rgep->watchdog = 0;
>                       }
>               }
>               rgep->stats.prev_xmt_ok = val;
>               }
>
>       return (stall_detected);
diff -r usr/src/uts/common/io/rge/rge_rxtx.c rge/rge_rxtx.c
439,444d438
<                               /*
<                                * Recyled nothing: bump the watchdog counter,
<                                * thus guaranteeing that it's nonzero
<                                * (watchdog activated).
<                                */
<                               rgep->watchdog += 1;
460d453
<       rgep->watchdog = 0;
660,681d652
<               /*
<                * It's observed that in current Realtek PCI-E chips, tx
<                * request of the second fragment for upper layer packets
<                * will be ignored if the hardware transmission is in
<                * progress and will not be processed when the tx engine
<                * is idle. So one solution is to re-issue the requests
<                * if the hardware and the software tx packets statistics
<                * are inconsistent.
<                */
<               if (rgep->chipid.is_pcie && rgep->stats.tx_pre_ismax) {
<                       for (counter = 0; counter < 10; counter ++) {
<                               mutex_enter(rgep->genlock);
<                               rge_hw_stats_dump(rgep);
<                               mutex_exit(rgep->genlock);
<                               bstp = rgep->hw_stats;
<                               if (rgep->stats.opackets
<                                   != RGE_BSWAP_64(bstp->rcv_ok))
<                                       rge_tx_trigger(rgep);
<                               else
<                                       break;
<                       }
<               }
diff -r usr/src/uts/common/io/rge/rge.h rge/rge.h
356a357
>       uint64_t        prev_xmt_ok;
-- 
This message posted from opensolaris.org
_______________________________________________
opensolaris-discuss mailing list
opensolaris-discuss@opensolaris.org

Reply via email to