The switch is unmanaged, so I can't set anything. But I will try another switch and see if it makes a difference. The change I made was simply to set the pcie variable based on the capability and then to comment out the trigger function invocation in the send function. So, it goes through the whole loop but it doesn't try to restart transmissions.
I made some progress detecting stalls through the stall_check function in rge_chip.c. I simply monitor xmt_ok and if it hasn't changed four times in a row, I restart the chip. The original watchdog is triggered by the send_recycle function, which will only be called after half the tx buffers are used up. It takes a while to generate that many packets. A tx stall recovery now takes 20 secs, which is fast enough to maintain the TCP connections. 4 secs are used to detect the stall and the rest to recover. I will focus on speeding up the latter next. These are the diffs from the original snv_127 driver. r...@safe:/storage/local# diff -r usr/src/uts/common/io/rge rge diff -r usr/src/uts/common/io/rge/rge_chip.c rge/rge_chip.c 45c45 < static uint32_t rge_watchdog_count = 1 << 16; --- > static uint32_t rge_watchdog_count = 4; 706,719c706,707 < switch (chip->mac_ver) { < case MAC_VER_8168: < case MAC_VER_8168B_B: < case MAC_VER_8168B_C: < case MAC_VER_8168C: < case MAC_VER_8101E: < case MAC_VER_8101E_B: < chip->is_pcie = B_TRUE; < break; < < default: < chip->is_pcie = B_FALSE; < break; < } --- > chip->is_pcie = > pci_lcap_locate(rgep->cfg_handle, PCI_CAP_ID_PCI_E, &val16) == > DDI_SUCCESS; 1452a1441,1442 > rge_hw_stats_t *bstp; > uint64_t val; 1453a1444 > boolean_t stall_detected = B_FALSE; 1473,1475d1463 < dogval = rge_atomic_shl32(&rgep->watchdog, 1); < if (dogval < rge_watchdog_count) < return (B_FALSE); 1477,1478c1465,1492 < RGE_REPORT((rgep, "Tx stall detected, watchdog code 0x%x", dogval)); < return (B_TRUE); --- > /*dogval = rge_atomic_shl32(&rgep->watchdog, 1);*/ > dogval = rgep->watchdog; > if (dogval >= rge_watchdog_count) { > stall_detected = B_TRUE; > RGE_REPORT((rgep, "Tx stall detected, watchdog code #1 0x%x", > dogval)); > } > else if (rgep->chipid.is_pcie) { > rge_hw_stats_dump(rgep); > bstp = rgep->hw_stats; > val = RGE_BSWAP_64(bstp->xmt_ok); > if (rgep->stats.prev_xmt_ok == val && rgep->tx_free != > RGE_SEND_SLOTS) { > rgep->watchdog += 1; > if 
(rgep->watchdog > 3) > RGE_REPORT((rgep, "Tx stall detected #2, > watchdog code 0x%x 0x%lx 0x%x", rgep->watchdog, val, rgep->tx_free)); > rgep->resched_needed = B_TRUE; > } > else { > if (rgep->watchdog != 0) > { > if (rgep->watchdog > 3) > RGE_REPORT((rgep, "Tx stall cancelled > #2, watchdog code 0x%x 0x%lx 0x%x", rgep->watchdog, val, rgep->tx_free)); > rgep->watchdog = 0; > } > } > rgep->stats.prev_xmt_ok = val; > } > > return (stall_detected); diff -r usr/src/uts/common/io/rge/rge_rxtx.c rge/rge_rxtx.c 439,444d438 < /* < * Recyled nothing: bump the watchdog counter, < * thus guaranteeing that it's nonzero < * (watchdog activated). < */ < rgep->watchdog += 1; 460d453 < rgep->watchdog = 0; 660,681d652 < /* < * It's observed that in current Realtek PCI-E chips, tx < * request of the second fragment for upper layer packets < * will be ignored if the hardware transmission is in < * progress and will not be processed when the tx engine < * is idle. So one solution is to re-issue the requests < * if the hardware and the software tx packets statistics < * are inconsistent. < */ < if (rgep->chipid.is_pcie && rgep->stats.tx_pre_ismax) { < for (counter = 0; counter < 10; counter ++) { < mutex_enter(rgep->genlock); < rge_hw_stats_dump(rgep); < mutex_exit(rgep->genlock); < bstp = rgep->hw_stats; < if (rgep->stats.opackets < != RGE_BSWAP_64(bstp->rcv_ok)) < rge_tx_trigger(rgep); < else < break; < } < } diff -r usr/src/uts/common/io/rge/rge.h rge/rge.h 356a357 > uint64_t prev_xmt_ok; -- This message posted from opensolaris.org _______________________________________________ opensolaris-discuss mailing list opensolaris-discuss@opensolaris.org