Nice catch!

Reviewed-by: Dmitry Fleytman <dmitry.fleyt...@gmail.com 
<mailto:dmitry.fleyt...@gmail.com>>

> On 21 Mar 2019, at 3:35, yuchenlin <yuchen...@synology.com> wrote:
> 
> Ping?
> 
> On 2019-03-13 14:56, yuchen...@synology.com wrote:
>> From: yuchenlin <yuchen...@synology.com>
>> Due to too early RCT0 interrput, win10x32 may hang on booting.
>> This problem can be reproduced by doing power cycle on win10x32 guest.
>> In our environment, we have 10 win10x32 and stress power cycle.
>> The problem will happen about 20 rounds.
>> Below shows some log with comment:
>> The normal case:
>> 22831@1551928392.984687:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> 22831@1551928392.985655:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> 22831@1551928392.985801:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: RCTL: 0, mac_reg[RCTL] = 0x0
>> 22831@1551928393.056710:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: ICR read: 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: RCTL: 0, mac_reg[RCTL] = 0x0
>> 22831@1551928393.077548:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: ICR read: 0
>> e1000: set_ics 2, ICR 0, IMR 0
>> e1000: set_ics 2, ICR 2, IMR 0
>> e1000: RCTL: 0, mac_reg[RCTL] = 0x0
>> 22831@1551928393.102974:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> 22831@1551928393.103267:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> e1000: RCTL: 255, mac_reg[RCTL] = 0x40002 <- win10x32 says it can handle
>> RX now
>> e1000: set_ics 0, ICR 2, IMR 9d <- unmask interrupt
>> e1000: RCTL: 255, mac_reg[RCTL] = 0x48002
>> e1000: set_ics 80, ICR 2, IMR 9d <- interrupt and work!
>> ...
>> The bad case:
>> 27744@1551930483.117766:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> 27744@1551930483.118398:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: RCTL: 0, mac_reg[RCTL] = 0x0
>> 27744@1551930483.198063:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: ICR read: 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: RCTL: 0, mac_reg[RCTL] = 0x0
>> 27744@1551930483.218675:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> e1000: set_ics 0, ICR 0, IMR 0
>> e1000: ICR read: 0
>> e1000: set_ics 2, ICR 0, IMR 0
>> e1000: set_ics 2, ICR 2, IMR 0
>> e1000: RCTL: 0, mac_reg[RCTL] = 0x0
>> 27744@1551930483.241768:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> 27744@1551930483.241979:e1000x_rx_disabled Received packet dropped
>> because receive is disabled RCTL = 0
>> e1000: RCTL: 255, mac_reg[RCTL] = 0x40002 <- win10x32 says it can handle
>> RX now
>> e1000: set_ics 80, ICR 2, IMR 0 <- flush queue (caused by setting RCTL)
>> e1000: set_ics 0, ICR 82, IMR 9d <- unmask interrupt and because 0x82&0x9d
>> != 0 generate interrupt, hang on here...
>> To workaround this problem, simply delay flush queue. Also stop receiving
>> when timer is going to run.
>> Tested on CentOS, Win7SP1x64 and Win10x32.
>> Signed-off-by: yuchenlin <yuchen...@synology.com>
>> ---
>> hw/net/e1000.c | 24 ++++++++++++++++++++++--
>> 1 file changed, 22 insertions(+), 2 deletions(-)
>> diff --git a/hw/net/e1000.c b/hw/net/e1000.c
>> index 5e144cb4e4..9b39bccfb2 100644
>> --- a/hw/net/e1000.c
>> +++ b/hw/net/e1000.c
>> @@ -120,6 +120,8 @@ typedef struct E1000State_st {
>>     bool mit_irq_level;        /* Tracks interrupt pin level. */
>>     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
>> +    QEMUTimer *flush_queue_timer;
>> +
>> /* Compatibility flags for migration to/from qemu 1.3.0 and older */
>> #define E1000_FLAG_AUTONEG_BIT 0
>> #define E1000_FLAG_MIT_BIT 1
>> @@ -366,6 +368,7 @@ static void e1000_reset(void *opaque)
>>     timer_del(d->autoneg_timer);
>>     timer_del(d->mit_timer);
>> +    timer_del(d->flush_queue_timer);
>>     d->mit_timer_on = 0;
>>     d->mit_irq_level = 0;
>>     d->mit_ide = 0;
>> @@ -391,6 +394,14 @@ set_ctrl(E1000State *s, int index, uint32_t val)
>>     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
>> }
>> +static void
>> +e1000_flush_queue_timer(void *opaque)
>> +{
>> +    E1000State *s = opaque;
>> +
>> +    qemu_flush_queued_packets(qemu_get_queue(s->nic));
>> +}
>> +
>> static void
>> set_rx_control(E1000State *s, int index, uint32_t val)
>> {
>> @@ -399,7 +410,8 @@ set_rx_control(E1000State *s, int index, uint32_t val)
>>     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
>>     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
>>            s->mac_reg[RCTL]);
>> -    qemu_flush_queued_packets(qemu_get_queue(s->nic));
>> +    timer_mod(s->flush_queue_timer,
>> +              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
>> }
>> static void
>> @@ -837,7 +849,7 @@ e1000_can_receive(NetClientState *nc)
>>     E1000State *s = qemu_get_nic_opaque(nc);
>>     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
>> -        e1000_has_rxbufs(s, 1);
>> +        e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
>> }
>> static uint64_t rx_desc_base(E1000State *s)
>> @@ -881,6 +893,10 @@ e1000_receive_iov(NetClientState *nc, const
>> struct iovec *iov, int iovcnt)
>>         return -1;
>>     }
>> +    if (timer_pending(s->flush_queue_timer)) {
>> +        return 0;
>> +    }
>> +
>>     /* Pad to minimum Ethernet frame length */
>>     if (size < sizeof(min_buf)) {
>>         iov_to_buf(iov, iovcnt, 0, min_buf, size);
>> @@ -1637,6 +1653,8 @@ pci_e1000_uninit(PCIDevice *dev)
>>     timer_free(d->autoneg_timer);
>>     timer_del(d->mit_timer);
>>     timer_free(d->mit_timer);
>> +    timer_del(d->flush_queue_timer);
>> +    timer_free(d->flush_queue_timer);
>>     qemu_del_nic(d->nic);
>> }
>> @@ -1700,6 +1718,8 @@ static void pci_e1000_realize(PCIDevice
>> *pci_dev, Error **errp)
>>     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
>> e1000_autoneg_timer, d);
>>     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
>> +    d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
>> +                                        e1000_flush_queue_timer, d);
>> }
>> static void qdev_e1000_reset(DeviceState *dev)
> 

Reply via email to