Nice catch!
Reviewed-by: Dmitry Fleytman <dmitry.fleyt...@gmail.com <mailto:dmitry.fleyt...@gmail.com>> > On 21 Mar 2019, at 3:35, yuchenlin <yuchen...@synology.com> wrote: > > Ping? > > On 2019-03-13 14:56, yuchen...@synology.com wrote: >> From: yuchenlin <yuchen...@synology.com> >> Due to too early RCT0 interrput, win10x32 may hang on booting. >> This problem can be reproduced by doing power cycle on win10x32 guest. >> In our environment, we have 10 win10x32 and stress power cycle. >> The problem will happen about 20 rounds. >> Below shows some log with comment: >> The normal case: >> 22831@1551928392.984687:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> 22831@1551928392.985655:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> 22831@1551928392.985801:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: RCTL: 0, mac_reg[RCTL] = 0x0 >> 22831@1551928393.056710:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: ICR read: 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: RCTL: 0, mac_reg[RCTL] = 0x0 >> 22831@1551928393.077548:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: ICR read: 0 >> e1000: set_ics 2, ICR 0, IMR 0 >> e1000: set_ics 2, ICR 2, IMR 0 >> e1000: RCTL: 0, mac_reg[RCTL] = 0x0 >> 22831@1551928393.102974:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> 22831@1551928393.103267:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> e1000: RCTL: 255, mac_reg[RCTL] = 0x40002 <- win10x32 says it can handle >> RX now >> e1000: set_ics 0, ICR 2, IMR 9d <- unmask interrupt >> e1000: RCTL: 255, mac_reg[RCTL] = 0x48002 >> e1000: set_ics 80, ICR 2, IMR 9d <- interrupt and work! >> ... >> The bad case: >> 27744@1551930483.117766:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> 27744@1551930483.118398:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: RCTL: 0, mac_reg[RCTL] = 0x0 >> 27744@1551930483.198063:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: ICR read: 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: RCTL: 0, mac_reg[RCTL] = 0x0 >> 27744@1551930483.218675:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> e1000: set_ics 0, ICR 0, IMR 0 >> e1000: ICR read: 0 >> e1000: set_ics 2, ICR 0, IMR 0 >> e1000: set_ics 2, ICR 2, IMR 0 >> e1000: RCTL: 0, mac_reg[RCTL] = 0x0 >> 27744@1551930483.241768:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> 27744@1551930483.241979:e1000x_rx_disabled Received packet dropped >> because receive is disabled RCTL = 0 >> e1000: RCTL: 255, mac_reg[RCTL] = 0x40002 <- win10x32 says it can handle >> RX now >> e1000: set_ics 80, ICR 2, IMR 0 <- flush queue (caused by setting RCTL) >> e1000: set_ics 0, ICR 82, IMR 9d <- unmask interrupt and because 0x82&0x9d >> != 0 generate interrupt, hang on here... >> To workaround this problem, simply delay flush queue. Also stop receiving >> when timer is going to run. >> Tested on CentOS, Win7SP1x64 and Win10x32. >> Signed-off-by: yuchenlin <yuchen...@synology.com> >> --- >> hw/net/e1000.c | 24 ++++++++++++++++++++++-- >> 1 file changed, 22 insertions(+), 2 deletions(-) >> diff --git a/hw/net/e1000.c b/hw/net/e1000.c >> index 5e144cb4e4..9b39bccfb2 100644 >> --- a/hw/net/e1000.c >> +++ b/hw/net/e1000.c >> @@ -120,6 +120,8 @@ typedef struct E1000State_st { >> bool mit_irq_level; /* Tracks interrupt pin level. */ >> uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */ >> + QEMUTimer *flush_queue_timer; >> + >> /* Compatibility flags for migration to/from qemu 1.3.0 and older */ >> #define E1000_FLAG_AUTONEG_BIT 0 >> #define E1000_FLAG_MIT_BIT 1 >> @@ -366,6 +368,7 @@ static void e1000_reset(void *opaque) >> timer_del(d->autoneg_timer); >> timer_del(d->mit_timer); >> + timer_del(d->flush_queue_timer); >> d->mit_timer_on = 0; >> d->mit_irq_level = 0; >> d->mit_ide = 0; >> @@ -391,6 +394,14 @@ set_ctrl(E1000State *s, int index, uint32_t val) >> s->mac_reg[CTRL] = val & ~E1000_CTRL_RST; >> } >> +static void >> +e1000_flush_queue_timer(void *opaque) >> +{ >> + E1000State *s = opaque; >> + >> + qemu_flush_queued_packets(qemu_get_queue(s->nic)); >> +} >> + >> static void >> set_rx_control(E1000State *s, int index, uint32_t val) >> { >> @@ -399,7 +410,8 @@ set_rx_control(E1000State *s, int index, uint32_t val) >> s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1; >> DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT], >> s->mac_reg[RCTL]); >> - qemu_flush_queued_packets(qemu_get_queue(s->nic)); >> + timer_mod(s->flush_queue_timer, >> + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000); >> } >> static void >> @@ -837,7 +849,7 @@ e1000_can_receive(NetClientState *nc) >> E1000State *s = qemu_get_nic_opaque(nc); >> return e1000x_rx_ready(&s->parent_obj, s->mac_reg) && >> - e1000_has_rxbufs(s, 1); >> + e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer); >> } >> static uint64_t rx_desc_base(E1000State *s) >> @@ -881,6 +893,10 @@ e1000_receive_iov(NetClientState *nc, const >> struct iovec *iov, int iovcnt) >> return -1; >> } >> + if (timer_pending(s->flush_queue_timer)) { >> + return 0; >> + } >> + >> /* Pad to minimum Ethernet frame length */ >> if (size < sizeof(min_buf)) { >> iov_to_buf(iov, iovcnt, 0, min_buf, size); >> @@ -1637,6 +1653,8 @@ pci_e1000_uninit(PCIDevice *dev) >> timer_free(d->autoneg_timer); >> timer_del(d->mit_timer); >> timer_free(d->mit_timer); >> + timer_del(d->flush_queue_timer); >> + timer_free(d->flush_queue_timer); >> qemu_del_nic(d->nic); >> } >> @@ -1700,6 +1718,8 @@ static void pci_e1000_realize(PCIDevice >> *pci_dev, Error **errp) >> d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, >> e1000_autoneg_timer, d); >> d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d); >> + d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, >> + e1000_flush_queue_timer, d); >> } >> static void qdev_e1000_reset(DeviceState *dev) >