Andrey, Can you get a trace of the guest (echo kvm > /sys/kernel/debug/tracing/set_event; sleep 1s; cp /sys/kernel/debug/tracing/trace /tmp/trace.txt) while it is in the 100% cpu consumption state?
Is it otherwise operational? On Thu, Sep 25, 2014 at 10:40:27PM +0400, Andrey Korolyov wrote: > Hello, > > there is a somewhat weird issue with 3.10, backported IOAPIC behavior > (which allowed windows guest to not hang in my case), 2.1.1 and webkit > suite which is well-known for all timer weirdness for Windows guests. > Here is a way to reproduce: > > - start the guest, > - open webkit-based browser, cpu consumption will raise a bit but with > hv_ bits will remain under single core in my case, > - live-migrate VM, > - close browser, > - observe 100% cpu consumption by guest. > > The loop can be 'destroyed' by doing another live migration, e.g. > without something what drives guest timer insane. > > Launch arguments can be found here: > https://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg04012.html, > also I am re-sending the patch since I found that I missed comma in > last public version. The issue is specific to at least W2k8R2. If > webkit is stood idle for some time, it looks like Windows timer is > falling back to defaults and migration issue will not appear. Bug has > floating nature by itself, so it is just matter of interest if it can > reappear on others` setup. > diff -ru linux-3.10.11/arch/ia64/kvm/kvm-ia64.c > linux-3.10.11.patched-ioapic/arch/ia64/kvm/kvm-ia64.c > --- linux-3.10.11/arch/ia64/kvm/kvm-ia64.c 2013-09-08 09:10:14.000000000 > +0400 > +++ linux-3.10.11.patched-ioapic/arch/ia64/kvm/kvm-ia64.c 2014-08-24 > 19:49:25.723072383 +0400 > @@ -199,6 +199,7 @@ > case KVM_CAP_IRQCHIP: > case KVM_CAP_MP_STATE: > case KVM_CAP_IRQ_INJECT_STATUS: > + case KVM_CAP_IOAPIC_POLARITY_IGNORED: > r = 1; > break; > case KVM_CAP_COALESCED_MMIO: > diff -ru linux-3.10.11/arch/x86/kvm/x86.c > linux-3.10.11.patched-ioapic/arch/x86/kvm/x86.c > --- linux-3.10.11/arch/x86/kvm/x86.c 2013-09-08 09:10:14.000000000 +0400 > +++ linux-3.10.11.patched-ioapic/arch/x86/kvm/x86.c 2014-08-24 > 19:50:06.553716276 +0400 > @@ -2537,6 +2537,7 @@ > case KVM_CAP_GET_TSC_KHZ: > case KVM_CAP_KVMCLOCK_CTRL: > case KVM_CAP_READONLY_MEM: > + case KVM_CAP_IOAPIC_POLARITY_IGNORED: > #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT > case KVM_CAP_ASSIGN_DEV_IRQ: > case KVM_CAP_PCI_2_3: > diff -ru linux-3.10.11/include/uapi/linux/kvm.h > linux-3.10.11.patched-ioapic/include/uapi/linux/kvm.h > --- linux-3.10.11/include/uapi/linux/kvm.h 2013-09-08 09:10:14.000000000 > +0400 > +++ linux-3.10.11.patched-ioapic/include/uapi/linux/kvm.h 2014-08-24 > 19:51:10.975577204 +0400 > @@ -666,6 +666,7 @@ > #define KVM_CAP_IRQ_MPIC 90 > #define KVM_CAP_PPC_RTAS 91 > #define KVM_CAP_IRQ_XICS 92 > +#define KVM_CAP_IOAPIC_POLARITY_IGNORED 93 > > #ifdef KVM_CAP_IRQ_ROUTING > > diff -ru linux-3.10.11/virt/kvm/ioapic.c > linux-3.10.11.patched-ioapic/virt/kvm/ioapic.c > --- linux-3.10.11/virt/kvm/ioapic.c 2013-09-08 09:10:14.000000000 +0400 > +++ linux-3.10.11.patched-ioapic/virt/kvm/ioapic.c 2014-08-24 > 19:59:26.755137527 +0400 > @@ -50,7 +50,7 @@ > #else > #define ioapic_debug(fmt, arg...) > #endif > -static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq, > +static int ioapic_service(struct kvm_ioapic *vioapic, int irq, > bool line_status); > > static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, > @@ -163,23 +163,67 @@ > return false; > } > > -static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx, > - bool line_status) > +static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, > + int irq_level, bool line_status) > { > - union kvm_ioapic_redirect_entry *pent; > - int injected = -1; > + union kvm_ioapic_redirect_entry entry; > + u32 mask = 1 << irq; > + u32 old_irr; > + int edge, ret; > > - pent = &ioapic->redirtbl[idx]; > + entry = ioapic->redirtbl[irq]; > + edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); > > - if (!pent->fields.mask) { > - injected = ioapic_deliver(ioapic, idx, line_status); > - if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) > - pent->fields.remote_irr = 1; > + if (!irq_level) { > + ioapic->irr &= ~mask; > + ret = 1; > + goto out; > + } > + > + /* > + * Return 0 for coalesced interrupts; for edge-triggered interrupts, > + * this only happens if a previous edge has not been delivered due > + * do masking. For level interrupts, the remote_irr field tells > + * us if the interrupt is waiting for an EOI. > + * > + * RTC is special: it is edge-triggered, but userspace likes to know > + * if it has been already ack-ed via EOI because coalesced RTC > + * interrupts lead to time drift in Windows guests. So we track > + * EOI manually for the RTC interrupt. > + */ > + if (irq == RTC_GSI && line_status && > + rtc_irq_check_coalesced(ioapic)) { > + ret = 0; > + goto out; > } > > - return injected; > + old_irr = ioapic->irr; > + ioapic->irr |= mask; > + if ((edge && old_irr == ioapic->irr) || > + (!edge && entry.fields.remote_irr)) { > + ret = 0; > + goto out; > + } > + > + ret = ioapic_service(ioapic, irq, line_status); > + > +out: > + trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); > + return ret; > +} > + > +static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long > irr) > +{ > + u32 idx; > + > + rtc_irq_eoi_tracking_reset(ioapic); > + for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS) > + ioapic_set_irq(ioapic, idx, 1, true); > + > + kvm_rtc_eoi_tracking_restore_all(ioapic); > } > > + > static void update_handled_vectors(struct kvm_ioapic *ioapic) > { > DECLARE_BITMAP(handled_vectors, 256); > @@ -282,12 +326,15 @@ > } > } > > -static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool > line_status) > +static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool > line_status) > { > union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; > struct kvm_lapic_irq irqe; > int ret; > > + if (entry->fields.mask) > + return -1; > + > ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " > "vector=%x trig_mode=%x\n", > entry->fields.dest_id, entry->fields.dest_mode, > @@ -302,6 +349,10 @@ > irqe.level = 1; > irqe.shorthand = 0; > > + if (irqe.trig_mode == IOAPIC_EDGE_TRIG) > + ioapic->irr &= ~(1 << irq); > + > + > if (irq == RTC_GSI && line_status) { > BUG_ON(ioapic->rtc_status.pending_eoi != 0); > ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, > @@ -309,6 +360,8 @@ > ioapic->rtc_status.pending_eoi = ret; > } else > ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); > + if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG) > + entry->fields.remote_irr = 1; > > return ret; > } > @@ -316,39 +369,15 @@ > int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, > int level, bool line_status) > { > - u32 old_irr; > - u32 mask = 1 << irq; > - union kvm_ioapic_redirect_entry entry; > int ret, irq_level; > > BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); > > spin_lock(&ioapic->lock); > - old_irr = ioapic->irr; > irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], > irq_source_id, level); > - entry = ioapic->redirtbl[irq]; > - irq_level ^= entry.fields.polarity; > - if (!irq_level) { > - ioapic->irr &= ~mask; > - ret = 1; > - } else { > - int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); > + ret = ioapic_set_irq(ioapic, irq, irq_level, line_status); > > - if (irq == RTC_GSI && line_status && > - rtc_irq_check_coalesced(ioapic)) { > - ret = 0; /* coalesced */ > - goto out; > - } > - ioapic->irr |= mask; > - if ((edge && old_irr != ioapic->irr) || > - (!edge && !entry.fields.remote_irr)) > - ret = ioapic_service(ioapic, irq, line_status); > - else > - ret = 0; /* report coalesced interrupt */ > - } > -out: > - trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); > spin_unlock(&ioapic->lock); > > return ret; > @@ -394,7 +423,7 @@ > > ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); > ent->fields.remote_irr = 0; > - if (!ent->fields.mask && (ioapic->irr & (1 << i))) > + if (ioapic->irr & (1 << i)) > ioapic_service(ioapic, i, false); > } > } > @@ -595,9 +624,10 @@ > > spin_lock(&ioapic->lock); > memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); > + ioapic->irr = 0; > update_handled_vectors(ioapic); > kvm_vcpu_request_scan_ioapic(kvm); > - kvm_rtc_eoi_tracking_restore_all(ioapic); > + kvm_ioapic_inject_all(ioapic, state->irr); > spin_unlock(&ioapic->lock); > return 0; > }