Hello,

there is a somewhat weird issue involving 3.10 with the backported IOAPIC
behavior (which allowed the Windows guest to not hang in my case), 2.1.1, and
the webkit suite, which is well known for causing all kinds of timer
weirdness in Windows guests.
Here is a way to reproduce:

- start the guest,
- open a webkit-based browser; CPU consumption will rise a bit, but with the
hv_ bits it will remain under a single core in my case,
- live-migrate VM,
- close browser,
- observe 100% cpu consumption by guest.

The loop can be 'destroyed' by doing another live migration, that is, one
performed without anything that drives the guest timer insane.

Launch arguments can be found here:
https://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg04012.html,
also I am re-sending the patch, since I found that I missed a comma in the
last public version. The issue is specific to at least W2k8R2. If
webkit is left idle for some time, it looks like the Windows timer
falls back to its defaults and the migration issue does not appear. The bug
is intermittent by nature, so I am just interested in whether it can be
reproduced on others' setups.
diff -ru linux-3.10.11/arch/ia64/kvm/kvm-ia64.c linux-3.10.11.patched-ioapic/arch/ia64/kvm/kvm-ia64.c
--- linux-3.10.11/arch/ia64/kvm/kvm-ia64.c	2013-09-08 09:10:14.000000000 +0400
+++ linux-3.10.11.patched-ioapic/arch/ia64/kvm/kvm-ia64.c	2014-08-24 19:49:25.723072383 +0400
@@ -199,6 +199,7 @@
 	case KVM_CAP_IRQCHIP:
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_IRQ_INJECT_STATUS:
+	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff -ru linux-3.10.11/arch/x86/kvm/x86.c linux-3.10.11.patched-ioapic/arch/x86/kvm/x86.c
--- linux-3.10.11/arch/x86/kvm/x86.c	2013-09-08 09:10:14.000000000 +0400
+++ linux-3.10.11.patched-ioapic/arch/x86/kvm/x86.c	2014-08-24 19:50:06.553716276 +0400
@@ -2537,6 +2537,7 @@
 	case KVM_CAP_GET_TSC_KHZ:
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
+	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
diff -ru linux-3.10.11/include/uapi/linux/kvm.h linux-3.10.11.patched-ioapic/include/uapi/linux/kvm.h
--- linux-3.10.11/include/uapi/linux/kvm.h	2013-09-08 09:10:14.000000000 +0400
+++ linux-3.10.11.patched-ioapic/include/uapi/linux/kvm.h	2014-08-24 19:51:10.975577204 +0400
@@ -666,6 +666,7 @@
 #define KVM_CAP_IRQ_MPIC 90
 #define KVM_CAP_PPC_RTAS 91
 #define KVM_CAP_IRQ_XICS 92
+#define KVM_CAP_IOAPIC_POLARITY_IGNORED 93
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff -ru linux-3.10.11/virt/kvm/ioapic.c linux-3.10.11.patched-ioapic/virt/kvm/ioapic.c
--- linux-3.10.11/virt/kvm/ioapic.c	2013-09-08 09:10:14.000000000 +0400
+++ linux-3.10.11.patched-ioapic/virt/kvm/ioapic.c	2014-08-24 19:59:26.755137527 +0400
@@ -50,7 +50,7 @@
 #else
 #define ioapic_debug(fmt, arg...)
 #endif
-static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq,
+static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
 		bool line_status);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -163,23 +163,67 @@
 	return false;
 }
 
-static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
-		bool line_status)
+static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
+		int irq_level, bool line_status)
 {
-	union kvm_ioapic_redirect_entry *pent;
-	int injected = -1;
+	union kvm_ioapic_redirect_entry entry;
+	u32 mask = 1 << irq;
+	u32 old_irr;
+	int edge, ret;
 
-	pent = &ioapic->redirtbl[idx];
+	entry = ioapic->redirtbl[irq];
+	edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
 
-	if (!pent->fields.mask) {
-		injected = ioapic_deliver(ioapic, idx, line_status);
-		if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
-			pent->fields.remote_irr = 1;
+	if (!irq_level) {
+		ioapic->irr &= ~mask;
+		ret = 1;
+		goto out;
+	}
+
+	/*
+	 * Return 0 for coalesced interrupts; for edge-triggered interrupts,
+	 * this only happens if a previous edge has not been delivered due
+	 * to masking.  For level interrupts, the remote_irr field tells
+	 * us if the interrupt is waiting for an EOI.
+	 *
+	 * RTC is special: it is edge-triggered, but userspace likes to know
+	 * if it has been already ack-ed via EOI because coalesced RTC
+	 * interrupts lead to time drift in Windows guests.  So we track
+	 * EOI manually for the RTC interrupt.
+	 */
+	if (irq == RTC_GSI && line_status &&
+		rtc_irq_check_coalesced(ioapic)) {
+		ret = 0;
+		goto out;
 	}
 
-	return injected;
+	old_irr = ioapic->irr;
+	ioapic->irr |= mask;
+	if ((edge && old_irr == ioapic->irr) ||
+	    (!edge && entry.fields.remote_irr)) {
+		ret = 0;
+		goto out;
+	}
+
+	ret = ioapic_service(ioapic, irq, line_status);
+
+out:
+	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+	return ret;
+}
+
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+{
+	u32 idx;
+
+	rtc_irq_eoi_tracking_reset(ioapic);
+	for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
+		ioapic_set_irq(ioapic, idx, 1, true);
+
+	kvm_rtc_eoi_tracking_restore_all(ioapic);
 }
 
+
 static void update_handled_vectors(struct kvm_ioapic *ioapic)
 {
 	DECLARE_BITMAP(handled_vectors, 256);
@@ -282,12 +326,15 @@
 	}
 }
 
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
+static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 {
 	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
 	struct kvm_lapic_irq irqe;
 	int ret;
 
+	if (entry->fields.mask)
+	    return -1;
+
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
 		     entry->fields.dest_id, entry->fields.dest_mode,
@@ -302,6 +349,10 @@
 	irqe.level = 1;
 	irqe.shorthand = 0;
 
+	if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
+	    ioapic->irr &= ~(1 << irq);
+
+
 	if (irq == RTC_GSI && line_status) {
 		BUG_ON(ioapic->rtc_status.pending_eoi != 0);
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
@@ -309,6 +360,8 @@
 		ioapic->rtc_status.pending_eoi = ret;
 	} else
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+	if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
+	    entry->fields.remote_irr = 1;
 
 	return ret;
 }
@@ -316,39 +369,15 @@
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 		       int level, bool line_status)
 {
-	u32 old_irr;
-	u32 mask = 1 << irq;
-	union kvm_ioapic_redirect_entry entry;
 	int ret, irq_level;
 
 	BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
 
 	spin_lock(&ioapic->lock);
-	old_irr = ioapic->irr;
 	irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
 					 irq_source_id, level);
-	entry = ioapic->redirtbl[irq];
-	irq_level ^= entry.fields.polarity;
-	if (!irq_level) {
-		ioapic->irr &= ~mask;
-		ret = 1;
-	} else {
-		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+	ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
 
-		if (irq == RTC_GSI && line_status &&
-			rtc_irq_check_coalesced(ioapic)) {
-			ret = 0; /* coalesced */
-			goto out;
-		}
-		ioapic->irr |= mask;
-		if ((edge && old_irr != ioapic->irr) ||
-		    (!edge && !entry.fields.remote_irr))
-			ret = ioapic_service(ioapic, irq, line_status);
-		else
-			ret = 0; /* report coalesced interrupt */
-	}
-out:
-	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	spin_unlock(&ioapic->lock);
 
 	return ret;
@@ -394,7 +423,7 @@
 
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
-		if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+		if (ioapic->irr & (1 << i))
 			ioapic_service(ioapic, i, false);
 	}
 }
@@ -595,9 +624,10 @@
 
 	spin_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+	ioapic->irr = 0;
 	update_handled_vectors(ioapic);
 	kvm_vcpu_request_scan_ioapic(kvm);
-	kvm_rtc_eoi_tracking_restore_all(ioapic);
+	kvm_ioapic_inject_all(ioapic, state->irr);
 	spin_unlock(&ioapic->lock);
 	return 0;
 }

Reply via email to