On April 07, 2017 11:24 AM, Chao Gao wrote: >When injecting a periodic timer interrupt in vmx_intr_assist(), multiple read >operations are performed during one event delivery, leading to inconsistent >views of the vIOAPIC. For example, if a periodic timer interrupt comes from the PIT, when >setting the corresponding bit in vIRR, the corresponding RTE is accessed in >pt_update_irq(). When this function returns, it accesses the RTE again to get >the vector it set in vIRR. Between the two accesses, the content of the RTE may >have been changed by another CPU because no protection method is in use. This case >can trigger the assertion failure in vmx_intr_assist(). Also, after a periodic >timer interrupt is injected successfully, pt_irq_posted() will decrease the >count (pending_intr_nr). But if the corresponding RTE has been changed, >pt_irq_posted() will not decrease the count, resulting in one more timer >interrupt. > >More discussion and history can be found in >1.https://lists.xenproject.org/archives/html/xen-devel/2017-03/msg00906.html >2.https://lists.xenproject.org/archives/html/xen-devel/2017-01/msg01019.html > >This patch simply uses irq_lock to avoid these inconsistent views of the vIOAPIC.
To vLAPIC, is it in a similar situation? >To fix the deadlock, provide locked version of several functions. > >Signed-off-by: Chao Gao <chao....@intel.com> >--- > xen/arch/x86/hvm/irq.c | 22 ++++++++++++++++------ > xen/arch/x86/hvm/svm/intr.c | 21 +++++++++++++++------ >xen/arch/x86/hvm/vmx/intr.c | 9 +++++++++ > xen/arch/x86/hvm/vpic.c | 20 ++++++++++++++------ > xen/arch/x86/hvm/vpt.c | 4 ++-- > xen/include/xen/hvm/irq.h | 4 ++++ > 6 files changed, 60 insertions(+), 20 deletions(-) > >diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c index >8625584..a1af61e 100644 >--- a/xen/arch/x86/hvm/irq.c >+++ b/xen/arch/x86/hvm/irq.c >@@ -126,37 +126,47 @@ void hvm_pci_intx_deassert( > spin_unlock(&d->arch.hvm_domain.irq_lock); > } > >-void hvm_isa_irq_assert( >+void hvm_isa_irq_assert_locked( > struct domain *d, unsigned int isa_irq) { > struct hvm_irq *hvm_irq = hvm_domain_irq(d); > unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq); > > ASSERT(isa_irq <= 15); >- >- spin_lock(&d->arch.hvm_domain.irq_lock); >+ ASSERT(spin_is_locked(&d->arch.hvm_domain.irq_lock)); > > if ( !__test_and_set_bit(isa_irq, &hvm_irq->isa_irq.i) && > (hvm_irq->gsi_assert_count[gsi]++ == 0) ) > assert_irq(d, gsi, isa_irq); >+} > >+void hvm_isa_irq_assert( >+ struct domain *d, unsigned int isa_irq) { >+ spin_lock(&d->arch.hvm_domain.irq_lock); >+ hvm_isa_irq_assert_locked(d, isa_irq); > spin_unlock(&d->arch.hvm_domain.irq_lock); > } > >-void hvm_isa_irq_deassert( >+void hvm_isa_irq_deassert_locked( > struct domain *d, unsigned int isa_irq) { > struct hvm_irq *hvm_irq = hvm_domain_irq(d); > unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq); > > ASSERT(isa_irq <= 15); >- >- spin_lock(&d->arch.hvm_domain.irq_lock); >+ ASSERT(spin_is_locked(&d->arch.hvm_domain.irq_lock)); > > if ( __test_and_clear_bit(isa_irq, &hvm_irq->isa_irq.i) && > (--hvm_irq->gsi_assert_count[gsi] == 0) ) > deassert_irq(d, isa_irq); >+} > >+void hvm_isa_irq_deassert( >+ struct domain *d, unsigned int isa_irq) { >+ 
spin_lock(&d->arch.hvm_domain.irq_lock); >+ hvm_isa_irq_deassert_locked(d, isa_irq); > spin_unlock(&d->arch.hvm_domain.irq_lock); > } > >diff --git a/xen/arch/x86/hvm/svm/intr.c b/xen/arch/x86/hvm/svm/intr.c >index 8511ff0..d2a318c 100644 >--- a/xen/arch/x86/hvm/svm/intr.c >+++ b/xen/arch/x86/hvm/svm/intr.c >@@ -137,18 +137,25 @@ void svm_intr_assist(void) > struct hvm_intack intack; > enum hvm_intblk intblk; > >+ /* >+ * Avoid the vIOAPIC RTE being changed by another vCPU. >+ * Otherwise, pt_update_irq() may return a wrong vector which is not >in >+ * vIRR and pt_irq_posted() may not recognize a timer interrupt has >been >+ * injected. >+ */ >+ spin_lock(&v->domain->arch.hvm_domain.irq_lock); > /* Crank the handle on interrupt state. */ > pt_update_irq(v); > > do { > intack = hvm_vcpu_has_pending_irq(v); > if ( likely(intack.source == hvm_intsrc_none) ) >- return; >+ goto out; > > intblk = hvm_interrupt_blocked(v, intack); > if ( intblk == hvm_intblk_svm_gif ) { > ASSERT(nestedhvm_enabled(v->domain)); >- return; >+ goto out; > } > > /* Interrupts for the nested guest are already @@ -165,13 +172,13 >@@ void svm_intr_assist(void) > switch (rc) { > case NSVM_INTR_NOTINTERCEPTED: > /* Inject interrupt into 2nd level guest directly. */ >- break; >+ goto out; > case NSVM_INTR_NOTHANDLED: > case NSVM_INTR_FORCEVMEXIT: >- return; >+ goto out; > case NSVM_INTR_MASKED: > /* Guest already enabled an interrupt window. 
*/ >- return; >+ goto out; > default: > panic("%s: nestedsvm_vcpu_interrupt can't handle >value %#x", > __func__, rc); >@@ -195,7 +202,7 @@ void svm_intr_assist(void) > if ( unlikely(vmcb->eventinj.fields.v) || intblk ) > { > svm_enable_intr_window(v, intack); >- return; >+ goto out; > } > > intack = hvm_vcpu_ack_pending_irq(v, intack); @@ -216,6 +223,8 >@@ void svm_intr_assist(void) > intack = hvm_vcpu_has_pending_irq(v); > if ( unlikely(intack.source != hvm_intsrc_none) ) > svm_enable_intr_window(v, intack); >+ out: >+ spin_unlock(&v->domain->arch.hvm_domain.irq_lock); > } > > /* >diff --git a/xen/arch/x86/hvm/vmx/intr.c b/xen/arch/x86/hvm/vmx/intr.c >index 1eb9f38..8f3d1a7 100644 >--- a/xen/arch/x86/hvm/vmx/intr.c >+++ b/xen/arch/x86/hvm/vmx/intr.c >@@ -234,6 +234,14 @@ void vmx_intr_assist(void) > return; > } > >+ /* >+ * Avoid the vIOAPIC RTE being changed by another vCPU. This comment seems inconsistent with the patch description, which said ' by another CPU ', instead of 'by another vCPU'.. -Quan >+ * Otherwise, pt_update_irq() may return a wrong vector which is not >in >+ * vIRR and pt_irq_posted() may not recognize a timer interrupt has >been >+ * injected. >+ */ >+ spin_lock(&v->domain->arch.hvm_domain.irq_lock); >+ > /* Crank the handle on interrupt state. 
*/ > if ( is_hvm_vcpu(v) ) > pt_vector = pt_update_irq(v); >@@ -396,6 +404,7 @@ void vmx_intr_assist(void) > } > > out: >+ spin_unlock(&v->domain->arch.hvm_domain.irq_lock); > if ( !nestedhvm_vcpu_in_guestmode(v) && > !cpu_has_vmx_virtual_intr_delivery && > cpu_has_vmx_tpr_shadow ) >diff --git a/xen/arch/x86/hvm/vpic.c b/xen/arch/x86/hvm/vpic.c index >e160bbd..8d02e52 100644 >--- a/xen/arch/x86/hvm/vpic.c >+++ b/xen/arch/x86/hvm/vpic.c >@@ -153,14 +153,13 @@ static void __vpic_intack(struct hvm_hw_vpic >*vpic, int irq) > vpic_update_int_output(vpic); > } > >-static int vpic_intack(struct hvm_hw_vpic *vpic) >+static int vpic_intack_locked(struct hvm_hw_vpic *vpic) > { > int irq = -1; > >- vpic_lock(vpic); >- >+ ASSERT(vpic_is_locked(vpic)); > if ( !vpic->int_output ) >- goto out; >+ return irq; > > irq = vpic_get_highest_priority_irq(vpic); > BUG_ON(irq < 0); >@@ -175,7 +174,15 @@ static int vpic_intack(struct hvm_hw_vpic *vpic) > irq += 8; > } > >- out: >+ return irq; >+} >+ >+static int vpic_intack(struct hvm_hw_vpic *vpic) { >+ int irq; >+ >+ vpic_lock(vpic); >+ irq = vpic_intack_locked(vpic); > vpic_unlock(vpic); > return irq; > } >@@ -487,13 +494,14 @@ int vpic_ack_pending_irq(struct vcpu *v) > struct hvm_hw_vpic *vpic = &v->domain->arch.hvm_domain.vpic[0]; > > ASSERT(has_vpic(v->domain)); >+ ASSERT(vpic_is_locked(vpic)); > > TRACE_2D(TRC_HVM_EMUL_PIC_PEND_IRQ_CALL, >vlapic_accept_pic_intr(v), > vpic->int_output); > if ( !vlapic_accept_pic_intr(v) || !vpic->int_output ) > return -1; > >- irq = vpic_intack(vpic); >+ irq = vpic_intack_locked(vpic); > if ( irq == -1 ) > return -1; > >diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c index >e3f2039..d048f26 100644 >--- a/xen/arch/x86/hvm/vpt.c >+++ b/xen/arch/x86/hvm/vpt.c >@@ -296,8 +296,8 @@ int pt_update_irq(struct vcpu *v) > vlapic_set_irq(vcpu_vlapic(v), irq, 0); > else > { >- hvm_isa_irq_deassert(v->domain, irq); >- hvm_isa_irq_assert(v->domain, irq); >+ hvm_isa_irq_deassert_locked(v->domain, irq); 
>+ hvm_isa_irq_assert_locked(v->domain, irq); > } > > /* >diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h index >f041252..a8f36f8 100644 >--- a/xen/include/xen/hvm/irq.h >+++ b/xen/include/xen/hvm/irq.h >@@ -119,8 +119,12 @@ void hvm_pci_intx_deassert( > /* Modify state of an ISA device's IRQ wire. */ void hvm_isa_irq_assert( > struct domain *d, unsigned int isa_irq); >+void hvm_isa_irq_assert_locked( >+ struct domain *d, unsigned int isa_irq); > void hvm_isa_irq_deassert( > struct domain *d, unsigned int isa_irq); >+void hvm_isa_irq_deassert_locked( >+ struct domain *d, unsigned int isa_irq); > > int hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq); > >-- >1.8.3.1 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org https://lists.xen.org/xen-devel