When using posted interrupts, and the guest migrates an MSI from one vCPU to another, Xen needs to flush any pending PIRR vectors on the previous vCPU, or else those vectors could get wrongly injected at a later point when the MSI fields have already been updated.
The usage of a fixed vCPU in lowest priority mode when using VT-d posted interrupts is also removed, and as a result VT-d posted interrupts are not used together with lowest priority mode and multiple destinations. That forces vlapic_lowest_prio to be called in order to select the destination vCPU during interrupt dispatch. Note that PIRR is synced to IRR both in pt_irq_destroy_bind and pt_irq_create_bind when the interrupt delivery data is being updated. Reported-by: Joe Jin <joe....@oracle.com> Signed-off-by: Roger Pau Monné <roger....@citrix.com> --- Cc: Joe Jin <joe....@oracle.com> Cc: Juergen Gross <jgr...@suse.com> --- Changes since v3: - In multi-destination mode make sure all destination vCPUs have PIR synced to IRR by using a bitmap. - Drop the bogus selection of a fixed vCPU when using lowest priority mode. Changes since v2: - Also sync PIRR with IRR when using CPU posted interrupts. - Force the selection of a specific vCPU when using posted interrupts for multi-dest. - Change vmsi_deliver_pirq to honor dest_vcpu_id. Changes since v1: - Store the vcpu id also in multi-dest mode if the interrupt is bound to a vcpu for posted delivery. - s/#if/#ifdef/. 
--- xen/arch/x86/hvm/hvm.c | 31 ++++++++ xen/arch/x86/hvm/vlapic.c | 19 +++++ xen/arch/x86/hvm/vmsi.c | 23 ------ xen/drivers/passthrough/io.c | 118 ++++++++++++++----------------- xen/include/asm-x86/hvm/hvm.h | 5 +- xen/include/asm-x86/hvm/vlapic.h | 3 + 6 files changed, 110 insertions(+), 89 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 06a7b40107..0e3379fa6f 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -43,6 +43,7 @@ #include <asm/current.h> #include <asm/e820.h> #include <asm/io.h> +#include <asm/io_apic.h> #include <asm/regs.h> #include <asm/cpufeature.h> #include <asm/processor.h> @@ -5266,6 +5267,36 @@ void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg, alternative_vcall(hvm_funcs.set_segment_register, v, seg, reg); } +int hvm_intr_get_dests(struct domain *d, uint8_t dest, uint8_t dest_mode, + uint8_t delivery_mode, unsigned long *vcpus) +{ + struct vcpu *v; + + switch ( delivery_mode ) + { + case dest_LowestPrio: + /* + * Get all the possible destinations, but note that lowest priority + * mode is only going to inject the interrupt to the vCPU running at + * the lowest processor priority. 
+ * + * Fallthrough + */ + case dest_Fixed: + for_each_vcpu ( d, v ) + if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) ) + __set_bit(v->vcpu_id, vcpus); + break; + + default: + gprintk(XENLOG_WARNING, "unsupported interrupt delivery mode %u\n", + delivery_mode); + return -EINVAL; + } + + return 0; +} + /* * Local variables: * mode: C diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c index 9466258d6f..9d9c6d391a 100644 --- a/xen/arch/x86/hvm/vlapic.c +++ b/xen/arch/x86/hvm/vlapic.c @@ -112,6 +112,25 @@ static void sync_pir_to_irr(struct vcpu *v) alternative_vcall(hvm_funcs.sync_pir_to_irr, v); } +void domain_sync_vlapic_pir(struct domain *d, unsigned long *vcpus) +{ + unsigned int id; + + if ( !bitmap_weight(vcpus, d->max_vcpus) ) + return; + + for ( id = find_first_bit(vcpus, d->max_vcpus); + id < d->max_vcpus; + id = find_next_bit(vcpus, d->max_vcpus, id + 1) ) + { + if ( d->vcpu[id] != current ) + vcpu_pause(d->vcpu[id]); + sync_pir_to_irr(d->vcpu[id]); + if ( d->vcpu[id] != current ) + vcpu_unpause(d->vcpu[id]); + } +} + static int vlapic_find_highest_irr(struct vlapic *vlapic) { sync_pir_to_irr(vlapic_vcpu(vlapic)); diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c index 6597d9f719..66891d7d20 100644 --- a/xen/arch/x86/hvm/vmsi.c +++ b/xen/arch/x86/hvm/vmsi.c @@ -121,29 +121,6 @@ void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci) vmsi_deliver(d, vector, dest, dest_mode, delivery_mode, trig_mode); } -/* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */ -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode) -{ - int dest_vcpu_id = -1, w = 0; - struct vcpu *v; - - if ( d->max_vcpus == 1 ) - return 0; - - for_each_vcpu ( d, v ) - { - if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) ) - { - w++; - dest_vcpu_id = v->vcpu_id; - } - } - if ( w > 1 ) - return -1; - - return dest_vcpu_id; -} - /* MSI-X mask bit hypervisor interception */ 
struct msixtbl_entry { diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c index b292e79382..5289e89bc1 100644 --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -219,62 +219,6 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci) xfree(dpci); } -/* - * This routine handles lowest-priority interrupts using vector-hashing - * mechanism. As an example, modern Intel CPUs use this method to handle - * lowest-priority interrupts. - * - * Here is the details about the vector-hashing mechanism: - * 1. For lowest-priority interrupts, store all the possible destination - * vCPUs in an array. - * 2. Use "gvec % max number of destination vCPUs" to find the right - * destination vCPU in the array for the lowest-priority interrupt. - */ -static struct vcpu *vector_hashing_dest(const struct domain *d, - uint32_t dest_id, - bool dest_mode, - uint8_t gvec) - -{ - unsigned long *dest_vcpu_bitmap; - unsigned int dest_vcpus = 0; - struct vcpu *v, *dest = NULL; - unsigned int i; - - dest_vcpu_bitmap = xzalloc_array(unsigned long, - BITS_TO_LONGS(d->max_vcpus)); - if ( !dest_vcpu_bitmap ) - return NULL; - - for_each_vcpu ( d, v ) - { - if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, APIC_DEST_NOSHORT, - dest_id, dest_mode) ) - continue; - - __set_bit(v->vcpu_id, dest_vcpu_bitmap); - dest_vcpus++; - } - - if ( dest_vcpus != 0 ) - { - unsigned int mod = gvec % dest_vcpus; - unsigned int idx = 0; - - for ( i = 0; i <= mod; i++ ) - { - idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1; - BUG_ON(idx > d->max_vcpus); - } - - dest = d->vcpu[idx - 1]; - } - - xfree(dest_vcpu_bitmap); - - return dest; -} - int pt_irq_create_bind( struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind) { @@ -345,6 +289,8 @@ int pt_irq_create_bind( const struct vcpu *vcpu; uint32_t gflags = pt_irq_bind->u.msi.gflags & ~XEN_DOMCTL_VMSI_X86_UNMASKED; + DECLARE_BITMAP(dest_vcpus, MAX_VIRT_CPUS) = { }; + DECLARE_BITMAP(prev_vcpus, MAX_VIRT_CPUS) = { }; if 
( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) { @@ -411,6 +357,24 @@ int pt_irq_create_bind( pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec; pirq_dpci->gmsi.gflags = gflags; + if ( pirq_dpci->gmsi.dest_vcpu_id != -1 ) + __set_bit(pirq_dpci->gmsi.dest_vcpu_id, prev_vcpus); + else + { + /* + * If previous configuration has multiple possible + * destinations record them in order to sync the PIR to IRR + * afterwards. + */ + dest = MASK_EXTR(pirq_dpci->gmsi.gflags, + XEN_DOMCTL_VMSI_X86_DEST_ID_MASK); + dest_mode = pirq_dpci->gmsi.gflags & + XEN_DOMCTL_VMSI_X86_DM_MASK; + delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags, + XEN_DOMCTL_VMSI_X86_DELIV_MASK); + hvm_intr_get_dests(d, dest, dest_mode, delivery_mode, + prev_vcpus); + } } } /* Calculate dest_vcpu_id for MSI-type pirq migration. */ @@ -420,20 +384,16 @@ int pt_irq_create_bind( delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags, XEN_DOMCTL_VMSI_X86_DELIV_MASK); - dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode); + hvm_intr_get_dests(d, dest, dest_mode, delivery_mode, dest_vcpus); + dest_vcpu_id = bitmap_weight(dest_vcpus, d->max_vcpus) != 1 ? + -1 : find_first_bit(dest_vcpus, d->max_vcpus); pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id; spin_unlock(&d->event_lock); pirq_dpci->gmsi.posted = false; vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL; - if ( iommu_intpost ) - { - if ( delivery_mode == dest_LowestPrio ) - vcpu = vector_hashing_dest(d, dest, dest_mode, - pirq_dpci->gmsi.gvec); - if ( vcpu ) - pirq_dpci->gmsi.posted = true; - } + if ( vcpu && iommu_intpost ) + pirq_dpci->gmsi.posted = true; if ( vcpu && is_iommu_enabled(d) ) hvm_migrate_pirq(pirq_dpci, vcpu); @@ -442,6 +402,9 @@ int pt_irq_create_bind( pi_update_irte(vcpu ? 
&vcpu->arch.hvm.vmx.pi_desc : NULL, info, pirq_dpci->gmsi.gvec); + if ( hvm_funcs.deliver_posted_intr ) + domain_sync_vlapic_pir(d, prev_vcpus); + if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED ) { unsigned long flags; @@ -731,6 +694,31 @@ int pt_irq_destroy_bind( else if ( pirq_dpci && pirq_dpci->gmsi.posted ) pi_update_irte(NULL, pirq, 0); + if ( hvm_funcs.deliver_posted_intr ) + { + DECLARE_BITMAP(vcpus, MAX_VIRT_CPUS) = { }; + + if ( pirq_dpci->gmsi.dest_vcpu_id != -1 ) + __set_bit(pirq_dpci->gmsi.dest_vcpu_id, vcpus); + else + { + /* + * If previous configuration has multiple possible + * destinations record them in order to sync the PIR to IRR. + */ + uint8_t dest = MASK_EXTR(pirq_dpci->gmsi.gflags, + XEN_DOMCTL_VMSI_X86_DEST_ID_MASK); + uint8_t dest_mode = pirq_dpci->gmsi.gflags & + XEN_DOMCTL_VMSI_X86_DM_MASK; + uint8_t delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags, + XEN_DOMCTL_VMSI_X86_DELIV_MASK); + + hvm_intr_get_dests(d, dest, dest_mode, delivery_mode, vcpus); + } + + domain_sync_vlapic_pir(d, vcpus); + } + if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) && list_empty(&pirq_dpci->digl_list) ) { diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h index f86af09898..899665fed8 100644 --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -266,7 +266,6 @@ int vmsi_deliver( uint8_t delivery_mode, uint8_t trig_mode); struct hvm_pirq_dpci; void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *); -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode); enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack); @@ -336,6 +335,10 @@ unsigned long hvm_cr4_guest_valid_bits(const struct domain *d, bool restore); bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v), void *ctxt); +/* Get all the possible destination vCPUs of an interrupt. 
*/ +int hvm_intr_get_dests(struct domain *d, uint8_t dest, uint8_t dest_mode, + uint8_t delivery_mode, unsigned long *vcpus); + #ifdef CONFIG_HVM #define hvm_get_guest_tsc(v) hvm_get_guest_tsc_fixed(v, 0) diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h index dde66b4f0f..2bc2ebadf0 100644 --- a/xen/include/asm-x86/hvm/vlapic.h +++ b/xen/include/asm-x86/hvm/vlapic.h @@ -150,4 +150,7 @@ bool_t vlapic_match_dest( const struct vlapic *target, const struct vlapic *source, int short_hand, uint32_t dest, bool_t dest_mode); +/* Sync the PIR to IRR of all vlapics in the vcpus bitmap. */ +void domain_sync_vlapic_pir(struct domain *d, unsigned long *vcpus); + #endif /* __ASM_X86_HVM_VLAPIC_H__ */ -- 2.24.0 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel