When using posted interrupts, and the guest migrates an MSI from one
vCPU to another, Xen needs to flush any pending PIRR vectors on the
previous vCPU, or else those vectors could get wrongly injected at a
later point when the MSI fields have already been updated.

The usage of a fixed vCPU in lowest priority mode when using VT-d
posted interrupts is also removed, and as a result VT-d posted
interrupts are not used together with lowest priority mode and
multiple destinations. That forces vlapic_lowest_prio to be called in
order to select the destination vCPU during interrupt dispatch.

Note that PIRR is synced to IRR in both pt_irq_destroy_bind and
pt_irq_create_bind when the interrupt delivery data is being updated.

Reported-by: Joe Jin <joe....@oracle.com>
Signed-off-by: Roger Pau Monné <roger....@citrix.com>
---
Cc: Joe Jin <joe....@oracle.com>
Cc: Juergen Gross <jgr...@suse.com>
---
Changes since v3:
 - In multi-destination mode make sure all destination vCPUs have PIR
   synced to IRR by using a bitmap.
 - Drop the bogus selection of a fixed vCPU when using lowest priority
   mode.

Changes since v2:
 - Also sync PIRR with IRR when using CPU posted interrupts.
 - Force the selection of a specific vCPU when using posted interrupts
   for multi-dest.
 - Change vmsi_deliver_pirq to honor dest_vcpu_id.

Changes since v1:
 - Store the vcpu id also in multi-dest mode if the interrupt is bound
   to a vcpu for posted delivery.
 - s/#if/#ifdef/.
---
 xen/arch/x86/hvm/hvm.c           |  31 ++++++++
 xen/arch/x86/hvm/vlapic.c        |  19 +++++
 xen/arch/x86/hvm/vmsi.c          |  23 ------
 xen/drivers/passthrough/io.c     | 118 ++++++++++++++-----------------
 xen/include/asm-x86/hvm/hvm.h    |   5 +-
 xen/include/asm-x86/hvm/vlapic.h |   3 +
 6 files changed, 110 insertions(+), 89 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 06a7b40107..0e3379fa6f 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -43,6 +43,7 @@
 #include <asm/current.h>
 #include <asm/e820.h>
 #include <asm/io.h>
+#include <asm/io_apic.h>
 #include <asm/regs.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
@@ -5266,6 +5267,36 @@ void hvm_set_segment_register(struct vcpu *v, enum 
x86_segment seg,
     alternative_vcall(hvm_funcs.set_segment_register, v, seg, reg);
 }
 
+int hvm_intr_get_dests(struct domain *d, uint8_t dest, uint8_t dest_mode,
+                       uint8_t delivery_mode, unsigned long *vcpus)
+{
+    struct vcpu *v;
+
+    switch ( delivery_mode )
+    {
+    case dest_LowestPrio:
+        /*
+         * Get all the possible destinations, but note that lowest priority
+         * mode will only inject the interrupt into the vCPU with the lowest
+         * processor priority among them.
+         *
+         * Fallthrough
+         */
+    case dest_Fixed:
+        for_each_vcpu ( d, v )
+            if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
+                __set_bit(v->vcpu_id, vcpus);
+        break;
+
+    default:
+        gprintk(XENLOG_WARNING, "unsupported interrupt delivery mode %u\n",
+                delivery_mode);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index 9466258d6f..9d9c6d391a 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -112,6 +112,25 @@ static void sync_pir_to_irr(struct vcpu *v)
         alternative_vcall(hvm_funcs.sync_pir_to_irr, v);
 }
 
+void domain_sync_vlapic_pir(struct domain *d, unsigned long *vcpus)
+{
+    unsigned int id;
+
+    if ( !bitmap_weight(vcpus, d->max_vcpus) )
+        return;
+
+    for ( id = find_first_bit(vcpus, d->max_vcpus);
+          id < d->max_vcpus;
+          id = find_next_bit(vcpus, d->max_vcpus, id + 1) )
+    {
+        if ( d->vcpu[id] != current )
+            vcpu_pause(d->vcpu[id]);
+        sync_pir_to_irr(d->vcpu[id]);
+        if ( d->vcpu[id] != current )
+            vcpu_unpause(d->vcpu[id]);
+    }
+}
+
 static int vlapic_find_highest_irr(struct vlapic *vlapic)
 {
     sync_pir_to_irr(vlapic_vcpu(vlapic));
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index 6597d9f719..66891d7d20 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -121,29 +121,6 @@ void vmsi_deliver_pirq(struct domain *d, const struct 
hvm_pirq_dpci *pirq_dpci)
     vmsi_deliver(d, vector, dest, dest_mode, delivery_mode, trig_mode);
 }
 
-/* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */
-int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
-{
-    int dest_vcpu_id = -1, w = 0;
-    struct vcpu *v;
-    
-    if ( d->max_vcpus == 1 )
-        return 0;
- 
-    for_each_vcpu ( d, v )
-    {
-        if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) ) 
-        {
-            w++;
-            dest_vcpu_id = v->vcpu_id;
-        }
-    }
-    if ( w > 1 )
-        return -1;
-
-    return dest_vcpu_id;
-}
-
 /* MSI-X mask bit hypervisor interception */
 struct msixtbl_entry
 {
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index b292e79382..5289e89bc1 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -219,62 +219,6 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
     xfree(dpci);
 }
 
-/*
- * This routine handles lowest-priority interrupts using vector-hashing
- * mechanism. As an example, modern Intel CPUs use this method to handle
- * lowest-priority interrupts.
- *
- * Here is the details about the vector-hashing mechanism:
- * 1. For lowest-priority interrupts, store all the possible destination
- *    vCPUs in an array.
- * 2. Use "gvec % max number of destination vCPUs" to find the right
- *    destination vCPU in the array for the lowest-priority interrupt.
- */
-static struct vcpu *vector_hashing_dest(const struct domain *d,
-                                        uint32_t dest_id,
-                                        bool dest_mode,
-                                        uint8_t gvec)
-
-{
-    unsigned long *dest_vcpu_bitmap;
-    unsigned int dest_vcpus = 0;
-    struct vcpu *v, *dest = NULL;
-    unsigned int i;
-
-    dest_vcpu_bitmap = xzalloc_array(unsigned long,
-                                     BITS_TO_LONGS(d->max_vcpus));
-    if ( !dest_vcpu_bitmap )
-        return NULL;
-
-    for_each_vcpu ( d, v )
-    {
-        if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, APIC_DEST_NOSHORT,
-                                dest_id, dest_mode) )
-            continue;
-
-        __set_bit(v->vcpu_id, dest_vcpu_bitmap);
-        dest_vcpus++;
-    }
-
-    if ( dest_vcpus != 0 )
-    {
-        unsigned int mod = gvec % dest_vcpus;
-        unsigned int idx = 0;
-
-        for ( i = 0; i <= mod; i++ )
-        {
-            idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1;
-            BUG_ON(idx > d->max_vcpus);
-        }
-
-        dest = d->vcpu[idx - 1];
-    }
-
-    xfree(dest_vcpu_bitmap);
-
-    return dest;
-}
-
 int pt_irq_create_bind(
     struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
 {
@@ -345,6 +289,8 @@ int pt_irq_create_bind(
         const struct vcpu *vcpu;
         uint32_t gflags = pt_irq_bind->u.msi.gflags &
                           ~XEN_DOMCTL_VMSI_X86_UNMASKED;
+        DECLARE_BITMAP(dest_vcpus, MAX_VIRT_CPUS) = { };
+        DECLARE_BITMAP(prev_vcpus, MAX_VIRT_CPUS) = { };
 
         if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
         {
@@ -411,6 +357,24 @@ int pt_irq_create_bind(
 
                 pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
                 pirq_dpci->gmsi.gflags = gflags;
+                if ( pirq_dpci->gmsi.dest_vcpu_id != -1 )
+                    __set_bit(pirq_dpci->gmsi.dest_vcpu_id, prev_vcpus);
+                else
+                {
+                    /*
+                     * If the previous configuration had multiple possible
+                     * destinations, record them all in order to sync PIR
+                     * with IRR afterwards.
+                     */
+                    dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
+                                     XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
+                    dest_mode = pirq_dpci->gmsi.gflags &
+                                XEN_DOMCTL_VMSI_X86_DM_MASK;
+                    delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
+                                              XEN_DOMCTL_VMSI_X86_DELIV_MASK);
+                    hvm_intr_get_dests(d, dest, dest_mode, delivery_mode,
+                                       prev_vcpus);
+                }
             }
         }
         /* Calculate dest_vcpu_id for MSI-type pirq migration. */
@@ -420,20 +384,16 @@ int pt_irq_create_bind(
         delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
                                   XEN_DOMCTL_VMSI_X86_DELIV_MASK);
 
-        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
+        hvm_intr_get_dests(d, dest, dest_mode, delivery_mode, dest_vcpus);
+        dest_vcpu_id = bitmap_weight(dest_vcpus, d->max_vcpus) != 1 ?
+            -1 : find_first_bit(dest_vcpus, d->max_vcpus);
         pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
         spin_unlock(&d->event_lock);
 
         pirq_dpci->gmsi.posted = false;
         vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
-        if ( iommu_intpost )
-        {
-            if ( delivery_mode == dest_LowestPrio )
-                vcpu = vector_hashing_dest(d, dest, dest_mode,
-                                           pirq_dpci->gmsi.gvec);
-            if ( vcpu )
-                pirq_dpci->gmsi.posted = true;
-        }
+        if ( vcpu && iommu_intpost )
+            pirq_dpci->gmsi.posted = true;
         if ( vcpu && is_iommu_enabled(d) )
             hvm_migrate_pirq(pirq_dpci, vcpu);
 
@@ -442,6 +402,9 @@ int pt_irq_create_bind(
             pi_update_irte(vcpu ? &vcpu->arch.hvm.vmx.pi_desc : NULL,
                            info, pirq_dpci->gmsi.gvec);
 
+        if ( hvm_funcs.deliver_posted_intr )
+            domain_sync_vlapic_pir(d, prev_vcpus);
+
         if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
         {
             unsigned long flags;
@@ -731,6 +694,31 @@ int pt_irq_destroy_bind(
     else if ( pirq_dpci && pirq_dpci->gmsi.posted )
         pi_update_irte(NULL, pirq, 0);
 
+    if ( hvm_funcs.deliver_posted_intr )
+    {
+        DECLARE_BITMAP(vcpus, MAX_VIRT_CPUS) = { };
+
+        if ( pirq_dpci->gmsi.dest_vcpu_id != -1 )
+            __set_bit(pirq_dpci->gmsi.dest_vcpu_id, vcpus);
+        else
+        {
+            /*
+             * If the previous configuration had multiple possible
+             * destinations, record them all in order to sync PIR with IRR.
+             */
+            uint8_t dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
+                                     XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
+            uint8_t dest_mode = pirq_dpci->gmsi.gflags &
+                                XEN_DOMCTL_VMSI_X86_DM_MASK;
+            uint8_t delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
+                                              XEN_DOMCTL_VMSI_X86_DELIV_MASK);
+
+            hvm_intr_get_dests(d, dest, dest_mode, delivery_mode, vcpus);
+        }
+
+        domain_sync_vlapic_pir(d, vcpus);
+    }
+
     if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
          list_empty(&pirq_dpci->digl_list) )
     {
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index f86af09898..899665fed8 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -266,7 +266,6 @@ int vmsi_deliver(
     uint8_t delivery_mode, uint8_t trig_mode);
 struct hvm_pirq_dpci;
 void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *);
-int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode);
 
 enum hvm_intblk
 hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack);
@@ -336,6 +335,10 @@ unsigned long hvm_cr4_guest_valid_bits(const struct domain 
*d, bool restore);
 bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
                         void *ctxt);
 
+/* Get all the possible destination vCPUs of an interrupt. */
+int hvm_intr_get_dests(struct domain *d, uint8_t dest, uint8_t dest_mode,
+                       uint8_t delivery_mode, unsigned long *vcpus);
+
 #ifdef CONFIG_HVM
 
 #define hvm_get_guest_tsc(v) hvm_get_guest_tsc_fixed(v, 0)
diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
index dde66b4f0f..2bc2ebadf0 100644
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -150,4 +150,7 @@ bool_t vlapic_match_dest(
     const struct vlapic *target, const struct vlapic *source,
     int short_hand, uint32_t dest, bool_t dest_mode);
 
+/* Sync the PIR to IRR of all vlapics in the vcpus bitmap. */
+void domain_sync_vlapic_pir(struct domain *d, unsigned long *vcpus);
+
 #endif /* __ASM_X86_HVM_VLAPIC_H__ */
-- 
2.24.0


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Reply via email to