[Xen-devel] [v3 09/15] vt-d: Extend struct iremap_entry to support VT-d Posted-Interrupts

2015-06-23 Thread Feng Wu
Extend struct iremap_entry according to VT-d Posted-Interrupts Spec.

Signed-off-by: Feng Wu 
---
v3:
- Use u32 instead of u64 to define the bitfields in 'struct iremap_entry'
- Limit using bitfield if possible

 xen/drivers/passthrough/vtd/intremap.c | 92 +-
 xen/drivers/passthrough/vtd/iommu.h| 42 ++--
 xen/drivers/passthrough/vtd/utils.c| 10 ++--
 3 files changed, 80 insertions(+), 64 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index 0333686..b7a42f6 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -123,9 +123,9 @@ static u16 hpetid_to_bdf(unsigned int hpet_id)
 static void set_ire_sid(struct iremap_entry *ire,
 unsigned int svt, unsigned int sq, unsigned int sid)
 {
-ire->hi.svt = svt;
-ire->hi.sq = sq;
-ire->hi.sid = sid;
+ire->remap.svt = svt;
+ire->remap.sq = sq;
+ire->remap.sid = sid;
 }
 
 static void set_ioapic_source_id(int apic_id, struct iremap_entry *ire)
@@ -220,7 +220,7 @@ static unsigned int alloc_remap_entry(struct iommu *iommu, 
unsigned int nr)
 else
 p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
 
-if ( p->lo_val || p->hi_val ) /* not a free entry */
+if ( p->lo || p->hi ) /* not a free entry */
 found = 0;
 else if ( ++found == nr )
 break;
@@ -254,7 +254,7 @@ static int remap_entry_to_ioapic_rte(
 GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
  iremap_entries, iremap_entry);
 
-if ( iremap_entry->hi_val == 0 && iremap_entry->lo_val == 0 )
+if ( iremap_entry->hi == 0 && iremap_entry->lo == 0 )
 {
 dprintk(XENLOG_ERR VTDPREFIX,
 "%s: index (%d) get an empty entry!\n",
@@ -264,13 +264,13 @@ static int remap_entry_to_ioapic_rte(
 return -EFAULT;
 }
 
-old_rte->vector = iremap_entry->lo.vector;
-old_rte->delivery_mode = iremap_entry->lo.dlm;
-old_rte->dest_mode = iremap_entry->lo.dm;
-old_rte->trigger = iremap_entry->lo.tm;
+old_rte->vector = iremap_entry->remap.vector;
+old_rte->delivery_mode = iremap_entry->remap.dlm;
+old_rte->dest_mode = iremap_entry->remap.dm;
+old_rte->trigger = iremap_entry->remap.tm;
 old_rte->__reserved_2 = 0;
 old_rte->dest.logical.__reserved_1 = 0;
-old_rte->dest.logical.logical_dest = iremap_entry->lo.dst >> 8;
+old_rte->dest.logical.logical_dest = iremap_entry->remap.dst >> 8;
 
 unmap_vtd_domain_page(iremap_entries);
 spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
@@ -318,27 +318,28 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu,
 if ( rte_upper )
 {
 if ( x2apic_enabled )
-new_ire.lo.dst = value;
+new_ire.remap.dst = value;
 else
-new_ire.lo.dst = (value >> 24) << 8;
+new_ire.remap.dst = (value >> 24) << 8;
 }
 else
 {
 *(((u32 *)&new_rte) + 0) = value;
-new_ire.lo.fpd = 0;
-new_ire.lo.dm = new_rte.dest_mode;
-new_ire.lo.tm = new_rte.trigger;
-new_ire.lo.dlm = new_rte.delivery_mode;
+new_ire.remap.fpd = 0;
+new_ire.remap.dm = new_rte.dest_mode;
+new_ire.remap.tm = new_rte.trigger;
+new_ire.remap.dlm = new_rte.delivery_mode;
 /* Hardware require RH = 1 for LPR delivery mode */
-new_ire.lo.rh = (new_ire.lo.dlm == dest_LowestPrio);
-new_ire.lo.avail = 0;
-new_ire.lo.res_1 = 0;
-new_ire.lo.vector = new_rte.vector;
-new_ire.lo.res_2 = 0;
+new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
+new_ire.remap.avail = 0;
+new_ire.remap.res_1 = 0;
+new_ire.remap.vector = new_rte.vector;
+new_ire.remap.res_2 = 0;
 
 set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
-new_ire.hi.res_1 = 0;
-new_ire.lo.p = 1; /* finally, set present bit */
+new_ire.remap.res_3 = 0;
+new_ire.remap.res_4 = 0;
+new_ire.remap.p = 1; /* finally, set present bit */
 
 /* now construct new ioapic rte entry */
 remap_rte->vector = new_rte.vector;
@@ -511,7 +512,7 @@ static int remap_entry_to_msi_msg(
 GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
  iremap_entries, iremap_entry);
 
-if ( iremap_entry->hi_val == 0 && iremap_entry->lo_val == 0 )
+if ( iremap_entry->hi == 0 && iremap_entry->lo == 0 )
 {
 dprintk(XENLOG_ERR VTDPREFIX,
 "%s: index (%d) get an empty entry!\n",
@@ -524,25 +525,25 @@ static int remap_entry_to_msi

[Xen-devel] [v3 10/15] vt-d: Add API to update IRTE when VT-d PI is used

2015-06-23 Thread Feng Wu
This patch adds an API which is used to update the IRTE
for posted-interrupt when guest changes MSI/MSI-X information.

Signed-off-by: Feng Wu 
---
v3:
- Remove "adding PDA_MASK()" when updating 'pda_l' and 'pda_h' for IRTE.
- Change the return type of pi_update_irte() to int.
- Remove some pointless printk message in pi_update_irte().
- Use structure assignment instead of memcpy() for irte copy.

 xen/drivers/passthrough/vtd/intremap.c | 98 ++
 xen/drivers/passthrough/vtd/iommu.h|  2 +
 xen/include/asm-x86/iommu.h|  2 +
 3 files changed, 102 insertions(+)

diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index b7a42f6..401a9d1 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -900,3 +900,101 @@ void iommu_disable_x2apic_IR(void)
 for_each_drhd_unit ( drhd )
 disable_qinval(drhd->iommu);
 }
+
+static inline void setup_posted_irte(
+struct iremap_entry *new_ire, struct pi_desc *pi_desc, uint8_t gvec)
+{
+new_ire->post.urg = 0;
+new_ire->post.vector = gvec;
+new_ire->post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
+new_ire->post.pda_h = virt_to_maddr(pi_desc) >> 32;
+
+new_ire->post.res_1 = 0;
+new_ire->post.res_2 = 0;
+new_ire->post.res_3 = 0;
+new_ire->post.res_4 = 0;
+new_ire->post.res_5 = 0;
+
+new_ire->post.im = 1;
+}
+
+/*
+ * This function is used to update the IRTE for posted-interrupt
+ * when guest changes MSI/MSI-X information.
+ */
+int pi_update_irte(struct vcpu *v, struct pirq *pirq, uint8_t gvec)
+{
+struct irq_desc *desc;
+struct msi_desc *msi_desc;
+int remap_index;
+int rc = 0;
+struct pci_dev *pci_dev;
+struct acpi_drhd_unit *drhd;
+struct iommu *iommu;
+struct ir_ctrl *ir_ctrl;
+struct iremap_entry *iremap_entries = NULL, *p = NULL;
+struct iremap_entry new_ire;
+struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+unsigned long flags;
+uint128_t old_ire, ret;
+
+desc = pirq_spin_lock_irq_desc(pirq, NULL);
+if ( !desc )
+return -ENOMEM;
+
+msi_desc = desc->msi_desc;
+if ( !msi_desc )
+{
+rc = -EBADSLT;
+goto unlock_out;
+}
+
+pci_dev = msi_desc->dev;
+if ( !pci_dev )
+{
+rc = -ENODEV;
+goto unlock_out;
+}
+
+remap_index = msi_desc->remap_index;
+drhd = acpi_find_matched_drhd_unit(pci_dev);
+if ( !drhd )
+{
+rc = -ENODEV;
+goto unlock_out;
+}
+
+iommu = drhd->iommu;
+ir_ctrl = iommu_ir_ctrl(iommu);
+if ( !ir_ctrl )
+{
+rc = -ENODEV;
+goto unlock_out;
+}
+
+spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+
+GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, remap_index, iremap_entries, p);
+new_ire = *p;
+
+/* Setup/Update interrupt remapping table entry. */
+setup_posted_irte(&new_ire, pi_desc, gvec);
+
+do {
+old_ire = *(uint128_t *)p;
+ret = cmpxchg16b(p, &old_ire, &new_ire);
+} while ( memcmp(&ret, &old_ire, sizeof(old_ire)) );
+
+iommu_flush_cache_entry(p, sizeof(struct iremap_entry));
+iommu_flush_iec_index(iommu, 0, remap_index);
+
+if ( iremap_entries )
+unmap_vtd_domain_page(iremap_entries);
+
+spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
+
+ unlock_out:
+spin_unlock_irq(&desc->lock);
+
+return rc;
+}
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index 49daa70..9ce941e 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -329,6 +329,8 @@ struct iremap_entry {
   };
 };
 
+#define PDA_LOW_BIT    26
+
 /* Max intr remapping table page order is 8, as max number of IRTEs is 64K */
 #define IREMAP_PAGE_ORDER  8
 
diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
index e7a65da..2a1523e 100644
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -32,6 +32,8 @@ int iommu_supports_eim(void);
 int iommu_enable_x2apic_IR(void);
 void iommu_disable_x2apic_IR(void);
 
+int pi_update_irte(struct vcpu *v, struct pirq *pirq, uint8_t gvec);
+
 #endif /* !__ARCH_X86_IOMMU_H__ */
 /*
  * Local variables:
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [v3 01/15] Vt-d Posted-intterrupt (PI) design

2015-06-23 Thread Feng Wu
Add the design doc for VT-d PI.

Signed-off-by: Feng Wu 
---
 docs/misc/vtd-pi.txt | 333 +++
 1 file changed, 333 insertions(+)
 create mode 100644 docs/misc/vtd-pi.txt

diff --git a/docs/misc/vtd-pi.txt b/docs/misc/vtd-pi.txt
new file mode 100644
index 000..f41c688
--- /dev/null
+++ b/docs/misc/vtd-pi.txt
@@ -0,0 +1,333 @@
+Authors: Feng Wu 
+
+VT-d Posted-interrupt (PI) design for XEN
+
+Background
+==
+With the development of virtualization, there are more and more device
+assignment requirements. However, today when a VM is running with
+assigned devices (such as, NIC), external interrupt handling for the assigned
+devices always needs VMM intervention.
+
+VT-d Posted-interrupt is a more enhanced method to handle interrupts
+in the virtualization environment. Interrupt posting is the process by
+which an interrupt request is recorded in a memory-resident
+posted-interrupt-descriptor structure by the root-complex, followed by
+an optional notification event issued to the CPU complex.
+
+With VT-d Posted-interrupt we can get the following advantages:
+- Direct delivery of external interrupts to running vCPUs without VMM
+intervention
+- Decrease the interrupt migration complexity. On vCPU migration, software
+can atomically co-migrate all interrupts targeting the migrating vCPU. For
+virtual machines with assigned devices, migrating a vCPU across pCPUs
+either incurs the overhead of forwarding interrupts in software (e.g. via VMM
+generated IPIs), or the complexity of independently migrating each interrupt 
targeting
+the vCPU to the new pCPU. However, after enabling VT-d PI, the destination vCPU
+of an external interrupt from assigned devices is stored in the IRTE (i.e.
+Posted-interrupt Descriptor Address), when vCPU is migrated to another pCPU,
+we will set this new pCPU in the 'NDST' field of Posted-interrupt descriptor, 
this
+makes the interrupt migration automatic.
+
+Here is what Xen currently does for external interrupts from assigned devices:
+
+When a VM is running and an external interrupt from an assigned device occurs
+for it, a VM-Exit happens, then:
+
+vmx_do_extint() --> do_IRQ() --> __do_IRQ_guest() --> hvm_do_IRQ_dpci() -->
+raise_softirq_for(pirq_dpci) --> raise_softirq(HVM_DPCI_SOFTIRQ)
+
+softirq HVM_DPCI_SOFTIRQ is bound to dpci_softirq()
+
+dpci_softirq() --> hvm_dirq_assist() --> vmsi_deliver_pirq() --> 
vmsi_deliver() -->
+vmsi_inj_irq() --> vlapic_set_irq()
+
+vlapic_set_irq() does the following things:
+1. If CPU-side posted-interrupt is supported, call vmx_deliver_posted_intr() 
to deliver
+the virtual interrupt via posted-interrupt infrastructure.
+2. Else if CPU-side posted-interrupt is not supported, set the related vIRR in 
vLAPIC
+page and call vcpu_kick() to kick the related vCPU. Before VM-Entry, 
vmx_intr_assist()
+will help to inject the interrupt to guests.
+
+However, after VT-d PI is supported, when a guest is running in non-root and an
+external interrupt from an assigned device occurs for it, no VM-Exit is needed;
+the guest can handle this totally in non-root mode, thus avoiding all the above
+code flow.
+
+Posted-interrupt Introduction
+
+There are two components to the Posted-interrupt architecture:
+Processor Support and Root-Complex Support
+
+- Processor Support
+Posted-interrupt processing is a feature by which a processor processes
+the virtual interrupts by recording them as pending on the virtual-APIC
+page.
+
+Posted-interrupt processing is enabled by setting the process posted
+interrupts VM-execution control. The processing is performed in response
+to the arrival of an interrupt with the posted-interrupt notification vector.
+In response to such an interrupt, the processor processes virtual interrupts
+recorded in a data structure called a posted-interrupt descriptor.
+
+For more information about APICv and CPU-side Posted-interrupt, please refer
+to Chapter 29, and Section 29.6 in the Intel SDM:
+http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
+
+- Root-Complex Support
+Interrupt posting is the process by which an interrupt request (from IOAPIC
+or MSI/MSIx capable sources) is recorded in a memory-resident
+posted-interrupt-descriptor structure by the root-complex, followed by
+an optional notification event issued to the CPU complex. The interrupt
+request arriving at the root-complex carries the identity of the interrupt
+request source and a 'remapping-index'. The remapping-index is used to
+look-up an entry from the memory-resident interrupt-remap-table. Unlike
+with interrupt-remapping, the interrupt-remap-table-entry for a posted-
+interrupt, specifies a virtual-vector and a pointer to the posted-interrupt
+descriptor. The virtual-vector specifies the vector of the interrupt to be
+recorded in the posted-interrupt descriptor. The posted-interrupt

[Xen-devel] [v3 00/15] Add VT-d Posted-Interrupts support

2015-06-23 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

This patch set follows the following design:
http://article.gmane.org/gmane.comp.emulators.xen.devel/236476

v3:
Changelogs are in the head of each patch.

v2:
1. Add the design doc.
2. Coding style fix.
3. Add some comments for struct pi_desc.
4. Extend 'struct iremap_entry' to a more common format.
5. Delete the atomic helper functions for pi descriptor manipulation.
6. Add the new command line in docs/misc/xen-command-line.markdown.
7. Use macros to replace some magic numbers.

Feng Wu (15):
  Vt-d Posted-intterrupt (PI) design
  Add helper macro for X86_FEATURE_CX16 feature detection
  Add cmpxchg16b support for x86-64
  iommu: Add iommu_intpost to control VT-d Posted-Interrupts feature
  vt-d: VT-d Posted-Interrupts feature detection
  vmx: Extend struct pi_desc to support VT-d Posted-Interrupts
  vmx: Initialize VT-d Posted-Interrupts Descriptor
  Suppress posting interrupts when 'SN' is set
  vt-d: Extend struct iremap_entry to support VT-d Posted-Interrupts
  vt-d: Add API to update IRTE when VT-d PI is used
  Update IRTE according to guest interrupt config changes
  vmx: posted-interrupt handling when vCPU is blocked
  vmx: Properly handle notification event when vCPU is running
  Update Posted-Interrupts Descriptor during vCPU scheduling
  Add a command line parameter for VT-d posted-interrupts

 docs/misc/vtd-pi.txt   | 333 +
 docs/misc/xen-command-line.markdown|   9 +-
 xen/arch/x86/hvm/hvm.c |   6 +
 xen/arch/x86/hvm/vmx/vmcs.c|  21 +++
 xen/arch/x86/hvm/vmx/vmx.c | 263 +-
 xen/common/schedule.c  |   4 +
 xen/drivers/passthrough/io.c   |  96 +-
 xen/drivers/passthrough/iommu.c|  12 +-
 xen/drivers/passthrough/vtd/intremap.c | 190 ++-
 xen/drivers/passthrough/vtd/iommu.c|  18 +-
 xen/drivers/passthrough/vtd/iommu.h|  45 +++--
 xen/drivers/passthrough/vtd/utils.c|  10 +-
 xen/include/asm-arm/domain.h   |   2 +
 xen/include/asm-x86/cpufeature.h   |   2 +
 xen/include/asm-x86/hvm/hvm.h  |   3 +
 xen/include/asm-x86/hvm/vmx/vmcs.h |  27 ++-
 xen/include/asm-x86/hvm/vmx/vmx.h  |  18 ++
 xen/include/asm-x86/iommu.h|   2 +
 xen/include/asm-x86/x86_64/system.h|  28 +++
 xen/include/xen/iommu.h|   2 +-
 xen/include/xen/types.h|   5 +
 21 files changed, 1019 insertions(+), 77 deletions(-)
 create mode 100644 docs/misc/vtd-pi.txt

-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [v3 07/15] vmx: Initialize VT-d Posted-Interrupts Descriptor

2015-06-23 Thread Feng Wu
This patch initializes the VT-d Posted-interrupt Descriptor.

Signed-off-by: Feng Wu 
---
v3:
- Move pi_desc_init() to xen/arch/x86/hvm/vmx/vmcs.c
- Remove the 'inline' flag of pi_desc_init()

 xen/arch/x86/hvm/vmx/vmcs.c   | 18 ++
 xen/include/asm-x86/hvm/vmx/vmx.h |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 3aff365..11dc1b5 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static bool_t __read_mostly opt_vpid_enabled = 1;
 boolean_param("vpid", opt_vpid_enabled);
@@ -921,6 +922,20 @@ void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, 
u64 val)
 virtual_vmcs_exit(vvmcs);
 }
 
+static void pi_desc_init(struct vcpu *v)
+{
+uint32_t dest;
+
+v->arch.hvm_vmx.pi_desc.nv = posted_intr_vector;
+
+dest = cpu_physical_id(v->processor);
+
+if ( x2apic_enabled )
+v->arch.hvm_vmx.pi_desc.ndst = dest;
+else
+v->arch.hvm_vmx.pi_desc.ndst = MASK_INSR(dest, PI_xAPIC_NDST_MASK);
+}
+
 static int construct_vmcs(struct vcpu *v)
 {
 struct domain *d = v->domain;
@@ -1054,6 +1069,9 @@ static int construct_vmcs(struct vcpu *v)
 
 if ( cpu_has_vmx_posted_intr_processing )
 {
+if ( iommu_intpost )
+pi_desc_init(v);
+
 __vmwrite(PI_DESC_ADDR, virt_to_maddr(&v->arch.hvm_vmx.pi_desc));
 __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
 }
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index 35f804a..5853563 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -89,6 +89,8 @@ typedef enum {
 #define EPT_EMT_WB  6
 #define EPT_EMT_RSV2        7
 
+#define PI_xAPIC_NDST_MASK  0xFF00
+
 void vmx_asm_vmexit_handler(struct cpu_user_regs);
 void vmx_asm_do_vmentry(void);
 void vmx_intr_assist(void);
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [v3 03/15] Add cmpxchg16b support for x86-64

2015-06-23 Thread Feng Wu
This patch adds cmpxchg16b support for x86-64, so software
can perform 128-bit atomic write/read.

Signed-off-by: Feng Wu 
---
v3:
Newly added.

 xen/include/asm-x86/x86_64/system.h | 28 
 xen/include/xen/types.h |  5 +
 2 files changed, 33 insertions(+)

diff --git a/xen/include/asm-x86/x86_64/system.h 
b/xen/include/asm-x86/x86_64/system.h
index 662813a..a910d00 100644
--- a/xen/include/asm-x86/x86_64/system.h
+++ b/xen/include/asm-x86/x86_64/system.h
@@ -6,6 +6,34 @@
(unsigned long)(n),sizeof(*(ptr
 
 /*
+ * Atomic 16 bytes compare and exchange.  Compare OLD with MEM, if
+ * identical, store NEW in MEM.  Return the initial value in MEM.
+ * Success is indicated by comparing RETURN with OLD.
+ *
+ * This function can only be called when cpu_has_cx16 is true.
+ */
+
+static always_inline uint128_t __cmpxchg16b(
+volatile void *ptr, uint128_t old, uint128_t new)
+{
+uint128_t prev;
+
+ASSERT(cpu_has_cx16);
+
+asm volatile ( "lock; cmpxchg16b %4"
+   : "=d" (prev.high), "=a" (prev.low)
+   : "c" (new.high), "b" (new.low),
+   "m" (*__xg((volatile void *)ptr)),
+   "0" (old.high), "1" (old.low)
+   : "memory" );
+
+return prev;
+}
+
+#define cmpxchg16b(ptr,o,n) \
+__cmpxchg16b((ptr), *(uint128_t *)(o), *(uint128_t *)(n))
+
+/*
  * This function causes value _o to be changed to _n at location _p.
  * If this access causes a fault then we return 1, otherwise we return 0.
  * If no fault occurs then _o is updated to the value we saw at _p. If this
diff --git a/xen/include/xen/types.h b/xen/include/xen/types.h
index 8596ded..30f8a44 100644
--- a/xen/include/xen/types.h
+++ b/xen/include/xen/types.h
@@ -47,6 +47,11 @@ typedef __u64   uint64_t;
 typedef __u64   u_int64_t;
 typedef __s64   int64_t;
 
+typedef struct {
+uint64_t low;
+uint64_t high;
+} uint128_t;
+
 struct domain;
 struct vcpu;
 
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [v3 15/15] Add a command line parameter for VT-d posted-interrupts

2015-06-23 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

Signed-off-by: Feng Wu 
---
v3:
Remove the redundant "no intremap then no intpost" logic

 docs/misc/xen-command-line.markdown | 9 -
 xen/drivers/passthrough/iommu.c | 4 +++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index aa684c0..f8ec15f 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -848,7 +848,7 @@ debug hypervisor only).
 > Default: `new` unless directed-EOI is supported
 
 ### iommu
-> `= List of [  | force | required | intremap | qinval | snoop | 
sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | verbose | debug ]`
+> `= List of [  | force | required | intremap | intpost | qinval | 
snoop | sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | verbose | debug ]`
 
 > Sub-options:
 
@@ -875,6 +875,13 @@ debug hypervisor only).
 >> Control the use of interrupt remapping (DMA remapping will always be enabled
 >> if IOMMU functionality is enabled).
 
+> `intpost`
+
+> Default: `true`
+
+>> Control the use of interrupt posting. Interrupt posting is dependent on
+>> interrupt remapping.
+
 > `qinval` (VT-d)
 
 > Default: `true`
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 597f676..e13251c 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -52,7 +52,7 @@ bool_t __read_mostly iommu_passthrough;
 bool_t __read_mostly iommu_snoop = 1;
 bool_t __read_mostly iommu_qinval = 1;
 bool_t __read_mostly iommu_intremap = 1;
-bool_t __read_mostly iommu_intpost;
+bool_t __read_mostly iommu_intpost = 1;
 bool_t __read_mostly iommu_hap_pt_share = 1;
 bool_t __read_mostly iommu_debug;
 bool_t __read_mostly amd_iommu_perdev_intremap = 1;
@@ -97,6 +97,8 @@ static void __init parse_iommu_param(char *s)
 iommu_qinval = val;
 else if ( !strcmp(s, "intremap") )
 iommu_intremap = val;
+else if ( !strcmp(s, "intpost") )
+iommu_intpost = val;
 else if ( !strcmp(s, "debug") )
 {
 iommu_debug = val;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [v3 14/15] Update Posted-Interrupts Descriptor during vCPU scheduling

2015-06-23 Thread Feng Wu
The basic idea here is:
1. When vCPU's state is RUNSTATE_running,
- set 'NV' to 'Notification Vector'.
- Clear 'SN' to accept PI.
- set 'NDST' to the right pCPU.
2. When vCPU's state is RUNSTATE_blocked,
- set 'NV' to 'Wake-up Vector', so we can wake up the
  related vCPU when posted-interrupt happens for it.
- Clear 'SN' to accept PI.
3. When vCPU's state is RUNSTATE_runnable/RUNSTATE_offline,
- Set 'SN' to suppress non-urgent interrupts.
  (Currently, we only support non-urgent interrupts)
- Set 'NV' back to 'Notification Vector' if needed.

Signed-off-by: Feng Wu 
---
v3:
* Use write_atomic() to update 'NV' and 'NDST' fields.
* Use MASK_INSR() to get the value for 'NDST' field
* Add ASSERT_UNREACHABLE() for the break case in vmx_pi_desc_update()
* Remove pointless NULL assignment to 'vmx_function_table.pi_desc_update'
* Call hvm_funcs.pi_desc_update() in arch-specific files
* coding style

 xen/arch/x86/hvm/hvm.c |   6 ++
 xen/arch/x86/hvm/vmx/vmx.c | 122 +
 xen/common/schedule.c  |   4 ++
 xen/include/asm-arm/domain.h   |   2 +
 xen/include/asm-x86/hvm/hvm.h  |   2 +
 xen/include/asm-x86/hvm/vmx/vmcs.h |   7 +++
 xen/include/asm-x86/hvm/vmx/vmx.h  |  11 
 7 files changed, 154 insertions(+)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 2736802..64ce381 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -6475,6 +6475,12 @@ enum hvm_intblk nhvm_interrupt_blocked(struct vcpu *v)
 return hvm_funcs.nhvm_intr_blocked(v);
 }
 
+void arch_pi_desc_update(struct vcpu *v, int old_state)
+{
+if ( is_hvm_vcpu(v) && hvm_funcs.pi_desc_update )
+hvm_funcs.pi_desc_update(v, old_state);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 5795afd..cf4f292 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -168,6 +168,7 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 
 INIT_LIST_HEAD(&v->arch.hvm_vmx.pi_blocked_vcpu_list);
 
+v->arch.hvm_vmx.pi_block_cpu = -1;
 return 0;
 }
 
@@ -1778,6 +1779,124 @@ static void vmx_handle_eoi(u8 vector)
 __vmwrite(GUEST_INTR_STATUS, status);
 }
 
+static void vmx_pi_desc_update(struct vcpu *v, int old_state)
+{
+struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+struct pi_desc old, new;
+unsigned long flags;
+
+ASSERT(iommu_intpost);
+
+switch ( v->runstate.state )
+{
+case RUNSTATE_runnable:
+case RUNSTATE_offline:
+/*
+ * We don't need to send notification event to a non-running
+ * vcpu, the interrupt information will be delivered to it before
+ * VM-ENTRY when the vcpu is scheduled to run next time.
+ */
+pi_set_sn(pi_desc);
+
+/*
+ * If the state is transferred from RUNSTATE_blocked,
+ * we should set 'NV' field back to posted_intr_vector,
+ * so the Posted-Interrupts can be delivered to the vCPU
+ * by VT-d HW after it is scheduled to run.
+ */
+if ( old_state == RUNSTATE_blocked )
+{
+write_atomic((uint8_t*)&new.nv, posted_intr_vector);
+
+/*
+ * Delete the vCPU from the related block list
+ * if we are resuming from blocked state
+ */
+ASSERT(v->arch.hvm_vmx.pi_block_cpu != -1);
+spin_lock_irqsave(&per_cpu(pi_blocked_vcpu_lock,
+  v->arch.hvm_vmx.pi_block_cpu), flags);
+list_del(&v->arch.hvm_vmx.pi_blocked_vcpu_list);
+spin_unlock_irqrestore(&per_cpu(pi_blocked_vcpu_lock,
+   v->arch.hvm_vmx.pi_block_cpu), flags);
+v->arch.hvm_vmx.pi_block_cpu = -1;
+}
+break;
+
+case RUNSTATE_blocked:
+ASSERT(v->arch.hvm_vmx.pi_block_cpu == -1);
+
+/*
+ * The vCPU is blocked on the block list. Add the blocked
+ * vCPU on the list of the v->arch.hvm_vmx.pi_block_cpu,
+ * which is the destination of the wake-up notification event.
+ */
+v->arch.hvm_vmx.pi_block_cpu = v->processor;
+spin_lock_irqsave(&per_cpu(pi_blocked_vcpu_lock,
+  v->arch.hvm_vmx.pi_block_cpu), flags);
+list_add_tail(&v->arch.hvm_vmx.pi_blocked_vcpu_list,
+  &per_cpu(pi_blocked_vcpu, v->arch.hvm_vmx.pi_block_cpu));
+spin_unlock_irqrestore(&per_cpu(pi_blocked_vcpu_lock,
+   v->arch.hvm_vmx.pi_block_cpu), flags);
+
+do {
+old.control = new.control

[Xen-devel] [v3 04/15] iommu: Add iommu_intpost to control VT-d Posted-Interrupts feature

2015-06-23 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

This patch adds the variable 'iommu_intpost' to control whether VT-d
posted-interrupt is enabled in the generic IOMMU code.

Signed-off-by: Feng Wu 
---
v3:
- Remove pointless initializer for 'iommu_intpost'.
- Some adjustment for "if no intremap then no intpost" logic.
* For parse_iommu_param(), move it to the end of the function,
  so we don't need to add the same logic when introducing the
  new kernel parameter 'intpost' in later patch.
* Add this logic in iommu_setup() after iommu_hardware_setup()
  is called.

 xen/drivers/passthrough/iommu.c | 10 +-
 xen/include/xen/iommu.h |  2 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 06cb38f..597f676 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -39,6 +39,7 @@ static void iommu_dump_p2m_table(unsigned char key);
  *   no-snoop   Disable VT-d Snoop Control
  *   no-qinval  Disable VT-d Queued Invalidation
  *   no-intremapDisable VT-d Interrupt Remapping
+ *   no-intpost Disable VT-d Interrupt posting
  */
 custom_param("iommu", parse_iommu_param);
 bool_t __initdata iommu_enable = 1;
@@ -51,6 +52,7 @@ bool_t __read_mostly iommu_passthrough;
 bool_t __read_mostly iommu_snoop = 1;
 bool_t __read_mostly iommu_qinval = 1;
 bool_t __read_mostly iommu_intremap = 1;
+bool_t __read_mostly iommu_intpost;
 bool_t __read_mostly iommu_hap_pt_share = 1;
 bool_t __read_mostly iommu_debug;
 bool_t __read_mostly amd_iommu_perdev_intremap = 1;
@@ -112,6 +114,9 @@ static void __init parse_iommu_param(char *s)
 
 s = ss + 1;
 } while ( ss );
+
+if ( !iommu_intremap )
+iommu_intpost = 0;
 }
 
 int iommu_domain_init(struct domain *d)
@@ -305,6 +310,9 @@ int __init iommu_setup(void)
 panic("Couldn't enable %s and iommu=required/force",
   !iommu_enabled ? "IOMMU" : "Interrupt Remapping");
 
+if ( !iommu_intremap )
+iommu_intpost = 0;
+
 if ( !iommu_enabled )
 {
 iommu_snoop = 0;
@@ -372,7 +380,7 @@ void iommu_crash_shutdown(void)
 const struct iommu_ops *ops = iommu_get_ops();
 if ( iommu_enabled )
 ops->crash_shutdown();
-iommu_enabled = iommu_intremap = 0;
+iommu_enabled = iommu_intremap = iommu_intpost = 0;
 }
 
 bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index b30bf41..a123cce 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -31,7 +31,7 @@
 extern bool_t iommu_enable, iommu_enabled;
 extern bool_t force_iommu, iommu_verbose;
 extern bool_t iommu_workaround_bios_bug, iommu_passthrough;
-extern bool_t iommu_snoop, iommu_qinval, iommu_intremap;
+extern bool_t iommu_snoop, iommu_qinval, iommu_intremap, iommu_intpost;
 extern bool_t iommu_hap_pt_share;
 extern bool_t iommu_debug;
 extern bool_t amd_iommu_perdev_intremap;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [v3 08/15] Suppress posting interrupts when 'SN' is set

2015-06-23 Thread Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are treated as non-urgent, so we cannot send a
posted-interrupt when 'SN' is set.

Signed-off-by: Feng Wu 
---
v3:
use cmpxchg to test SN/ON and set ON

 xen/arch/x86/hvm/vmx/vmx.c | 32 
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 0837627..b94ef6a 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1686,6 +1686,8 @@ static void __vmx_deliver_posted_interrupt(struct vcpu *v)
 
 static void vmx_deliver_posted_intr(struct vcpu *v, u8 vector)
 {
+struct pi_desc old, new, prev;
+
 if ( pi_test_and_set_pir(vector, &v->arch.hvm_vmx.pi_desc) )
 return;
 
@@ -1698,13 +1700,35 @@ static void vmx_deliver_posted_intr(struct vcpu *v, u8 
vector)
  */
 pi_set_on(&v->arch.hvm_vmx.pi_desc);
 }
-else if ( !pi_test_and_set_on(&v->arch.hvm_vmx.pi_desc) )
+else
 {
+prev.control = 0;
+
+do {
+old.control = v->arch.hvm_vmx.pi_desc.control &
+  ~(1 << POSTED_INTR_ON | 1 << POSTED_INTR_SN);
+new.control = v->arch.hvm_vmx.pi_desc.control |
+  1 << POSTED_INTR_ON;
+
+/*
+ * Currently, we don't support urgent interrupt, all
+ * interrupts are recognized as non-urgent interrupt,
+ * so we cannot send posted-interrupt when 'SN' is set.
+ * Besides that, if 'ON' is already set, we cannot set
+ * posted-interrupts as well.
+ */
+if ( prev.sn || prev.on )
+{
+vcpu_kick(v);
+return;
+}
+
+prev.control = cmpxchg(&v->arch.hvm_vmx.pi_desc.control,
+   old.control, new.control);
+} while ( prev.control != old.control );
+
 __vmx_deliver_posted_interrupt(v);
-return;
 }
-
-vcpu_kick(v);
 }
 
 static void vmx_sync_pir_to_irr(struct vcpu *v)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [v3 06/15] vmx: Extend struct pi_desc to support VT-d Posted-Interrupts

2015-06-23 Thread Feng Wu
Extend struct pi_desc according to VT-d Posted-Interrupts Spec.

Signed-off-by: Feng Wu 
---
v3:
- Use u32 instead of u64 for the bitfield in 'struct pi_desc'

 xen/include/asm-x86/hvm/vmx/vmcs.h | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 1104bda..dedfaef 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -81,8 +81,19 @@ struct vmx_domain {
 
 struct pi_desc {
 DECLARE_BITMAP(pir, NR_VECTORS);
-u32 control;
-u32 rsvd[7];
+union {
+struct
+{
+u16 on : 1,  /* bit 256 - Outstanding Notification */
+sn : 1,  /* bit 257 - Suppress Notification */
+rsvd_1 : 14; /* bit 271:258 - Reserved */
+u8  nv;  /* bit 279:272 - Notification Vector */
+u8  rsvd_2;  /* bit 287:280 - Reserved */
+u32 ndst;/* bit 319:288 - Notification Destination */
+};
+u64 control;
+};
+u32 rsvd[6];
 } __attribute__ ((aligned (64)));
 
 #define ept_get_wl(ept)   ((ept)->ept_wl)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [v3 13/15] vmx: Properly handle notification event when vCPU is running

2015-06-23 Thread Feng Wu
When a vCPU is running in Root mode and a notification event
has been injected to it, we need to set VCPU_KICK_SOFTIRQ for
the current cpu, so the pending interrupt in PIRR will be
synced to vIRR before VM-Exit in time.

Signed-off-by: Feng Wu 
---
v3:
- Make pi_notification_interrupt() static

 xen/arch/x86/hvm/vmx/vmx.c | 55 +-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 7db6009..5795afd 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1896,6 +1896,59 @@ static void pi_wakeup_interrupt(struct cpu_user_regs 
*regs)
 this_cpu(irq_count)++;
 }
 
+/*
+ * Handle VT-d posted-interrupt when VCPU is running.
+ */
+
+static void pi_notification_interrupt(struct cpu_user_regs *regs)
+{
+/*
+ * We get here when a vCPU is running in root-mode
+ * (such as via hypercall, or any other reasons which
+ * can result in VM-Exit), and before vCPU is back to
+ * non-root, external interrupts from an assigned
+ * device happen and a notification event is delivered
+ * to this logical CPU.
+ *
+ * we need to set VCPU_KICK_SOFTIRQ for the current
+ * cpu, just like __vmx_deliver_posted_interrupt().
+ *
+ * So the pending interrupt in PIRR will be synced to
+ * vIRR before VM-Exit in time.
+ *
+ * Please refer to the following code fragments from
+ * xen/arch/x86/hvm/vmx/entry.S:
+ *
+ * .Lvmx_do_vmentry
+ *
+ *  ..
+ *  point 1
+ *
+ *  cmp  %ecx,(%rdx,%rax,1)
+ *  jnz  .Lvmx_process_softirqs
+ *
+ *  ..
+ *
+ *  je   .Lvmx_launch
+ *
+ *  ..
+ *
+ * .Lvmx_process_softirqs:
+ *  sti
+ *  call do_softirq
+ *  jmp  .Lvmx_do_vmentry
+ *
+ *  If VT-d engine issues a notification event at
+ *  point 1 above, it cannot be delivered to the guest
+ *  during this VM-entry without raising the softirq
+ *  in this notification handler.
+ */
+raise_softirq(VCPU_KICK_SOFTIRQ);
+
+ack_APIC_irq();
+this_cpu(irq_count)++;
+}
+
 const struct hvm_function_table * __init start_vmx(void)
 {
 set_in_cr4(X86_CR4_VMXE);
@@ -1933,7 +1986,7 @@ const struct hvm_function_table * __init start_vmx(void)
 
 if ( cpu_has_vmx_posted_intr_processing )
 {
-alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt);
+alloc_direct_apic_vector(&posted_intr_vector, 
pi_notification_interrupt);
 
 if ( iommu_intpost )
 alloc_direct_apic_vector(&pi_wakeup_vector, pi_wakeup_interrupt);
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [v3 05/15] vt-d: VT-d Posted-Interrupts feature detection

2015-06-23 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

This patch adds feature detection logic for VT-d posted-interrupt.

Signed-off-by: Feng Wu 
---
v3:
- Remove the "if no intremap then no intpost" logic in
  intel_vtd_setup(), it is covered in the iommu_setup().
- Add "if no intremap then no intpost" logic in the end
  of init_vtd_hw() which is called by vtd_resume().

So the logic exists in the following three places:
- parse_iommu_param()
- iommu_setup()
- init_vtd_hw()

 xen/drivers/passthrough/vtd/iommu.c | 18 --
 xen/drivers/passthrough/vtd/iommu.h |  1 +
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index 9053a1f..4221185 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2071,6 +2071,9 @@ static int init_vtd_hw(void)
 disable_intremap(drhd->iommu);
 }
 
+if ( !iommu_intremap )
+iommu_intpost = 0;
+
 /*
  * Set root entries for each VT-d engine.  After set root entry,
  * must globally invalidate context cache, and then globally
@@ -2133,8 +2136,8 @@ int __init intel_vtd_setup(void)
 }
 
 /* We enable the following features only if they are supported by all VT-d
- * engines: Snoop Control, DMA passthrough, Queued Invalidation and
- * Interrupt Remapping.
+ * engines: Snoop Control, DMA passthrough, Queued Invalidation, Interrupt
+ * Remapping, and Posted Interrupt
  */
 for_each_drhd_unit ( drhd )
 {
@@ -2162,6 +2165,15 @@ int __init intel_vtd_setup(void)
 if ( iommu_intremap && !ecap_intr_remap(iommu->ecap) )
 iommu_intremap = 0;
 
+/*
+ * We cannot use posted interrupt if X86_FEATURE_CX16 is
+ * not supported, since we count on this feature to
+ * atomically update 16-byte IRTE in posted format.
+ */
+if ( !iommu_intremap &&
+ (!cap_intr_post(iommu->cap) || !cpu_has_cx16) )
+iommu_intpost = 0;
+
 if ( !vtd_ept_page_compatible(iommu) )
 iommu_hap_pt_share = 0;
 
@@ -2187,6 +2199,7 @@ int __init intel_vtd_setup(void)
 P(iommu_passthrough, "Dom0 DMA Passthrough");
 P(iommu_qinval, "Queued Invalidation");
 P(iommu_intremap, "Interrupt Remapping");
+P(iommu_intpost, "Posted Interrupt");
 P(iommu_hap_pt_share, "Shared EPT tables");
 #undef P
 
@@ -2206,6 +2219,7 @@ int __init intel_vtd_setup(void)
 iommu_passthrough = 0;
 iommu_qinval = 0;
 iommu_intremap = 0;
+iommu_intpost = 0;
 return ret;
 }
 
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index 80f8830..e807253 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -69,6 +69,7 @@
 /*
  * Decoding Capability Register
  */
+#define cap_intr_post(c)   (((c) >> 59) & 1)
 #define cap_read_drain(c)  (((c) >> 55) & 1)
 #define cap_write_drain(c) (((c) >> 54) & 1)
 #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [v3 02/15] Add helper macro for X86_FEATURE_CX16 feature detection

2015-06-23 Thread Feng Wu
Add macro cpu_has_cx16 to detect X86_FEATURE_CX16 feature.

Signed-off-by: Feng Wu 
---
v3:
- Newly added. We need to atomically update the IRTE in PI format
  via CMPXCHG16B which is only available with this feature.

 xen/include/asm-x86/cpufeature.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 7963a3a..63c1fe8 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -216,6 +216,8 @@
 
 #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING)
 
+#define cpu_has_cx16boot_cpu_has(X86_FEATURE_CX16)
+
 enum _cache_type {
 CACHE_TYPE_NULL = 0,
 CACHE_TYPE_DATA = 1,
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 00/15] Add VT-d Posted-Interrupts support

2015-05-08 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

This patch set follows the following design:
http://article.gmane.org/gmane.comp.emulators.xen.devel/236476

v1 -> v2
1. Add the design doc.
2. Coding style fix.
3. Add some comments for struct pi_desc.
4. Extend 'struct iremap_entry' to a more common format.
5. Delete the atomic helper functions for pi descriptor manipulation.
6. Add the new command line in docs/misc/xen-command-line.markdown.
7. Use macros to replace some magic numbers.

One open issue in "[RFC v2 07/15] vt-d: Add API to update IRTE when VT-d PI is used":
How to update the IRTE for PI dynamically (in an atomic way)? I am trying
to use cmpxchg16b and it is in progress; I will add this in the next version.

Feng Wu (15):
  Vt-d Posted-intterrupt (PI) design
  iommu: Add iommu_intpost to control VT-d Posted-Interrupts feature
  vt-d: VT-d Posted-Interrupts feature detection
  vmx: Extend struct pi_desc to support VT-d Posted-Interrupts
  vmx: Initialize VT-d Posted-Interrupts Descriptor
  vt-d: Extend struct iremap_entry to support VT-d Posted-Interrupts
  vt-d: Add API to update IRTE when VT-d PI is used
  Update IRTE according to guest interrupt config changes
  Add a new per-vCPU tasklet to wakeup the blocked vCPU
  vmx: Define two per-cpu variables
  vmx: Add a global wake-up vector for VT-d Posted-Interrupts
  vmx: Properly handle notification event when vCPU is running
  Update Posted-Interrupts Descriptor during vCPU scheduling
  Suppress posting interrupts when 'SN' is set
  Add a command line parameter for VT-d posted-interrupts

 docs/misc/vtd-pi.txt   | 332 +
 docs/misc/xen-command-line.markdown|   9 +-
 xen/arch/x86/hvm/vmx/vmcs.c|   6 +
 xen/arch/x86/hvm/vmx/vmx.c | 236 ++-
 xen/common/domain.c|  11 ++
 xen/common/schedule.c  |   5 +
 xen/drivers/passthrough/io.c   |  99 +-
 xen/drivers/passthrough/iommu.c|  17 +-
 xen/drivers/passthrough/vtd/intremap.c | 188 ++-
 xen/drivers/passthrough/vtd/iommu.c|  15 +-
 xen/drivers/passthrough/vtd/iommu.h|  31 ++-
 xen/drivers/passthrough/vtd/utils.c|  10 +-
 xen/include/asm-x86/hvm/hvm.h  |   1 +
 xen/include/asm-x86/hvm/vmx/vmcs.h |  15 +-
 xen/include/asm-x86/hvm/vmx/vmx.h  |  27 ++-
 xen/include/asm-x86/iommu.h|   2 +
 xen/include/xen/iommu.h|   2 +-
 xen/include/xen/sched.h|   7 +
 18 files changed, 945 insertions(+), 68 deletions(-)
 create mode 100644 docs/misc/vtd-pi.txt

-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 03/15] vt-d: VT-d Posted-Interrupts feature detection

2015-05-08 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

This patch adds feature detection logic for VT-d posted-interrupt.

Signed-off-by: Feng Wu 
---
 xen/drivers/passthrough/vtd/iommu.c | 15 +--
 xen/drivers/passthrough/vtd/iommu.h |  1 +
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index c7bda73..8ad1f58 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2044,6 +2044,7 @@ static int init_vtd_hw(void)
 if ( ioapic_to_iommu(IO_APIC_ID(apic)) == NULL )
 {
 iommu_intremap = 0;
+iommu_intpost = 0;
 dprintk(XENLOG_ERR VTDPREFIX,
 "ioapic_to_iommu: ioapic %#x (id: %#x) is NULL! "
 "Will not try to enable Interrupt Remapping.\n",
@@ -2060,6 +2061,7 @@ static int init_vtd_hw(void)
 if ( enable_intremap(iommu, 0) != 0 )
 {
 iommu_intremap = 0;
+iommu_intpost = 0;
 dprintk(XENLOG_WARNING VTDPREFIX,
 "Interrupt Remapping not enabled\n");
 
@@ -2133,8 +2135,8 @@ int __init intel_vtd_setup(void)
 }
 
 /* We enable the following features only if they are supported by all VT-d
- * engines: Snoop Control, DMA passthrough, Queued Invalidation and
- * Interrupt Remapping.
+ * engines: Snoop Control, DMA passthrough, Queued Invalidation, Interrupt
+ * Remapping, and Posted Interrupt
  */
 for_each_drhd_unit ( drhd )
 {
@@ -2160,7 +2162,13 @@ int __init intel_vtd_setup(void)
 iommu_qinval = 0;
 
 if ( iommu_intremap && !ecap_intr_remap(iommu->ecap) )
+{
 iommu_intremap = 0;
+iommu_intpost = 0;
+}
+
+if ( iommu_intpost && !cap_intr_post(iommu->cap) )
+iommu_intpost = 0;
 
 if ( !vtd_ept_page_compatible(iommu) )
 iommu_hap_pt_share = 0;
@@ -2178,6 +2186,7 @@ int __init intel_vtd_setup(void)
 if ( !iommu_qinval && iommu_intremap )
 {
 iommu_intremap = 0;
+iommu_intpost = 0;
 dprintk(XENLOG_WARNING VTDPREFIX, "Interrupt Remapping disabled "
 "since Queued Invalidation isn't supported or enabled.\n");
 }
@@ -2187,6 +2196,7 @@ int __init intel_vtd_setup(void)
 P(iommu_passthrough, "Dom0 DMA Passthrough");
 P(iommu_qinval, "Queued Invalidation");
 P(iommu_intremap, "Interrupt Remapping");
+P(iommu_intpost, "Posted Interrupt");
 P(iommu_hap_pt_share, "Shared EPT tables");
 #undef P
 
@@ -2206,6 +2216,7 @@ int __init intel_vtd_setup(void)
 iommu_passthrough = 0;
 iommu_qinval = 0;
 iommu_intremap = 0;
+iommu_intpost = 0;
 return ret;
 }
 
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index d6e6520..42047e0 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -69,6 +69,7 @@
 /*
  * Decoding Capability Register
  */
+#define cap_intr_post(c)   (((c) >> 59) & 1)
 #define cap_read_drain(c)  (((c) >> 55) & 1)
 #define cap_write_drain(c) (((c) >> 54) & 1)
 #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 05/15] vmx: Initialize VT-d Posted-Interrupts Descriptor

2015-05-08 Thread Feng Wu
This patch initializes the VT-d Posted-interrupt Descriptor.

Signed-off-by: Feng Wu 
---
 xen/arch/x86/hvm/vmx/vmcs.c   |  3 +++
 xen/include/asm-x86/hvm/vmx/vmx.h | 20 +++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 63007a9..f60a454 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1004,6 +1004,9 @@ static int construct_vmcs(struct vcpu *v)
 
 if ( cpu_has_vmx_posted_intr_processing )
 {
+if ( iommu_intpost )
+pi_desc_init(v);
+
 __vmwrite(PI_DESC_ADDR, virt_to_maddr(&v->arch.hvm_vmx.pi_desc));
 __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
 }
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index 91c5e18..e4292cc 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -28,6 +28,9 @@
 #include 
 #include 
 #include 
+#include 
+
+extern uint8_t posted_intr_vector;
 
 typedef union {
 struct {
@@ -125,6 +128,22 @@ static inline unsigned long pi_get_pir(struct pi_desc 
*pi_desc, int group)
 return xchg(&pi_desc->pir[group], 0);
 }
 
+#define PI_xAPIC_NDST_MASK   0xFF00
+
+static inline void pi_desc_init(struct vcpu *v)
+{
+uint32_t dest;
+
+v->arch.hvm_vmx.pi_desc.nv = posted_intr_vector;
+
+dest = cpu_physical_id(v->processor);
+
+if ( x2apic_enabled )
+v->arch.hvm_vmx.pi_desc.ndst = dest;
+else
+v->arch.hvm_vmx.pi_desc.ndst = MASK_INSR(dest, PI_xAPIC_NDST_MASK);
+}
+
 /*
  * Exit Reasons
  */
@@ -244,7 +263,6 @@ static inline unsigned long pi_get_pir(struct pi_desc 
*pi_desc, int group)
 #define MODRM_EAX_ECX   ".byte 0xc1\n" /* EAX, ECX */
 
 extern u64 vmx_ept_vpid_cap;
-extern uint8_t posted_intr_vector;
 
 #define cpu_has_vmx_ept_exec_only_supported\
 (vmx_ept_vpid_cap & VMX_EPT_EXEC_ONLY_SUPPORTED)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 14/15] Suppress posting interrupts when 'SN' is set

2015-05-08 Thread Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are recognized as non-urgent interrupts, so we cannot send
posted-interrupts when 'SN' is set.

Signed-off-by: Feng Wu 
---
 xen/arch/x86/hvm/vmx/vmx.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index cdcc012..77a7897 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1664,9 +1664,20 @@ static void __vmx_deliver_posted_interrupt(struct vcpu 
*v)
 
 static void vmx_deliver_posted_intr(struct vcpu *v, u8 vector)
 {
+int r, sn;
+
 if ( pi_test_and_set_pir(vector, &v->arch.hvm_vmx.pi_desc) )
 return;
 
+/*
+ * Currently, we don't support urgent interrupt, all interrupts
+ * are recognized as non-urgent interrupt, so we cannot send
+ * posted-interrupt when 'SN' is set.
+ */
+
+sn = v->arch.hvm_vmx.pi_desc.sn;
+r = pi_test_and_set_on(&v->arch.hvm_vmx.pi_desc);
+
 if ( unlikely(v->arch.hvm_vmx.eoi_exitmap_changed) )
 {
 /*
@@ -1676,7 +1687,7 @@ static void vmx_deliver_posted_intr(struct vcpu *v, u8 
vector)
  */
 pi_set_on(&v->arch.hvm_vmx.pi_desc);
 }
-else if ( !pi_test_and_set_on(&v->arch.hvm_vmx.pi_desc) )
+else if ( !r && !sn )
 {
 __vmx_deliver_posted_interrupt(v);
 return;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 09/15] Add a new per-vCPU tasklet to wakeup the blocked vCPU

2015-05-08 Thread Feng Wu
This patch adds a new per-vCPU tasklet to wakeup the blocked
vCPU. It can be used in the case vcpu_unblock cannot be called
directly. This tasklet will be used in a later patch in this
series.

Signed-off-by: Feng Wu 
---
 xen/common/domain.c | 11 +++
 xen/include/xen/sched.h |  3 +++
 2 files changed, 14 insertions(+)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 6803c4d..95e2a10 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -109,6 +109,13 @@ static void vcpu_check_shutdown(struct vcpu *v)
 spin_unlock(&d->shutdown_lock);
 }
 
+static void vcpu_wakeup_tasklet_handler(unsigned long arg)
+{
+struct vcpu *v = (void *)arg;
+
+vcpu_unblock(v);
+}
+
 struct vcpu *alloc_vcpu(
 struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
 {
@@ -126,6 +133,9 @@ struct vcpu *alloc_vcpu(
 
 tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
 
+tasklet_init(&v->vcpu_wakeup_tasklet, vcpu_wakeup_tasklet_handler,
+ (unsigned long)v);
+
 if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
  !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
  !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
@@ -785,6 +795,7 @@ static void complete_domain_destroy(struct rcu_head *head)
 if ( (v = d->vcpu[i]) == NULL )
 continue;
 tasklet_kill(&v->continue_hypercall_tasklet);
+tasklet_kill(&v->vcpu_wakeup_tasklet);
 vcpu_destroy(v);
 sched_destroy_vcpu(v);
 destroy_waitqueue_vcpu(v);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 80c6f62..fd9e01e 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -239,6 +239,9 @@ struct vcpu
 /* Tasklet for continue_hypercall_on_cpu(). */
 struct tasklet   continue_hypercall_tasklet;
 
+/* Tasklet for wakeup_blocked_vcpu(). */
+struct tasklet   vcpu_wakeup_tasklet;
+
 /* Multicall information. */
 struct mc_state  mc_state;
 
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 11/15] vmx: Add a global wake-up vector for VT-d Posted-Interrupts

2015-05-08 Thread Feng Wu
This patch adds a global vector which is used to wake up
the blocked vCPU when an interrupt is being posted to it.

Signed-off-by: Feng Wu 
Suggested-by: Yang Zhang 
---
 xen/arch/x86/hvm/vmx/vmx.c| 31 +++
 xen/include/asm-x86/hvm/hvm.h |  1 +
 xen/include/asm-x86/hvm/vmx/vmx.h |  3 +++
 xen/include/xen/sched.h   |  2 ++
 4 files changed, 37 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 2451ca5..0e71d7e 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -90,6 +90,7 @@ DEFINE_PER_CPU(struct list_head, blocked_vcpu);
 DEFINE_PER_CPU(spinlock_t, blocked_vcpu_lock);
 
 uint8_t __read_mostly posted_intr_vector;
+uint8_t __read_mostly pi_wakeup_vector;
 
 static int vmx_domain_initialise(struct domain *d)
 {
@@ -132,6 +133,8 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 if ( v->vcpu_id == 0 )
 v->arch.user_regs.eax = 1;
 
+INIT_LIST_HEAD(&v->blocked_vcpu_list);
+
 return 0;
 }
 
@@ -1835,11 +1838,17 @@ const struct hvm_function_table * __init start_vmx(void)
 }
 
 if ( cpu_has_vmx_posted_intr_processing )
+{
 alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt);
+
+if ( iommu_intpost )
+alloc_direct_apic_vector(&pi_wakeup_vector, pi_wakeup_interrupt);
+}
 else
 {
 vmx_function_table.deliver_posted_intr = NULL;
 vmx_function_table.sync_pir_to_irr = NULL;
+vmx_function_table.pi_desc_update = NULL;
 }
 
 if ( cpu_has_vmx_ept
@@ -3262,6 +3271,28 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 }
 
 /*
+ * Handle VT-d posted-interrupt when VCPU is blocked.
+ */
+void pi_wakeup_interrupt(struct cpu_user_regs *regs)
+{
+struct vcpu *v;
+unsigned int cpu = smp_processor_id();
+
+spin_lock(&per_cpu(blocked_vcpu_lock, cpu));
+list_for_each_entry(v, &per_cpu(blocked_vcpu, cpu),
+blocked_vcpu_list) {
+struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+
+if ( pi_desc->on == 1 )
+tasklet_schedule(&v->vcpu_wakeup_tasklet);
+}
+spin_unlock(&per_cpu(blocked_vcpu_lock, cpu));
+
+ack_APIC_irq();
+this_cpu(irq_count)++;
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 77eeac5..e621c30 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -195,6 +195,7 @@ struct hvm_function_table {
 void (*deliver_posted_intr)(struct vcpu *v, u8 vector);
 void (*sync_pir_to_irr)(struct vcpu *v);
 void (*handle_eoi)(u8 vector);
+void (*pi_desc_update)(struct vcpu *v, int old_state);
 
 /*Walk nested p2m  */
 int (*nhvm_hap_walk_L1_p2m)(struct vcpu *v, paddr_t L2_gpa,
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index 0d11f9c..3adf776 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -34,6 +34,7 @@ DECLARE_PER_CPU(struct list_head, blocked_vcpu);
 DECLARE_PER_CPU(spinlock_t, blocked_vcpu_lock);
 
 extern uint8_t posted_intr_vector;
+extern uint8_t pi_wakeup_vector;
 
 typedef union {
 struct {
@@ -552,6 +553,8 @@ int alloc_p2m_hap_data(struct p2m_domain *p2m);
 void free_p2m_hap_data(struct p2m_domain *p2m);
 void p2m_init_hap_data(struct p2m_domain *p2m);
 
+void pi_wakeup_interrupt(struct cpu_user_regs *regs);
+
 /* EPT violation qualifications definitions */
 #define _EPT_READ_VIOLATION 0
 #define EPT_READ_VIOLATION  (1UL<<_EPT_READ_VIOLATION)
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index fd9e01e..4a7e6b3 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -148,6 +148,8 @@ struct vcpu
 
 struct vcpu *next_in_list;
 
+struct list_head blocked_vcpu_list;
+
 s_time_t periodic_period;
 s_time_t periodic_last_event;
 struct timer periodic_timer;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 12/15] vmx: Properly handle notification event when vCPU is running

2015-05-08 Thread Feng Wu
When a vCPU is running in Root mode and a notification event
has been injected to it, we need to set VCPU_KICK_SOFTIRQ for
the current cpu, so the pending interrupt in PIRR will be
synced to vIRR before VM-Exit in time.

Signed-off-by: Feng Wu 
---
 xen/arch/x86/hvm/vmx/vmx.c| 55 ++-
 xen/include/asm-x86/hvm/vmx/vmx.h |  1 +
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 0e71d7e..556a584 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1839,7 +1839,7 @@ const struct hvm_function_table * __init start_vmx(void)
 
 if ( cpu_has_vmx_posted_intr_processing )
 {
-alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt);
+alloc_direct_apic_vector(&posted_intr_vector, 
pi_notification_interrupt);
 
 if ( iommu_intpost )
 alloc_direct_apic_vector(&pi_wakeup_vector, pi_wakeup_interrupt);
@@ -3293,6 +3293,59 @@ void pi_wakeup_interrupt(struct cpu_user_regs *regs)
 }
 
 /*
+ * Handle VT-d posted-interrupt when VCPU is running.
+ */
+
+void pi_notification_interrupt(struct cpu_user_regs *regs)
+{
+/*
+ * We get here when a vCPU is running in root-mode
+ * (such as via hypercall, or any other reasons which
+ * can result in VM-Exit), and before vCPU is back to
+ * non-root, external interrupts from an assigned
+ * device happen and a notification event is delivered
+ * to this logical CPU.
+ *
+ * we need to set VCPU_KICK_SOFTIRQ for the current
+ * cpu, just like __vmx_deliver_posted_interrupt().
+ *
+ * So the pending interrupt in PIRR will be synced to
+ * vIRR before VM-Exit in time.
+ *
+ * Please refer to the following code fragments from
+ * xen/arch/x86/hvm/vmx/entry.S:
+ *
+ * .Lvmx_do_vmentry
+ *
+ *  ..
+ *  point 1
+ *
+ *  cmp  %ecx,(%rdx,%rax,1)
+ *  jnz  .Lvmx_process_softirqs
+ *
+ *  ..
+ *
+ *  je   .Lvmx_launch
+ *
+ *  ..
+ *
+ * .Lvmx_process_softirqs:
+ *  sti
+ *  call do_softirq
+ *  jmp  .Lvmx_do_vmentry
+ *
+ *  If VT-d engine issues a notification event at
+ *  point 1 above, it cannot be delivered to the guest
+ *  during this VM-entry without raising the softirq
+ *  in this notification handler.
+ */
+raise_softirq(VCPU_KICK_SOFTIRQ);
+
+ack_APIC_irq();
+this_cpu(irq_count)++;
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index 3adf776..89c0f56 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -554,6 +554,7 @@ void free_p2m_hap_data(struct p2m_domain *p2m);
 void p2m_init_hap_data(struct p2m_domain *p2m);
 
 void pi_wakeup_interrupt(struct cpu_user_regs *regs);
+void pi_notification_interrupt(struct cpu_user_regs *regs);
 
 /* EPT violation qualifications definitions */
 #define _EPT_READ_VIOLATION 0
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 08/15] Update IRTE according to guest interrupt config changes

2015-05-08 Thread Feng Wu
When guest changes its interrupt configuration (such as, vector, etc.)
for direct-assigned devices, we need to update the associated IRTE
with the new guest vector, so external interrupts from the assigned
devices can be injected to guests without VM-Exit.

For lowest-priority interrupts, we use vector-hashing mechanism to find
the destination vCPU. This follows the hardware behavior, since modern
Intel CPUs use vector hashing to handle the lowest-priority interrupt.

For multicast/broadcast vCPU, we cannot handle it via interrupt posting,
so we still use interrupt remapping.

Signed-off-by: Feng Wu 
---
 xen/drivers/passthrough/io.c | 99 +++-
 1 file changed, 98 insertions(+), 1 deletion(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 9b77334..7b1c094 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static DEFINE_PER_CPU(struct list_head, dpci_list);
 
@@ -199,6 +200,73 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
 xfree(dpci);
 }
 
+/*
+ * The purpose of this routine is to find the right destination vCPU for
+ * an interrupt which will be delivered by VT-d posted-interrupt. There
+ * are several cases as below:
+ *
+ * - For lowest-priority interrupts, we find the destination vCPU from the
+ *   guest vector using vector-hashing mechanism and return true. This follows
+ *   the hardware behavior, since modern Intel CPUs use vector hashing to
+ *   handle the lowest-priority interrupt.
+ * - Otherwise, for single destination interrupt, it is straightforward to
+ *   find the destination vCPU and return true.
+ * - For multicast/broadcast vCPU, we cannot handle it via interrupt posting,
+ *   so return false.
+ *
+ *   Here is the details about the vector-hashing mechanism:
+ *   1. For lowest-priority interrupts, store all the possible destination
+ *  vCPUs in an array.
+ *   2. Use "gvec % max number of destination vCPUs" to find the right
+ *  destination vCPU in the array for the lowest-priority interrupt.
+ */
+static bool_t pi_find_dest_vcpu(struct domain *d, uint8_t dest_id,
+uint8_t dest_mode, uint8_t delivery_mode,
+uint8_t gvec, struct vcpu **dest_vcpu)
+{
+struct vcpu *v, **dest_vcpu_array;
+unsigned int dest_vcpu_num = 0;
+int ret;
+
+dest_vcpu_array = xzalloc_array(struct vcpu *, d->max_vcpus);
+if ( !dest_vcpu_array )
+{
+dprintk(XENLOG_G_INFO,
+"dom%d: failed to allocate memeory.\n", d->domain_id);
+return 0;
+}
+
+for_each_vcpu ( d, v )
+{
+if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, 0,
+dest_id, dest_mode) )
+continue;
+
+dest_vcpu_array[dest_vcpu_num++] = v;
+}
+
+if ( delivery_mode == dest_LowestPrio )
+{
+if (  dest_vcpu_num != 0 )
+{
+*dest_vcpu = dest_vcpu_array[gvec % dest_vcpu_num];
+ret = 1;
+}
+else
+ret = 0;
+}
+else if (  dest_vcpu_num == 1 )
+{
+*dest_vcpu = dest_vcpu_array[0];
+ret = 1;
+}
+else
+ret = 0;
+
+xfree(dest_vcpu_array);
+return ret;
+}
+
 int pt_irq_create_bind(
 struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
 {
@@ -257,7 +325,7 @@ int pt_irq_create_bind(
 {
 case PT_IRQ_TYPE_MSI:
 {
-uint8_t dest, dest_mode;
+uint8_t dest, dest_mode, delivery_mode;
 int dest_vcpu_id;
 
 if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
@@ -330,11 +398,40 @@ int pt_irq_create_bind(
 /* Calculate dest_vcpu_id for MSI-type pirq migration. */
 dest = pirq_dpci->gmsi.gflags & VMSI_DEST_ID_MASK;
 dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK);
+delivery_mode = (pirq_dpci->gmsi.gflags >> GFLAGS_SHIFT_DELIV_MODE) &
+VMSI_DELIV_MASK;
 dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
 pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
 spin_unlock(&d->event_lock);
 if ( dest_vcpu_id >= 0 )
 hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
+
+/* Use interrupt posting if it is supported */
+if ( iommu_intpost )
+{
+struct vcpu *vcpu = NULL;
+
+if ( !pi_find_dest_vcpu(d, dest, dest_mode, delivery_mode,
+pirq_dpci->gmsi.gvec, &vcpu) )
+{
+dprintk(XENLOG_G_WARNING,
+"%pv: failed to find the dest vCPU for PI, guest "
+"vector:%u use software way to deliver the "
+" interrupts.\n", vcpu, pirq_dpci->gmsi.gvec)

[Xen-devel] [RFC v2 02/15] iommu: Add iommu_intpost to control VT-d Posted-Interrupts feature

2015-05-08 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

This patch adds variable 'iommu_intpost' to control whether to enable VT-d
posted-interrupt or not in the generic IOMMU code.

Signed-off-by: Feng Wu 
---
 xen/drivers/passthrough/iommu.c | 11 ++-
 xen/include/xen/iommu.h |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 92ea26f..302e3e4 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -39,6 +39,7 @@ static void iommu_dump_p2m_table(unsigned char key);
  *   no-snoop   Disable VT-d Snoop Control
  *   no-qinval  Disable VT-d Queued Invalidation
  *   no-intremapDisable VT-d Interrupt Remapping
+ *   no-intpost Disable VT-d Interrupt posting
  */
 custom_param("iommu", parse_iommu_param);
 bool_t __initdata iommu_enable = 1;
@@ -51,6 +52,7 @@ bool_t __read_mostly iommu_passthrough;
 bool_t __read_mostly iommu_snoop = 1;
 bool_t __read_mostly iommu_qinval = 1;
 bool_t __read_mostly iommu_intremap = 1;
+bool_t __read_mostly iommu_intpost = 0;
 bool_t __read_mostly iommu_hap_pt_share = 1;
 bool_t __read_mostly iommu_debug;
 bool_t __read_mostly amd_iommu_perdev_intremap = 1;
@@ -94,7 +96,11 @@ static void __init parse_iommu_param(char *s)
 else if ( !strcmp(s, "qinval") )
 iommu_qinval = val;
 else if ( !strcmp(s, "intremap") )
+{
 iommu_intremap = val;
+if ( iommu_intremap == 0 )
+iommu_intpost = 0;
+}
 else if ( !strcmp(s, "debug") )
 {
 iommu_debug = val;
@@ -272,7 +278,10 @@ int __init iommu_setup(void)
 iommu_enabled = (rc == 0);
 }
 if ( !iommu_enabled )
+{
 iommu_intremap = 0;
+iommu_intpost = 0;
+}
 
 if ( (force_iommu && !iommu_enabled) ||
  (force_intremap && !iommu_intremap) )
@@ -341,7 +350,7 @@ void iommu_crash_shutdown(void)
 const struct iommu_ops *ops = iommu_get_ops();
 if ( iommu_enabled )
 ops->crash_shutdown();
-iommu_enabled = iommu_intremap = 0;
+iommu_enabled = iommu_intremap = iommu_intpost = 0;
 }
 
 bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index bf4aff0..91063bb 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -31,7 +31,7 @@
 extern bool_t iommu_enable, iommu_enabled;
 extern bool_t force_iommu, iommu_verbose;
 extern bool_t iommu_workaround_bios_bug, iommu_passthrough;
-extern bool_t iommu_snoop, iommu_qinval, iommu_intremap;
+extern bool_t iommu_snoop, iommu_qinval, iommu_intremap, iommu_intpost;
 extern bool_t iommu_hap_pt_share;
 extern bool_t iommu_debug;
 extern bool_t amd_iommu_perdev_intremap;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 13/15] Update Posted-Interrupts Descriptor during vCPU scheduling

2015-05-08 Thread Feng Wu
The basic idea here is:
1. When vCPU's state is RUNSTATE_running,
- set 'NV' to 'Notification Vector'.
- Clear 'SN' to accept PI.
- set 'NDST' to the right pCPU.
2. When vCPU's state is RUNSTATE_blocked,
- set 'NV' to 'Wake-up Vector', so we can wake up the
  related vCPU when posted-interrupt happens for it.
- Clear 'SN' to accept PI.
3. When vCPU's state is RUNSTATE_runnable/RUNSTATE_offline,
- Set 'SN' to suppress non-urgent interrupts.
  (Currently, we only support non-urgent interrupts)
- Set 'NV' back to 'Notification Vector' if needed.

Signed-off-by: Feng Wu 
---
 xen/arch/x86/hvm/vmx/vmx.c | 130 +
 xen/common/schedule.c  |   5 ++
 xen/include/xen/sched.h|   2 +
 3 files changed, 137 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 556a584..cdcc012 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1711,6 +1711,131 @@ static void vmx_handle_eoi(u8 vector)
 __vmwrite(GUEST_INTR_STATUS, status);
 }
 
+static void vmx_pi_desc_update(struct vcpu *v, int old_state)
+{
+struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+struct pi_desc old, new;
+unsigned long flags;
+
+if ( !iommu_intpost )
+return;
+
+switch ( v->runstate.state )
+{
+case RUNSTATE_runnable:
+case RUNSTATE_offline:
+/*
+ * We don't need to send notification event to a non-running
+ * vcpu, the interrupt information will be delivered to it before
+ * VM-ENTRY when the vcpu is scheduled to run next time.
+ */
+pi_desc->sn = 1;
+
+/*
+ * If the state is transferred from RUNSTATE_blocked,
+ * we should set 'NV' field back to posted_intr_vector,
+ * so the Posted-Interrupts can be delivered to the vCPU
+ * by VT-d HW after it is scheduled to run.
+ */
+if ( old_state == RUNSTATE_blocked )
+{
+do
+{
+old.control = new.control = pi_desc->control;
+new.nv = posted_intr_vector;
+}
+while ( cmpxchg(&pi_desc->control, old.control, new.control)
+!= old.control );
+
+   /*
+* Delete the vCPU from the related block list
+* if we are resuming from blocked state
+*/
+   spin_lock_irqsave(&per_cpu(blocked_vcpu_lock,
+ v->pre_pcpu), flags);
+   list_del(&v->blocked_vcpu_list);
+   spin_unlock_irqrestore(&per_cpu(blocked_vcpu_lock,
+  v->pre_pcpu), flags);
+}
+break;
+
+case RUNSTATE_blocked:
+/*
+ * The vCPU is blocked on the block list.
+ * Add the blocked vCPU on the list of the
+ * vcpu->pre_pcpu, which is the destination
+ * of the wake-up notification event.
+ */
+v->pre_pcpu = v->processor;
+spin_lock_irqsave(&per_cpu(blocked_vcpu_lock,
+  v->pre_pcpu), flags);
+list_add_tail(&v->blocked_vcpu_list,
+  &per_cpu(blocked_vcpu, v->pre_pcpu));
+spin_unlock_irqrestore(&per_cpu(blocked_vcpu_lock,
+   v->pre_pcpu), flags);
+
+do
+{
+old.control = new.control = pi_desc->control;
+
+/*
+ * We should not block the vCPU if
+ * an interrupt was posted for it.
+ */
+
+if ( old.on == 1 )
+{
+/*
+ * The vCPU will be removed from the block list
+ * during its state transferring from RUNSTATE_blocked
+ * to RUNSTATE_runnable after the following tasklet
+ * is scheduled to run.
+ */
+tasklet_schedule(&v->vcpu_wakeup_tasklet);
+return;
+}
+
+/*
+ * Change the 'NDST' field to v->pre_pcpu, so when
+ * external interrupts from assigned devices happen,
+ * wakeup notification event will go to v->pre_pcpu,
+ * then in pi_wakeup_interrupt() we can find the
+ * vCPU in the right list to wake up.
+ */
+if ( x2apic_enabled )
+new.ndst = cpu_physical_id(v->pre_pcpu);
+else
+new.ndst = MASK_INSR(cpu_physical_id(v->pre_pcpu),
+ PI_xAPIC_NDST_MASK);
+new.sn = 0;
+new.nv = pi_wakeup_vector;
+}
+while ( cmpxchg(&pi_desc->control, old.contro

[Xen-devel] [RFC v2 01/15] VT-d Posted-interrupt (PI) design

2015-05-08 Thread Feng Wu
Add the design doc for VT-d PI.

Signed-off-by: Feng Wu 
---
 docs/misc/vtd-pi.txt | 332 +++
 1 file changed, 332 insertions(+)
 create mode 100644 docs/misc/vtd-pi.txt

diff --git a/docs/misc/vtd-pi.txt b/docs/misc/vtd-pi.txt
new file mode 100644
index 000..b1444c3
--- /dev/null
+++ b/docs/misc/vtd-pi.txt
@@ -0,0 +1,332 @@
+Authors: Feng Wu 
+
+VT-d Posted-interrupt (PI) design for XEN
+
+Background
+==
+With the development of virtualization, there are more and more device
+assignment requirements. However, today when a VM is running with
+assigned devices (such as, NIC), external interrupt handling for the assigned
+devices always needs VMM intervention.
+
+VT-d Posted-interrupt is a more enhanced method to handle interrupts
+in the virtualization environment. Interrupt posting is the process by
+which an interrupt request is recorded in a memory-resident
+posted-interrupt-descriptor structure by the root-complex, followed by
+an optional notification event issued to the CPU complex.
+
+With VT-d Posted-interrupt we can get the following advantages:
+- Direct delivery of external interrupts to running vCPUs without VMM
+intervention
+- Decrease the interrupt migration complexity. On vCPU migration, software
+can atomically co-migrate all interrupts targeting the migrating vCPU. For
+virtual machines with assigned devices, migrating a vCPU across pCPUs
+either incurs the overhead of forwarding interrupts in software (e.g. via VMM
+generated IPIs), or complexity to independently migrate each interrupt 
targeting
+the vCPU to the new pCPU. However, after enabling VT-d PI, the destination vCPU
+of an external interrupt from assigned devices is stored in the IRTE (i.e.
+Posted-interrupt Descriptor Address), when vCPU is migrated to another pCPU,
+we will set this new pCPU in the 'NDST' field of Posted-interrupt descriptor, 
this
+makes the interrupt migration automatic.
+
+Here is what Xen currently does for external interrupts from assigned devices:
+
+When a VM is running and an external interrupt from an assigned device occurs
+for it, a VM-EXIT happens, then:
+
+vmx_do_extint() --> do_IRQ() --> __do_IRQ_guest() --> hvm_do_IRQ_dpci() -->
+raise_softirq_for(pirq_dpci) --> raise_softirq(HVM_DPCI_SOFTIRQ)
+
+softirq HVM_DPCI_SOFTIRQ is bound to dpci_softirq()
+
+dpci_softirq() --> hvm_dirq_assist() --> vmsi_deliver_pirq() --> 
vmsi_deliver() -->
+vmsi_inj_irq() --> vlapic_set_irq()
+
+vlapic_set_irq() does the following things:
+1. If CPU-side posted-interrupt is supported, call vmx_deliver_posted_intr() 
to deliver
+the virtual interrupt via posted-interrupt infrastructure.
+2. Else if CPU-side posted-interrupt is not supported, set the related vIRR in 
vLAPIC
+page and call vcpu_kick() to kick the related vCPU. Before VM-Entry, 
vmx_intr_assist()
+will help to inject the interrupt to guests.
+
+However, after VT-d PI is supported, when a guest is running in non-root mode and an
+external interrupt from an assigned device occurs for it, no VM-Exit is needed;
+the guest can handle this totally in non-root mode, thus avoiding all the above
+code flow.
+
+Posted-interrupt Introduction
+
+There are two components to the Posted-interrupt architecture:
+Processor Support and Root-Complex Support
+
+- Processor Support
+Posted-interrupt processing is a feature by which a processor processes
+the virtual interrupts by recording them as pending on the virtual-APIC
+page.
+
+Posted-interrupt processing is enabled by setting the process posted
+interrupts VM-execution control. The processing is performed in response
+to the arrival of an interrupt with the posted-interrupt notification vector.
+In response to such an interrupt, the processor processes virtual interrupts
+recorded in a data structure called a posted-interrupt descriptor.
+
+For more information about APICv and CPU-side Posted-interrupt, please refer
+to Chapter 29, and Section 29.6 in the Intel SDM:
+http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
+
+- Root-Complex Support
+Interrupt posting is the process by which an interrupt request (from IOAPIC
+or MSI/MSIx capable sources) is recorded in a memory-resident
+posted-interrupt-descriptor structure by the root-complex, followed by
+an optional notification event issued to the CPU complex. The interrupt
+request arriving at the root-complex carries the identity of the interrupt
+request source and a 'remapping-index'. The remapping-index is used to
+look-up an entry from the memory-resident interrupt-remap-table. Unlike
+with interrupt-remapping, the interrupt-remap-table-entry for a posted-
+interrupt, specifies a virtual-vector and a pointer to the posted-interrupt
+descriptor. The virtual-vector specifies the vector of the interrupt to be
+recorded in the posted-interrupt descriptor. The posted-interrupt

[Xen-devel] [RFC v2 07/15] vt-d: Add API to update IRTE when VT-d PI is used

2015-05-08 Thread Feng Wu
This patch adds an API which is used to update the IRTE
for posted-interrupt when guest changes MSI/MSI-X information.

Signed-off-by: Feng Wu 
---
 xen/drivers/passthrough/vtd/intremap.c | 98 ++
 xen/drivers/passthrough/vtd/iommu.h|  4 ++
 xen/include/asm-x86/iommu.h|  2 +
 3 files changed, 104 insertions(+)

diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index 5ec76b4..dd1a3d8 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -898,3 +898,101 @@ void iommu_disable_x2apic_IR(void)
 for_each_drhd_unit ( drhd )
 disable_qinval(drhd->iommu);
 }
+
+static inline void setup_posted_irte(
+struct iremap_entry *new_ire, struct pi_desc *pi_desc, uint8_t gvec)
+{
+new_ire->post.urg = 0;
+new_ire->post.vector = gvec;
+new_ire->post.pda_l = (((u64)virt_to_maddr(pi_desc)) >>
+   (32 - PDA_LOW_BIT)) & PDA_MASK(LOW);
+new_ire->post.pda_h = (((u64)virt_to_maddr(pi_desc)) >> 32) &
+   PDA_MASK(HIGH);
+
+new_ire->post.res_1 = 0;
+new_ire->post.res_2 = 0;
+new_ire->post.res_3 = 0;
+new_ire->post.res_4 = 0;
+
+new_ire->post.im = 1;
+}
+
+/*
+ * This function is used to update the IRTE for posted-interrupt
+ * when guest changes MSI/MSI-X information.
+ */
+bool_t pi_update_irte(struct vcpu *v, struct pirq *pirq, uint8_t gvec)
+{
+struct irq_desc *desc;
+struct msi_desc *msi_desc;
+int remap_index;
+bool_t rc = 0;
+struct pci_dev *pci_dev;
+struct acpi_drhd_unit *drhd;
+struct iommu *iommu;
+struct ir_ctrl *ir_ctrl;
+struct iremap_entry *iremap_entries = NULL, *p = NULL;
+struct iremap_entry new_ire;
+struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+unsigned long flags;
+
+desc = pirq_spin_lock_irq_desc(pirq, NULL);
+if ( !desc )
+return 0;
+
+msi_desc = desc->msi_desc;
+if ( !msi_desc )
+goto unlock_out;
+
+pci_dev = msi_desc->dev;
+if ( !pci_dev )
+goto unlock_out;
+
+remap_index = msi_desc->remap_index;
+drhd = acpi_find_matched_drhd_unit(pci_dev);
+if ( !drhd )
+{
+dprintk(XENLOG_INFO VTDPREFIX,
+"%pv: failed to get drhd, pci device: "
+"%04x:%02x:%02x.%u, guest vector: %u\n",
+v, pci_dev->seg, pci_dev->bus, PCI_SLOT(pci_dev->devfn),
+PCI_FUNC(pci_dev->devfn), gvec);
+goto unlock_out;
+}
+
+iommu = drhd->iommu;
+ir_ctrl = iommu_ir_ctrl(iommu);
+if ( !ir_ctrl )
+{
+dprintk(XENLOG_INFO VTDPREFIX,
+"%pv: failed to get ir_ctrl, pci device: "
+"%04x:%02x:%02x.%u, guest vector: %u\n",
+v, pci_dev->seg, pci_dev->bus, PCI_SLOT(pci_dev->devfn),
+PCI_FUNC(pci_dev->devfn), gvec);
+goto unlock_out;
+}
+
+spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+
+GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, remap_index, iremap_entries, p);
+
+memcpy(&new_ire, p, sizeof(new_ire));
+
+/* Setup/Update interrupt remapping table entry. */
+setup_posted_irte(&new_ire, pi_desc, gvec);
+
+memcpy(p, &new_ire, sizeof(new_ire));
+iommu_flush_cache_entry(p, sizeof(struct iremap_entry));
+iommu_flush_iec_index(iommu, 0, remap_index);
+
+if ( iremap_entries )
+unmap_vtd_domain_page(iremap_entries);
+
+spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
+
+rc = 1;
+ unlock_out:
+spin_unlock_irq(&desc->lock);
+
+return rc;
+}
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index 77a9227..f41b4e2 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -327,6 +327,10 @@ struct iremap_entry {
   };
 };
 
+#define PDA_LOW_BIT26
+#define PDA_HIGH_BIT   32
+#define PDA_MASK(XX)   (~(-1UL << PDA_##XX##_BIT))
+
 /* Max intr remapping table page order is 8, as max number of IRTEs is 64K */
 #define IREMAP_PAGE_ORDER  8
 
diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
index e7a65da..1528af8 100644
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -32,6 +32,8 @@ int iommu_supports_eim(void);
 int iommu_enable_x2apic_IR(void);
 void iommu_disable_x2apic_IR(void);
 
+bool_t pi_update_irte(struct vcpu *v, struct pirq *pirq, uint8_t gvec);
+
 #endif /* !__ARCH_X86_IOMMU_H__ */
 /*
  * Local variables:
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 10/15] vmx: Define two per-cpu variables

2015-05-08 Thread Feng Wu
This patch defines two per-cpu variables:

blocked_vcpu:
A list storing the vCPUs which were blocked on this pCPU.

blocked_vcpu_lock:
The spinlock to protect blocked_vcpu.

Signed-off-by: Feng Wu 
---
 xen/arch/x86/hvm/vmx/vmcs.c   | 3 +++
 xen/arch/x86/hvm/vmx/vmx.c| 7 +++
 xen/include/asm-x86/hvm/vmx/vmx.h | 3 +++
 3 files changed, 13 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index f60a454..8166f08 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -585,6 +585,9 @@ int vmx_cpu_up(void)
 if ( cpu_has_vmx_vpid )
 vpid_sync_all();
 
+INIT_LIST_HEAD(&per_cpu(blocked_vcpu, cpu));
+spin_lock_init(&per_cpu(blocked_vcpu_lock, cpu));
+
 return 0;
 }
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 6c4f78c..2451ca5 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -82,6 +82,13 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t 
*msr_content);
 static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
 static void vmx_invlpg_intercept(unsigned long vaddr);
 
+/*
+ * We maintain a per-CPU linked-list of vCPUs, so in PI wakeup handler we
+ * can find which vCPU should be woken up.
+ */
+DEFINE_PER_CPU(struct list_head, blocked_vcpu);
+DEFINE_PER_CPU(spinlock_t, blocked_vcpu_lock);
+
 uint8_t __read_mostly posted_intr_vector;
 
 static int vmx_domain_initialise(struct domain *d)
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index e4292cc..0d11f9c 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -30,6 +30,9 @@
 #include 
 #include 
 
+DECLARE_PER_CPU(struct list_head, blocked_vcpu);
+DECLARE_PER_CPU(spinlock_t, blocked_vcpu_lock);
+
 extern uint8_t posted_intr_vector;
 
 typedef union {
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 06/15] vt-d: Extend struct iremap_entry to support VT-d Posted-Interrupts

2015-05-08 Thread Feng Wu
Extend struct iremap_entry according to VT-d Posted-Interrupts Spec.

Signed-off-by: Feng Wu 
---
 xen/drivers/passthrough/vtd/intremap.c | 90 +-
 xen/drivers/passthrough/vtd/iommu.h| 26 +++---
 xen/drivers/passthrough/vtd/utils.c| 10 ++--
 3 files changed, 69 insertions(+), 57 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index 0333686..5ec76b4 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -123,9 +123,9 @@ static u16 hpetid_to_bdf(unsigned int hpet_id)
 static void set_ire_sid(struct iremap_entry *ire,
 unsigned int svt, unsigned int sq, unsigned int sid)
 {
-ire->hi.svt = svt;
-ire->hi.sq = sq;
-ire->hi.sid = sid;
+ire->remap.svt = svt;
+ire->remap.sq = sq;
+ire->remap.sid = sid;
 }
 
 static void set_ioapic_source_id(int apic_id, struct iremap_entry *ire)
@@ -220,7 +220,7 @@ static unsigned int alloc_remap_entry(struct iommu *iommu, 
unsigned int nr)
 else
 p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
 
-if ( p->lo_val || p->hi_val ) /* not a free entry */
+if ( p->lo || p->hi ) /* not a free entry */
 found = 0;
 else if ( ++found == nr )
 break;
@@ -254,7 +254,7 @@ static int remap_entry_to_ioapic_rte(
 GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
  iremap_entries, iremap_entry);
 
-if ( iremap_entry->hi_val == 0 && iremap_entry->lo_val == 0 )
+if ( iremap_entry->hi == 0 && iremap_entry->lo == 0 )
 {
 dprintk(XENLOG_ERR VTDPREFIX,
 "%s: index (%d) get an empty entry!\n",
@@ -264,13 +264,13 @@ static int remap_entry_to_ioapic_rte(
 return -EFAULT;
 }
 
-old_rte->vector = iremap_entry->lo.vector;
-old_rte->delivery_mode = iremap_entry->lo.dlm;
-old_rte->dest_mode = iremap_entry->lo.dm;
-old_rte->trigger = iremap_entry->lo.tm;
+old_rte->vector = iremap_entry->remap.vector;
+old_rte->delivery_mode = iremap_entry->remap.dlm;
+old_rte->dest_mode = iremap_entry->remap.dm;
+old_rte->trigger = iremap_entry->remap.tm;
 old_rte->__reserved_2 = 0;
 old_rte->dest.logical.__reserved_1 = 0;
-old_rte->dest.logical.logical_dest = iremap_entry->lo.dst >> 8;
+old_rte->dest.logical.logical_dest = iremap_entry->remap.dst >> 8;
 
 unmap_vtd_domain_page(iremap_entries);
 spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
@@ -318,27 +318,27 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu,
 if ( rte_upper )
 {
 if ( x2apic_enabled )
-new_ire.lo.dst = value;
+new_ire.remap.dst = value;
 else
-new_ire.lo.dst = (value >> 24) << 8;
+new_ire.remap.dst = (value >> 24) << 8;
 }
 else
 {
 *(((u32 *)&new_rte) + 0) = value;
-new_ire.lo.fpd = 0;
-new_ire.lo.dm = new_rte.dest_mode;
-new_ire.lo.tm = new_rte.trigger;
-new_ire.lo.dlm = new_rte.delivery_mode;
+new_ire.remap.fpd = 0;
+new_ire.remap.dm = new_rte.dest_mode;
+new_ire.remap.tm = new_rte.trigger;
+new_ire.remap.dlm = new_rte.delivery_mode;
 /* Hardware require RH = 1 for LPR delivery mode */
-new_ire.lo.rh = (new_ire.lo.dlm == dest_LowestPrio);
-new_ire.lo.avail = 0;
-new_ire.lo.res_1 = 0;
-new_ire.lo.vector = new_rte.vector;
-new_ire.lo.res_2 = 0;
+new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
+new_ire.remap.avail = 0;
+new_ire.remap.res_1 = 0;
+new_ire.remap.vector = new_rte.vector;
+new_ire.remap.res_2 = 0;
 
 set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
-new_ire.hi.res_1 = 0;
-new_ire.lo.p = 1; /* finally, set present bit */
+new_ire.remap.res_3 = 0;
+new_ire.remap.p = 1; /* finally, set present bit */
 
 /* now construct new ioapic rte entry */
 remap_rte->vector = new_rte.vector;
@@ -511,7 +511,7 @@ static int remap_entry_to_msi_msg(
 GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
  iremap_entries, iremap_entry);
 
-if ( iremap_entry->hi_val == 0 && iremap_entry->lo_val == 0 )
+if ( iremap_entry->hi == 0 && iremap_entry->lo == 0 )
 {
 dprintk(XENLOG_ERR VTDPREFIX,
 "%s: index (%d) get an empty entry!\n",
@@ -524,25 +524,25 @@ static int remap_entry_to_msi_msg(
 msg->address_hi = MSI_ADDR_BASE_HI;
 msg->address_lo =
 MSI_ADDR_BASE_LO |
-((iremap_entry->lo.dm == 0) ?
+((irema

[Xen-devel] [RFC v2 04/15] vmx: Extend struct pi_desc to support VT-d Posted-Interrupts

2015-05-08 Thread Feng Wu
Extend struct pi_desc according to VT-d Posted-Interrupts Spec.

Signed-off-by: Feng Wu 
---
 xen/include/asm-x86/hvm/vmx/vmcs.h | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 6fce6aa..3707d1c 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -76,8 +76,19 @@ struct vmx_domain {
 
 struct pi_desc {
 DECLARE_BITMAP(pir, NR_VECTORS);
-u32 control;
-u32 rsvd[7];
+union {
+struct
+{
+u64 on : 1,  /* bit 256 - Outstanding Notification */
+sn : 1,  /* bit 257 - Suppress Notification */
+rsvd_1 : 14, /* bit 271:258 - Reserved */
+nv : 8,  /* bit 279:272 - Notification Vector */
+rsvd_2 : 8,  /* bit 287:280 - Reserved */
+ndst   : 32; /* bit 319:288 - Notification Destination */
+};
+u64 control;
+};
+u32 rsvd[6];
 } __attribute__ ((aligned (64)));
 
 #define ept_get_wl(ept)   ((ept)->ept_wl)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v2 15/15] Add a command line parameter for VT-d posted-interrupts

2015-05-08 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

Signed-off-by: Feng Wu 
---
 docs/misc/xen-command-line.markdown | 9 -
 xen/drivers/passthrough/iommu.c | 8 +++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 1dda1f0..3faa073 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -821,7 +821,7 @@ debug hypervisor only).
 > Default: `new` unless directed-EOI is supported
 
 ### iommu
-> `= List of [  | force | required | intremap | qinval | snoop | 
sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | verbose | debug ]`
+> `= List of [  | force | required | intremap | intpost | qinval | 
snoop | sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | verbose | debug ]`
 
 > Sub-options:
 
@@ -848,6 +848,13 @@ debug hypervisor only).
 >> Control the use of interrupt remapping (DMA remapping will always be enabled
 >> if IOMMU functionality is enabled).
 
+> `intpost`
+
+> Default: `true`
+
+>> Control the use of interrupt posting; interrupt posting is dependent on
+>> interrupt remapping.
+
 > `qinval` (VT-d)
 
 > Default: `true`
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 302e3e4..1bda7e9 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -52,7 +52,7 @@ bool_t __read_mostly iommu_passthrough;
 bool_t __read_mostly iommu_snoop = 1;
 bool_t __read_mostly iommu_qinval = 1;
 bool_t __read_mostly iommu_intremap = 1;
-bool_t __read_mostly iommu_intpost = 0;
+bool_t __read_mostly iommu_intpost = 1;
 bool_t __read_mostly iommu_hap_pt_share = 1;
 bool_t __read_mostly iommu_debug;
 bool_t __read_mostly amd_iommu_perdev_intremap = 1;
@@ -101,6 +101,12 @@ static void __init parse_iommu_param(char *s)
 if ( iommu_intremap == 0 )
 iommu_intpost = 0;
 }
+else if ( !strcmp(s, "intpost") )
+{
+iommu_intpost = val;
+if ( iommu_intremap == 0 )
+iommu_intpost = 0;
+}
 else if ( !strcmp(s, "debug") )
 {
 iommu_debug = val;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [v3 12/15] vmx: posted-interrupt handling when vCPU is blocked

2015-06-23 Thread Feng Wu
This patch includes the following aspects:
- Add a global vector to wake up the blocked vCPU
  when an interrupt is being posted to it (This
  part was suggested by Yang Zhang).
- Adds a new per-vCPU tasklet to wakeup the blocked
  vCPU. It can be used in the case vcpu_unblock
  cannot be called directly.
- Define two per-cpu variables:
  * pi_blocked_vcpu:
  A list storing the vCPUs which were blocked on this pCPU.

  * pi_blocked_vcpu_lock:
  The spinlock to protect pi_blocked_vcpu.

Signed-off-by: Feng Wu 
---
v3:
- This patch is generated by merging the following three patches in v2:
   [RFC v2 09/15] Add a new per-vCPU tasklet to wakeup the blocked vCPU
   [RFC v2 10/15] vmx: Define two per-cpu variables
   [RFC v2 11/15] vmx: Add a global wake-up vector for VT-d Posted-Interrupts
- rename 'vcpu_wakeup_tasklet' to 'pi_vcpu_wakeup_tasklet'
- Move the definition of 'pi_vcpu_wakeup_tasklet' to 'struct arch_vmx_struct'
- rename 'vcpu_wakeup_tasklet_handler' to 'pi_vcpu_wakeup_tasklet_handler'
- Make pi_wakeup_interrupt() static
- Rename 'blocked_vcpu_list' to 'pi_blocked_vcpu_list'
- move 'pi_blocked_vcpu_list' to 'struct arch_vmx_struct'
- Rename 'blocked_vcpu' to 'pi_blocked_vcpu'
- Rename 'blocked_vcpu_lock' to 'pi_blocked_vcpu_lock'

 xen/arch/x86/hvm/vmx/vmcs.c|  3 +++
 xen/arch/x86/hvm/vmx/vmx.c | 54 ++
 xen/include/asm-x86/hvm/hvm.h  |  1 +
 xen/include/asm-x86/hvm/vmx/vmcs.h |  5 
 xen/include/asm-x86/hvm/vmx/vmx.h  |  5 
 5 files changed, 68 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 11dc1b5..0c5ce3f 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -631,6 +631,9 @@ int vmx_cpu_up(void)
 if ( cpu_has_vmx_vpid )
 vpid_sync_all();
 
+INIT_LIST_HEAD(&per_cpu(pi_blocked_vcpu, cpu));
+spin_lock_init(&per_cpu(pi_blocked_vcpu_lock, cpu));
+
 return 0;
 }
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index b94ef6a..7db6009 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -82,7 +82,20 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t 
*msr_content);
 static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
 static void vmx_invlpg_intercept(unsigned long vaddr);
 
+/*
+ * We maintain a per-CPU linked-list of vCPUs, so in PI wakeup handler we
+ * can find which vCPU should be woken up.
+ */
+DEFINE_PER_CPU(struct list_head, pi_blocked_vcpu);
+DEFINE_PER_CPU(spinlock_t, pi_blocked_vcpu_lock);
+
 uint8_t __read_mostly posted_intr_vector;
+uint8_t __read_mostly pi_wakeup_vector;
+
+static void pi_vcpu_wakeup_tasklet_handler(unsigned long arg)
+{
+vcpu_unblock((struct vcpu *)arg);
+}
 
 static int vmx_domain_initialise(struct domain *d)
 {
@@ -148,11 +161,19 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 if ( v->vcpu_id == 0 )
 v->arch.user_regs.eax = 1;
 
+tasklet_init(
+&v->arch.hvm_vmx.pi_vcpu_wakeup_tasklet,
+pi_vcpu_wakeup_tasklet_handler,
+(unsigned long)v);
+
+INIT_LIST_HEAD(&v->arch.hvm_vmx.pi_blocked_vcpu_list);
+
 return 0;
 }
 
 static void vmx_vcpu_destroy(struct vcpu *v)
 {
+tasklet_kill(&v->arch.hvm_vmx.pi_vcpu_wakeup_tasklet);
 /*
  * There are cases that domain still remains in log-dirty mode when it is
  * about to be destroyed (ex, user types 'xl destroy '), in which case
@@ -1848,6 +1869,33 @@ static struct hvm_function_table __initdata 
vmx_function_table = {
 .enable_msr_exit_interception = vmx_enable_msr_exit_interception,
 };
 
+/*
+ * Handle VT-d posted-interrupt when VCPU is blocked.
+ */
+static void pi_wakeup_interrupt(struct cpu_user_regs *regs)
+{
+struct arch_vmx_struct *vmx;
+unsigned int cpu = smp_processor_id();
+
+spin_lock(&per_cpu(pi_blocked_vcpu_lock, cpu));
+
+/*
+ * FIXME: The length of the list depends on how many
+ * vCPUs are currently blocked on this specific pCPU.
+ * This may hurt the interrupt latency if the list
+ * grows to too many entries.
+ */
+list_for_each_entry(vmx, &per_cpu(pi_blocked_vcpu, cpu),
+pi_blocked_vcpu_list)
+if ( vmx->pi_desc.on )
+tasklet_schedule(&vmx->pi_vcpu_wakeup_tasklet);
+
+spin_unlock(&per_cpu(pi_blocked_vcpu_lock, cpu));
+
+ack_APIC_irq();
+this_cpu(irq_count)++;
+}
+
 const struct hvm_function_table * __init start_vmx(void)
 {
 set_in_cr4(X86_CR4_VMXE);
@@ -1884,11 +1932,17 @@ const struct hvm_function_table * __init start_vmx(void)
 }
 
 if ( cpu_has_vmx_posted_intr_processing )
+{
 alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt);
+
+if ( iommu_i

[Xen-devel] [v3 11/15] Update IRTE according to guest interrupt config changes

2015-06-23 Thread Feng Wu
When guest changes its interrupt configuration (such as, vector, etc.)
for direct-assigned devices, we need to update the associated IRTE
with the new guest vector, so external interrupts from the assigned
devices can be injected to guests without VM-Exit.

For lowest-priority interrupts, we use vector-hashing mechanism to find
the destination vCPU. This follows the hardware behavior, since modern
Intel CPUs use vector hashing to handle the lowest-priority interrupt.

For multicast/broadcast vCPU, we cannot handle it via interrupt posting,
still use interrupt remapping.

Signed-off-by: Feng Wu 
---
v3:
- Use bitmap to store the all the possible destination vCPUs of an
interrupt, then trying to find the right destination from the bitmap
- Typo and some small changes

 xen/drivers/passthrough/io.c | 96 +++-
 1 file changed, 95 insertions(+), 1 deletion(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 9b77334..18e24e1 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static DEFINE_PER_CPU(struct list_head, dpci_list);
 
@@ -199,6 +200,78 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
 xfree(dpci);
 }
 
+/*
+ * The purpose of this routine is to find the right destination vCPU for
+ * an interrupt which will be delivered by VT-d posted-interrupt. There
+ * are several cases as below:
+ *
+ * - For lowest-priority interrupts, we find the destination vCPU from the
+ *   guest vector using vector-hashing mechanism and return true. This follows
+ *   the hardware behavior, since modern Intel CPUs use vector hashing to
+ *   handle the lowest-priority interrupt.
+ * - Otherwise, for single destination interrupt, it is straightforward to
+ *   find the destination vCPU and return true.
+ * - For multicast/broadcast vCPU, we cannot handle it via interrupt posting,
+ *   so return false.
+ *
+ *   Here are the details about the vector-hashing mechanism:
+ *   1. For lowest-priority interrupts, store all the possible destination
+ *  vCPUs in an array.
+ *   2. Use "gvec % max number of destination vCPUs" to find the right
+ *  destination vCPU in the array for the lowest-priority interrupt.
+ */
+static struct vcpu *pi_find_dest_vcpu(struct domain *d, uint8_t dest_id,
+  uint8_t dest_mode, uint8_t delivery_mode,
+  uint8_t gvec)
+{
+unsigned long *dest_vcpu_bitmap = NULL;
+unsigned int dest_vcpu_num = 0, idx = 0;
+int size = (d->max_vcpus + BITS_PER_LONG - 1) / BITS_PER_LONG;
+struct vcpu *v, *dest = NULL;
+int i;
+
+dest_vcpu_bitmap = xzalloc_array(unsigned long, size);
+if ( !dest_vcpu_bitmap )
+{
+dprintk(XENLOG_G_INFO,
+"dom%d: failed to allocate memory\n", d->domain_id);
+return NULL;
+}
+
+for_each_vcpu ( d, v )
+{
+if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, 0,
+dest_id, dest_mode) )
+continue;
+
+__set_bit(v->vcpu_id, dest_vcpu_bitmap);
+dest_vcpu_num++;
+}
+
+if ( delivery_mode == dest_LowestPrio )
+{
+if (  dest_vcpu_num != 0 )
+{
+for ( i = 0; i <= gvec % dest_vcpu_num; i++)
+idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1;
+idx--;
+
+BUG_ON(idx >= d->max_vcpus || idx < 0);
+dest = d->vcpu[idx];
+}
+}
+else if (  dest_vcpu_num == 1 )
+{
+idx = find_first_bit(dest_vcpu_bitmap, d->max_vcpus);
+BUG_ON(idx >= d->max_vcpus || idx < 0);
+dest = d->vcpu[idx];
+}
+
+xfree(dest_vcpu_bitmap);
+
+return dest;
+}
+
 int pt_irq_create_bind(
 struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
 {
@@ -257,7 +330,7 @@ int pt_irq_create_bind(
 {
 case PT_IRQ_TYPE_MSI:
 {
-uint8_t dest, dest_mode;
+uint8_t dest, dest_mode, delivery_mode;
 int dest_vcpu_id;
 
 if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
@@ -330,11 +403,32 @@ int pt_irq_create_bind(
 /* Calculate dest_vcpu_id for MSI-type pirq migration. */
 dest = pirq_dpci->gmsi.gflags & VMSI_DEST_ID_MASK;
 dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK);
+delivery_mode = (pirq_dpci->gmsi.gflags >> GFLAGS_SHIFT_DELIV_MODE) &
+VMSI_DELIV_MASK;
 dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
 pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
 spin_unlock(&d->event_lock);
 if ( dest_vcpu_id >= 0 )
 hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
+
+/* Use interrupt posting if it is supported *

[Xen-devel] [RFC v1 08/15] Update IRTE according to guest interrupt config changes

2015-03-25 Thread Feng Wu
When guest changes its interrupt configuration (such as, vector, etc.)
for direct-assigned devices, we need to update the associated IRTE
with the new guest vector, so external interrupts from the assigned
devices can be injected to guests without VM-Exit.

For lowest-priority interrupts, we use vector-hashing mechanism to find
the destination vCPU. This follows the hardware behavior, since modern
Intel CPUs use vector hashing to handle the lowest-priority interrupt.

For multicast/broadcast vCPU, we cannot handle it via interrupt posting,
still use interrupt remapping.

Signed-off-by: Feng Wu 
---
 xen/drivers/passthrough/io.c | 77 +++-
 1 file changed, 76 insertions(+), 1 deletion(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index ae050df..1d9a132 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static DEFINE_PER_CPU(struct list_head, dpci_list);
 
@@ -199,6 +200,61 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
 xfree(dpci);
 }
 
+/*
+ * Find the vCPU that a guest interrupt should be posted to.
+ *
+ * - For lowest-priority interrupts, collect every vCPU matching the
+ *   destination and pick one using the vector-hashing mechanism (gvec modulo
+ *   the number of candidates). This follows the hardware behavior, since
+ *   modern Intel CPUs use vector hashing to handle the lowest-priority
+ *   interrupt.
+ * - Otherwise, for single destination interrupt, that vCPU is the target.
+ * - For multicast/broadcast vCPU, we cannot handle it via interrupt posting.
+ *
+ * Returns true and stores the target in *dest_vcpu when posting can be used.
+ * Returns false otherwise (no match, multiple matches for a
+ * non-lowest-priority interrupt, or allocation failure); *dest_vcpu must not
+ * be relied upon in that case.
+ */
+static bool_t pi_find_dest_vcpu(struct domain *d, uint8_t dest_id,
+                                uint8_t dest_mode, uint8_t deliver_mode,
+                                uint32_t gvec, struct vcpu **dest_vcpu)
+{
+    struct vcpu *v, **dest_vcpu_array = NULL;
+    unsigned int dest_vcpu_num = 0;
+    bool_t ret = 0;
+
+    if ( deliver_mode == dest_LowestPrio )
+    {
+        dest_vcpu_array = xzalloc_array(struct vcpu *, d->max_vcpus);
+        if ( !dest_vcpu_array )
+            return 0;
+    }
+
+    for_each_vcpu ( d, v )
+    {
+        if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, 0,
+                                dest_id, dest_mode) )
+            continue;
+
+        /*
+         * Store before counting so that candidates occupy slots
+         * 0 .. dest_vcpu_num - 1, which is the range the hash indexes.
+         */
+        if ( dest_vcpu_array )
+            dest_vcpu_array[dest_vcpu_num] = v;
+        else
+            *dest_vcpu = v;
+
+        dest_vcpu_num++;
+    }
+
+    if ( deliver_mode == dest_LowestPrio )
+    {
+        if ( dest_vcpu_num != 0 )
+        {
+            *dest_vcpu = dest_vcpu_array[gvec % dest_vcpu_num];
+            ret = 1;
+        }
+
+        xfree(dest_vcpu_array);
+    }
+    else
+        ret = (dest_vcpu_num == 1);
+
+    return ret;
+}
+
 int pt_irq_create_bind(
 struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
 {
@@ -257,7 +313,7 @@ int pt_irq_create_bind(
 {
 case PT_IRQ_TYPE_MSI:
 {
-uint8_t dest, dest_mode;
+uint8_t dest, dest_mode, deliver_mode;
 int dest_vcpu_id;
 
 if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
@@ -330,11 +386,30 @@ int pt_irq_create_bind(
 /* Calculate dest_vcpu_id for MSI-type pirq migration. */
 dest = pirq_dpci->gmsi.gflags & VMSI_DEST_ID_MASK;
 dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK);
+deliver_mode = (pirq_dpci->gmsi.gflags >> GFLAGS_SHIFT_DELIV_MODE) &
+VMSI_DELIV_MASK;
 dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
 pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
 spin_unlock(&d->event_lock);
 if ( dest_vcpu_id >= 0 )
 hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
+
+/* Use interrupt posting if it is supported */
+if ( iommu_intpost )
+{
+struct vcpu *vcpu = NULL;
+
+if ( !pi_find_dest_vcpu(d, dest, dest_mode, deliver_mode,
+pirq_dpci->gmsi.gvec, &vcpu) )
+break;
+
+if ( pi_update_irte( vcpu, info, pirq_dpci->gmsi.gvec ) != 0 )
+{
+dprintk(XENLOG_G_INFO, "failed to update PI IRTE\n");
+return -EBUSY;
+}
+}
+
 break;
 }
 
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 10/15] vmx: Define two per-cpu variants

2015-03-25 Thread Feng Wu
This patch defines two per-CPU variables:

blocked_vcpu_on_cpu:
A list storing the vCPUs which were blocked on this pCPU.

blocked_vcpu_on_cpu_lock:
The spinlock to protect blocked_vcpu_on_cpu.

Signed-off-by: Feng Wu 
---
 xen/arch/x86/hvm/vmx/vmcs.c   | 3 +++
 xen/arch/x86/hvm/vmx/vmx.c| 7 +++
 xen/include/asm-x86/hvm/vmx/vmx.h | 3 +++
 3 files changed, 13 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 942f4b7..1345e69 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -585,6 +585,9 @@ int vmx_cpu_up(void)
 if ( cpu_has_vmx_vpid )
 vpid_sync_all();
 
+INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+
 return 0;
 }
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index e1c55ce..ff5544d 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -81,6 +81,13 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t 
*msr_content);
 static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
 static void vmx_invlpg_intercept(unsigned long vaddr);
 
+/*
+ * We maintain a per-CPU linked list of vCPUs, so that the PI wakeup handler
+ * can find which vCPU should be woken up.
+ */
+DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 uint8_t __read_mostly posted_intr_vector;
 
 static int vmx_domain_initialise(struct domain *d)
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index 3cd75eb..e643c3c 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -30,6 +30,9 @@
 #include 
 #include 
 
+DECLARE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+DECLARE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 extern uint8_t posted_intr_vector;
 
 typedef union {
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 05/15] vmx: Initialize VT-d Posted-Interrupts Descriptor

2015-03-25 Thread Feng Wu
This patch initializes the VT-d Posted-interrupt Descriptor.

Signed-off-by: Feng Wu 
---
 xen/arch/x86/hvm/vmx/vmcs.c   |  3 +++
 xen/include/asm-x86/hvm/vmx/vmx.h | 21 -
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index d614638..942f4b7 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1004,6 +1004,9 @@ static int construct_vmcs(struct vcpu *v)
 
 if ( cpu_has_vmx_posted_intr_processing )
 {
+if ( iommu_intpost == 1 )
+pi_desc_init(v);
+
 __vmwrite(PI_DESC_ADDR, virt_to_maddr(&v->arch.hvm_vmx.pi_desc));
 __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
 }
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index ecc5e17..3cd75eb 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -28,6 +28,9 @@
 #include 
 #include 
 #include 
+#include 
+
+extern uint8_t posted_intr_vector;
 
 typedef union {
 struct {
@@ -146,6 +149,23 @@ static inline unsigned long pi_get_pir(struct pi_desc 
*pi_desc, int group)
 return xchg(&pi_desc->pir[group], 0);
 }
 
+/* Set up the initial Posted-Interrupts descriptor state for vCPU v. */
+static inline void pi_desc_init(struct vcpu *v)
+{
+    struct pi_desc *desc = &v->arch.hvm_vmx.pi_desc;
+    uint32_t apic_id = cpu_physical_id(v->processor);
+
+    /* Clear SN and notify on the posted-interrupt vector. */
+    pi_clear_sn(desc);
+    desc->nv = posted_intr_vector;
+
+    /* Physical mode for Notification Event */
+    desc->ndm = 0;
+
+    /* x2APIC takes the ID as-is; otherwise it is placed in bits 15:8. */
+    desc->ndst = x2apic_enabled ? apic_id : (apic_id << 8) & 0xFF00;
+}
+
 /*
  * Exit Reasons
  */
@@ -265,7 +285,6 @@ static inline unsigned long pi_get_pir(struct pi_desc 
*pi_desc, int group)
 #define MODRM_EAX_ECX   ".byte 0xc1\n" /* EAX, ECX */
 
 extern u64 vmx_ept_vpid_cap;
-extern uint8_t posted_intr_vector;
 
 #define cpu_has_vmx_ept_exec_only_supported\
 (vmx_ept_vpid_cap & VMX_EPT_EXEC_ONLY_SUPPORTED)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 13/15] Update Posted-Interrupts Descriptor during vCPU scheduling

2015-03-25 Thread Feng Wu
The basic idea here is:
1. When vCPU's state is RUNSTATE_running,
- set 'NV' to 'Notification Vector'.
- Clear 'SN' to accept PI.
- set 'NDST' to the right pCPU.
2. When vCPU's state is RUNSTATE_blocked,
- set 'NV' to 'Wake-up Vector', so we can wake up the
  related vCPU when posted-interrupt happens for it.
- Clear 'SN' to accept PI.
3. When vCPU's state is RUNSTATE_runnable/RUNSTATE_offline,
- Set 'SN' to suppress non-urgent interrupts.
  (Currently, we only support non-urgent interrupts)
- Set 'NV' back to 'Notification Vector' if needed.

Signed-off-by: Feng Wu 
---
 xen/arch/x86/hvm/vmx/vmx.c | 108 +
 xen/common/schedule.c  |   3 ++
 2 files changed, 111 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index b30392c..6323bd6 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1710,6 +1710,113 @@ static void vmx_handle_eoi(u8 vector)
 __vmwrite(GUEST_INTR_STATUS, status);
 }
 
+/*
+ * Update v's Posted-Interrupts descriptor for a transition to run state
+ * 'new_state'; the previous state is read from v->runstate.state.
+ * Does nothing when iommu_intpost is clear.
+ *
+ * - RUNSTATE_runnable / RUNSTATE_offline: set SN; if the vCPU was blocked,
+ *   also set NV back to posted_intr_vector and unlink the vCPU from the
+ *   blocked list of v->processor.
+ * - RUNSTATE_blocked: link the vCPU onto the blocked list of v->processor,
+ *   then clear SN and set NV to pi_wakeup_vector. If ON is already set,
+ *   schedule the vCPU's wakeup tasklet and return without changing the
+ *   descriptor.
+ * - RUNSTATE_running: write NDST for v->processor and clear SN.
+ * - Any other state: no change.
+ */
+static void vmx_pi_desc_update(struct vcpu *v, int new_state)
+{
+struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+struct pi_desc old, new;
+int old_state = v->runstate.state;
+unsigned long flags;
+
+if ( !iommu_intpost )
+return;
+
+switch ( new_state )
+{
+case RUNSTATE_runnable:
+case RUNSTATE_offline:
+/*
+ * We don't need to send notification event to a non-running
+ * vcpu, the interrupt information will be delivered to it before
+ * VM-ENTRY when the vcpu is scheduled to run next time.
+ */
+pi_set_sn(pi_desc);
+
+/*
+ * If the state is transferred from RUNSTATE_blocked,
+ * we should set 'NV' field back to posted_intr_vector,
+ * so the Posted-Interrupts can be delivered to the vCPU
+ * by VT-d HW after it is scheduled to run.
+ */
+if ( old_state == RUNSTATE_blocked )
+{
+/* Retry until NV is replaced without losing a concurrent update. */
+do
+{
+old.control = new.control = pi_desc->control;
+new.nv = posted_intr_vector;
+}
+while ( cmpxchg(&pi_desc->control, old.control, new.control)
+!= old.control );
+
+   /*
+    * Delete the vCPU from the related wakeup queue
+    * if we are resuming from blocked state.
+    * NOTE(review): this assumes v->processor still names the pCPU
+    * whose list the vCPU was added to -- confirm.
+    */
+   spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+ v->processor), flags);
+   list_del(&v->blocked_vcpu_list);
+   spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+  v->processor), flags);
+}
+break;
+
+case RUNSTATE_blocked:
+/*
+ * The vCPU is blocked on the wait queue.
+ * Store the blocked vCPU on the list of v->processor,
+ * presumably the destination of the wake-up notification
+ * event (NDST is not rewritten on this path) -- confirm.
+ */
+spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+  v->processor), flags);
+list_add_tail(&v->blocked_vcpu_list,
+  &per_cpu(blocked_vcpu_on_cpu, v->processor));
+spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+   v->processor), flags);
+
+do
+{
+old.control = new.control = pi_desc->control;
+
+/*
+ * We should not block the vCPU if
+ * an interrupt is posted for it.
+ * NOTE(review): the vCPU stays linked on the blocked list
+ * on this early return; presumably it is unlinked by the
+ * later blocked -> runnable transition -- confirm.
+ */
+
+if ( pi_test_on(&old) == 1 )
+{
+tasklet_schedule(&v->vcpu_wakeup_tasklet);
+return;
+}
+
+pi_clear_sn(&new);
+new.nv = pi_wakeup_vector;
+}
+while ( cmpxchg(&pi_desc->control, old.control, new.control)
+!= old.control );
+break;
+
+case RUNSTATE_running:
+/* SN is expected to be set whenever the vCPU is not running. */
+ASSERT( pi_test_sn(pi_desc) == 1 );
+
+do
+{
+old.control = new.control = pi_desc->control;
+/* x2APIC takes the ID as-is; otherwise it goes in bits 15:8. */
+if ( x2apic_enabled )
+new.ndst = cpu_physical_id(v->processor);
+else
+new.ndst = (cpu_physical_id(v->processor) << 8) & 0xFF00;
+
+pi_clear_sn(&new);
+}
+while ( cmpxchg(&pi_desc->control, old.control, new.control)
+!= old.control );
+break;
+
+default:
+break;
+}
+}
+
 void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx,
uint32_t *eax, uint32_t *ebx,
uint32_t *ecx, uint32_t *edx)
@@ -1795,6 +1902,7 @@ static struct hvm_function

[Xen-devel] [RFC v1 15/15] Add a command line parameter for VT-d posted-interrupts

2015-03-25 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

Signed-off-by: Feng Wu 
---
 xen/drivers/passthrough/iommu.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 302e3e4..1bda7e9 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -52,7 +52,7 @@ bool_t __read_mostly iommu_passthrough;
 bool_t __read_mostly iommu_snoop = 1;
 bool_t __read_mostly iommu_qinval = 1;
 bool_t __read_mostly iommu_intremap = 1;
-bool_t __read_mostly iommu_intpost = 0;
+bool_t __read_mostly iommu_intpost = 1;
 bool_t __read_mostly iommu_hap_pt_share = 1;
 bool_t __read_mostly iommu_debug;
 bool_t __read_mostly amd_iommu_perdev_intremap = 1;
@@ -101,6 +101,12 @@ static void __init parse_iommu_param(char *s)
 if ( iommu_intremap == 0 )
 iommu_intpost = 0;
 }
+else if ( !strcmp(s, "intpost") )
+{
+iommu_intpost = val;
+if ( iommu_intremap == 0 )
+iommu_intpost = 0;
+}
 else if ( !strcmp(s, "debug") )
 {
 iommu_debug = val;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 03/15] vmx: Extend struct pi_desc to support VT-d Posted-Interrupts

2015-03-25 Thread Feng Wu
Extend struct pi_desc according to VT-d Posted-Interrupts Spec.

Signed-off-by: Feng Wu 
---
 xen/include/asm-x86/hvm/vmx/vmcs.h | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 6fce6aa..9631461 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -76,8 +76,20 @@ struct vmx_domain {
 
 struct pi_desc {
 DECLARE_BITMAP(pir, NR_VECTORS);
-u32 control;
-u32 rsvd[7];
+union {
+struct
+{
+u64 on : 1,
+sn : 1,
+rsvd_1 : 13,
+ndm: 1,
+nv : 8,
+rsvd_2 : 8,
+ndst   : 32;
+};
+u64 control;
+};
+u32 rsvd[6];
 } __attribute__ ((aligned (64)));
 
 #define ept_get_wl(ept)   ((ept)->ept_wl)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 02/15] vt-d: VT-d Posted-Interrupts feature detection

2015-03-25 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

This patch adds feature detection logic for VT-d posted-interrupt.

Signed-off-by: Feng Wu 
---
 xen/drivers/passthrough/vtd/iommu.c | 15 +--
 xen/drivers/passthrough/vtd/iommu.h |  1 +
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index 891b9e3..86798a3 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2030,6 +2030,7 @@ static int init_vtd_hw(void)
 if ( ioapic_to_iommu(IO_APIC_ID(apic)) == NULL )
 {
 iommu_intremap = 0;
+iommu_intpost = 0;
 dprintk(XENLOG_ERR VTDPREFIX,
 "ioapic_to_iommu: ioapic %#x (id: %#x) is NULL! "
 "Will not try to enable Interrupt Remapping.\n",
@@ -2046,6 +2047,7 @@ static int init_vtd_hw(void)
 if ( enable_intremap(iommu, 0) != 0 )
 {
 iommu_intremap = 0;
+iommu_intpost = 0;
 dprintk(XENLOG_WARNING VTDPREFIX,
 "Interrupt Remapping not enabled\n");
 
@@ -2119,8 +2121,8 @@ int __init intel_vtd_setup(void)
 }
 
 /* We enable the following features only if they are supported by all VT-d
- * engines: Snoop Control, DMA passthrough, Queued Invalidation and
- * Interrupt Remapping.
+ * engines: Snoop Control, DMA passthrough, Queued Invalidation, Interrupt
+ * Remapping, and Posted Interrupt
  */
 for_each_drhd_unit ( drhd )
 {
@@ -2146,7 +2148,13 @@ int __init intel_vtd_setup(void)
 iommu_qinval = 0;
 
 if ( iommu_intremap && !ecap_intr_remap(iommu->ecap) )
+{
 iommu_intremap = 0;
+iommu_intpost = 0;
+}
+
+if ( iommu_intpost && !cap_intr_post(iommu->cap))
+iommu_intpost = 0;
 
 if ( !vtd_ept_page_compatible(iommu) )
 iommu_hap_pt_share = 0;
@@ -2164,6 +2172,7 @@ int __init intel_vtd_setup(void)
 if ( !iommu_qinval && iommu_intremap )
 {
 iommu_intremap = 0;
+iommu_intpost = 0;
 dprintk(XENLOG_WARNING VTDPREFIX, "Interrupt Remapping disabled "
 "since Queued Invalidation isn't supported or enabled.\n");
 }
@@ -2173,6 +2182,7 @@ int __init intel_vtd_setup(void)
 P(iommu_passthrough, "Dom0 DMA Passthrough");
 P(iommu_qinval, "Queued Invalidation");
 P(iommu_intremap, "Interrupt Remapping");
+P(iommu_intpost, "Posted Interrupt");
 P(iommu_hap_pt_share, "Shared EPT tables");
 #undef P
 
@@ -2192,6 +2202,7 @@ int __init intel_vtd_setup(void)
 iommu_passthrough = 0;
 iommu_qinval = 0;
 iommu_intremap = 0;
+iommu_intpost = 0;
 return ret;
 }
 
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index d6e6520..42047e0 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -69,6 +69,7 @@
 /*
  * Decoding Capability Register
  */
+#define cap_intr_post(c)   (((c) >> 59) & 1)
 #define cap_read_drain(c)  (((c) >> 55) & 1)
 #define cap_write_drain(c) (((c) >> 54) & 1)
 #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 12/15] vmx: Properly handle notification event when vCPU is running

2015-03-25 Thread Feng Wu
When a vCPU is running in Root mode and a notification event
has been injected to it, we need to set VCPU_KICK_SOFTIRQ for
the current cpu, so the pending interrupt in PIRR will be
synced to vIRR before VM-Exit in time.

Signed-off-by: Feng Wu 
---
 xen/arch/x86/hvm/vmx/vmx.c| 24 +++-
 xen/include/asm-x86/hvm/vmx/vmx.h |  1 +
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index b2b4c26..b30392c 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1838,7 +1838,7 @@ const struct hvm_function_table * __init start_vmx(void)
 
 if ( cpu_has_vmx_posted_intr_processing )
 {
-alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt);
+alloc_direct_apic_vector(&posted_intr_vector, 
pi_notification_interrupt);
 
 if ( iommu_intpost )
 alloc_direct_apic_vector(&pi_wakeup_vector, pi_wakeup_interrupt);
@@ -3288,6 +3288,28 @@ void pi_wakeup_interrupt(struct cpu_user_regs *regs)
 }
 
 /*
+ * Handle VT-d posted-interrupt when VCPU is running.
+ */
+
+void pi_notification_interrupt(struct cpu_user_regs *regs)
+{
+    unsigned int cpu = smp_processor_id();
+
+    /*
+     * This handler runs when a notification event arrives while the
+     * target vCPU is executing in Root mode.
+     *
+     * Mark VCPU_KICK_SOFTIRQ pending on the current cpu, just like
+     * __vmx_deliver_posted_interrupt(), so the pending interrupt in
+     * PIRR will be synced to vIRR before VM-Exit in time.
+     */
+    set_bit(VCPU_KICK_SOFTIRQ, &softirq_pending(cpu));
+
+    ack_APIC_irq();
+    this_cpu(irq_count)++;
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index f4296ab..e53275b 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -576,6 +576,7 @@ void free_p2m_hap_data(struct p2m_domain *p2m);
 void p2m_init_hap_data(struct p2m_domain *p2m);
 
 void pi_wakeup_interrupt(struct cpu_user_regs *regs);
+void pi_notification_interrupt(struct cpu_user_regs *regs);
 
 /* EPT violation qualifications definitions */
 #define _EPT_READ_VIOLATION 0
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 04/15] vmx: Add some helper functions for Posted-Interrupts

2015-03-25 Thread Feng Wu
This patch adds some helper functions to manipulate the
Posted-Interrupts Descriptor.

Signed-off-by: Feng Wu 
---
 xen/include/asm-x86/hvm/vmx/vmx.h | 21 +
 1 file changed, 21 insertions(+)

diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index 91c5e18..ecc5e17 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -100,6 +100,7 @@ void vmx_update_cpu_exec_control(struct vcpu *v);
 void vmx_update_secondary_exec_control(struct vcpu *v);
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
 static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
 {
 return test_and_set_bit(vector, pi_desc->pir);
@@ -120,6 +121,26 @@ static inline int pi_test_and_clear_on(struct pi_desc 
*pi_desc)
 return test_and_clear_bit(POSTED_INTR_ON, &pi_desc->control);
 }
 
+/* Return non-zero if the ON bit (POSTED_INTR_ON) is set in pi_desc->control. */
+static inline int pi_test_on(struct pi_desc *pi_desc)
+{
+return test_bit(POSTED_INTR_ON, &pi_desc->control);
+}
+
+/* Set the SN bit (POSTED_INTR_SN) in pi_desc->control. */
+static inline void pi_set_sn(struct pi_desc *pi_desc)
+{
+set_bit(POSTED_INTR_SN, &pi_desc->control);
+}
+
+/* Return non-zero if the SN bit (POSTED_INTR_SN) is set in pi_desc->control. */
+static inline int pi_test_sn(struct pi_desc *pi_desc)
+{
+return test_bit(POSTED_INTR_SN, &pi_desc->control);
+}
+
+/* Clear the SN bit (POSTED_INTR_SN) in pi_desc->control. */
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+clear_bit(POSTED_INTR_SN, &pi_desc->control);
+}
+
 static inline unsigned long pi_get_pir(struct pi_desc *pi_desc, int group)
 {
 return xchg(&pi_desc->pir[group], 0);
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 09/15] Add a new per-vCPU tasklet to wakeup the blocked vCPU

2015-03-25 Thread Feng Wu
This patch adds a new per-vCPU tasklet to wakeup the blocked
vCPU. It can be used in the case vcpu_unblock cannot be called
directly.

Signed-off-by: Feng Wu 
---
 xen/common/domain.c | 11 +++
 xen/include/xen/sched.h |  3 +++
 2 files changed, 14 insertions(+)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index aa78fd7..fe89658 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -109,6 +109,13 @@ static void vcpu_check_shutdown(struct vcpu *v)
 spin_unlock(&d->shutdown_lock);
 }
 
+/* Tasklet callback: 'arg' carries the struct vcpu pointer to unblock. */
+static void vcpu_wakeup_tasklet_handler(unsigned long arg)
+{
+    vcpu_unblock((struct vcpu *)arg);
+}
+
 struct vcpu *alloc_vcpu(
 struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
 {
@@ -126,6 +133,9 @@ struct vcpu *alloc_vcpu(
 
 tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
 
+tasklet_init(&v->vcpu_wakeup_tasklet, vcpu_wakeup_tasklet_handler,
+ (unsigned long)v);
+
 if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
  !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
  !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
@@ -784,6 +794,7 @@ static void complete_domain_destroy(struct rcu_head *head)
 if ( (v = d->vcpu[i]) == NULL )
 continue;
 tasklet_kill(&v->continue_hypercall_tasklet);
+tasklet_kill(&v->vcpu_wakeup_tasklet);
 vcpu_destroy(v);
 sched_destroy_vcpu(v);
 destroy_waitqueue_vcpu(v);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index ccd7ed8..c874dd4 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -239,6 +239,9 @@ struct vcpu
 /* Tasklet for continue_hypercall_on_cpu(). */
 struct tasklet   continue_hypercall_tasklet;
 
+/* Tasklet for wakeup_blocked_vcpu(). */
+struct tasklet   vcpu_wakeup_tasklet;
+
 /* Multicall information. */
 struct mc_state  mc_state;
 
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 11/15] vmx: Add a global wake-up vector for VT-d Posted-Interrupts

2015-03-25 Thread Feng Wu
This patch adds a global vector which is used to wake up
the blocked vCPU when an interrupt is being posted to it.

Signed-off-by: Feng Wu 
Suggested-by: Yang Zhang 
---
 xen/arch/x86/hvm/vmx/vmx.c| 33 +
 xen/include/asm-x86/hvm/hvm.h |  1 +
 xen/include/asm-x86/hvm/vmx/vmx.h |  3 +++
 xen/include/xen/sched.h   |  2 ++
 4 files changed, 39 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index ff5544d..b2b4c26 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -89,6 +89,7 @@ DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
 DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
 
 uint8_t __read_mostly posted_intr_vector;
+uint8_t __read_mostly pi_wakeup_vector;
 
 static int vmx_domain_initialise(struct domain *d)
 {
@@ -131,6 +132,8 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 if ( v->vcpu_id == 0 )
 v->arch.user_regs.eax = 1;
 
+INIT_LIST_HEAD(&v->blocked_vcpu_list);
+
 return 0;
 }
 
@@ -1834,11 +1837,19 @@ const struct hvm_function_table * __init start_vmx(void)
 }
 
 if ( cpu_has_vmx_posted_intr_processing )
+{
 alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt);
+
+if ( iommu_intpost )
+alloc_direct_apic_vector(&pi_wakeup_vector, pi_wakeup_interrupt);
+else
+vmx_function_table.pi_desc_update = NULL;
+}
 else
 {
 vmx_function_table.deliver_posted_intr = NULL;
 vmx_function_table.sync_pir_to_irr = NULL;
+vmx_function_table.pi_desc_update = NULL;
 }
 
 if ( cpu_has_vmx_ept
@@ -3255,6 +3266,28 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 }
 
 /*
+ * Handle VT-d posted-interrupt when VCPU is blocked.
+ */
+void pi_wakeup_interrupt(struct cpu_user_regs *regs)
+{
+    int cpu = smp_processor_id();
+    spinlock_t *lock = &per_cpu(blocked_vcpu_on_cpu_lock, cpu);
+    struct list_head *blocked = &per_cpu(blocked_vcpu_on_cpu, cpu);
+    struct vcpu *v;
+
+    /*
+     * Walk this pCPU's blocked list and schedule the wakeup tasklet of
+     * every vCPU whose descriptor has ON set.
+     */
+    spin_lock(lock);
+    list_for_each_entry ( v, blocked, blocked_vcpu_list )
+    {
+        if ( pi_test_on(&v->arch.hvm_vmx.pi_desc) == 1 )
+            tasklet_schedule(&v->vcpu_wakeup_tasklet);
+    }
+    spin_unlock(lock);
+
+    ack_APIC_irq();
+    this_cpu(irq_count)++;
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0dc909b..a11a256 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -195,6 +195,7 @@ struct hvm_function_table {
 void (*deliver_posted_intr)(struct vcpu *v, u8 vector);
 void (*sync_pir_to_irr)(struct vcpu *v);
 void (*handle_eoi)(u8 vector);
+void (*pi_desc_update)(struct vcpu *v, int new_state);
 
 /*Walk nested p2m  */
 int (*nhvm_hap_walk_L1_p2m)(struct vcpu *v, paddr_t L2_gpa,
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index e643c3c..f4296ab 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -34,6 +34,7 @@ DECLARE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
 DECLARE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
 
 extern uint8_t posted_intr_vector;
+extern uint8_t pi_wakeup_vector;
 
 typedef union {
 struct {
@@ -574,6 +575,8 @@ int alloc_p2m_hap_data(struct p2m_domain *p2m);
 void free_p2m_hap_data(struct p2m_domain *p2m);
 void p2m_init_hap_data(struct p2m_domain *p2m);
 
+void pi_wakeup_interrupt(struct cpu_user_regs *regs);
+
 /* EPT violation qualifications definitions */
 #define _EPT_READ_VIOLATION 0
 #define EPT_READ_VIOLATION  (1UL<<_EPT_READ_VIOLATION)
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index c874dd4..91f0912 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -148,6 +148,8 @@ struct vcpu
 
 struct vcpu *next_in_list;
 
+struct list_head blocked_vcpu_list;
+
 s_time_t periodic_period;
 s_time_t periodic_last_event;
 struct timer periodic_timer;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 06/15] vt-d: Extend struct iremap_entry to support VT-d Posted-Interrupts

2015-03-25 Thread Feng Wu
Extend struct iremap_entry according to VT-d Posted-Interrupts Spec.

Signed-off-by: Feng Wu 
---
 xen/drivers/passthrough/vtd/iommu.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index 42047e0..cd61e12 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -303,6 +303,18 @@ struct iremap_entry {
 res_2   : 8,
 dst : 32;
 }lo;
+struct {
+u64 p   : 1,
+fpd : 1,
+res_1   : 6,
+avail   : 4,
+res_2   : 2,
+urg : 1,
+im  : 1,
+vector  : 8,
+res_3   : 14,
+pda_l   : 26;
+}lo_intpost;
   };
   union {
 u64 hi_val;
@@ -312,6 +324,13 @@ struct iremap_entry {
 svt : 2,
 res_1   : 44;
 }hi;
+struct {
+u64 sid : 16,
+sq  : 2,
+svt : 2,
+res_1   : 12,
+pda_h   : 32;
+}hi_intpost;
   };
 };
 
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 14/15] Suppress posting interrupts when 'SN' is set

2015-03-25 Thread Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are treated as non-urgent, so we cannot send a
posted-interrupt when 'SN' is set.

Signed-off-by: Feng Wu 
---
 xen/arch/x86/hvm/vmx/vmx.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 6323bd6..40c7b0e 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1663,9 +1663,20 @@ static void __vmx_deliver_posted_interrupt(struct vcpu 
*v)
 
 static void vmx_deliver_posted_intr(struct vcpu *v, u8 vector)
 {
+int r, sn;
+
 if ( pi_test_and_set_pir(vector, &v->arch.hvm_vmx.pi_desc) )
 return;
 
+/*
+ * Currently, we don't support urgent interrupt, all interrupts
+ * are recognized as non-urgent interrupt, so we cannot send
+ * posted-interrupt when 'SN' is set.
+ */
+
+sn = pi_test_sn(&v->arch.hvm_vmx.pi_desc);
+r = pi_test_and_set_on(&v->arch.hvm_vmx.pi_desc);
+
 if ( unlikely(v->arch.hvm_vmx.eoi_exitmap_changed) )
 {
 /*
@@ -1675,7 +1686,7 @@ static void vmx_deliver_posted_intr(struct vcpu *v, u8 
vector)
  */
 pi_set_on(&v->arch.hvm_vmx.pi_desc);
 }
-else if ( !pi_test_and_set_on(&v->arch.hvm_vmx.pi_desc) )
+else if ( !r && !sn )
 {
 __vmx_deliver_posted_interrupt(v);
 return;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 01/15] iommu: Add iommu_intpost to control VT-d Posted-Interrupts feature

2015-03-25 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

This patch adds the variable 'iommu_intpost' to the generic IOMMU code to
control whether VT-d posted-interrupts are enabled.

Signed-off-by: Feng Wu 
---
 xen/drivers/passthrough/iommu.c | 11 ++-
 xen/include/xen/iommu.h |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 92ea26f..302e3e4 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -39,6 +39,7 @@ static void iommu_dump_p2m_table(unsigned char key);
  *   no-snoop   Disable VT-d Snoop Control
  *   no-qinval  Disable VT-d Queued Invalidation
  *   no-intremapDisable VT-d Interrupt Remapping
+ *   no-intpost Disable VT-d Interrupt posting
  */
 custom_param("iommu", parse_iommu_param);
 bool_t __initdata iommu_enable = 1;
@@ -51,6 +52,7 @@ bool_t __read_mostly iommu_passthrough;
 bool_t __read_mostly iommu_snoop = 1;
 bool_t __read_mostly iommu_qinval = 1;
 bool_t __read_mostly iommu_intremap = 1;
+bool_t __read_mostly iommu_intpost = 0;
 bool_t __read_mostly iommu_hap_pt_share = 1;
 bool_t __read_mostly iommu_debug;
 bool_t __read_mostly amd_iommu_perdev_intremap = 1;
@@ -94,7 +96,11 @@ static void __init parse_iommu_param(char *s)
 else if ( !strcmp(s, "qinval") )
 iommu_qinval = val;
 else if ( !strcmp(s, "intremap") )
+{
 iommu_intremap = val;
+if ( iommu_intremap == 0 )
+iommu_intpost = 0;
+}
 else if ( !strcmp(s, "debug") )
 {
 iommu_debug = val;
@@ -272,7 +278,10 @@ int __init iommu_setup(void)
 iommu_enabled = (rc == 0);
 }
 if ( !iommu_enabled )
+{
 iommu_intremap = 0;
+iommu_intpost = 0;
+}
 
 if ( (force_iommu && !iommu_enabled) ||
  (force_intremap && !iommu_intremap) )
@@ -341,7 +350,7 @@ void iommu_crash_shutdown(void)
 const struct iommu_ops *ops = iommu_get_ops();
 if ( iommu_enabled )
 ops->crash_shutdown();
-iommu_enabled = iommu_intremap = 0;
+iommu_enabled = iommu_intremap = iommu_intpost = 0;
 }
 
 bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index bf4aff0..91063bb 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -31,7 +31,7 @@
 extern bool_t iommu_enable, iommu_enabled;
 extern bool_t force_iommu, iommu_verbose;
 extern bool_t iommu_workaround_bios_bug, iommu_passthrough;
-extern bool_t iommu_snoop, iommu_qinval, iommu_intremap;
+extern bool_t iommu_snoop, iommu_qinval, iommu_intremap, iommu_intpost;
 extern bool_t iommu_hap_pt_share;
 extern bool_t iommu_debug;
 extern bool_t amd_iommu_perdev_intremap;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 00/15] Add VT-d Posted-Interrupts support

2015-03-25 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

This patch set follows this design:
http://article.gmane.org/gmane.comp.emulators.xen.devel/236476

Feng Wu (15):
  iommu: Add iommu_intpost to control VT-d Posted-Interrupts feature
  vt-d: VT-d Posted-Interrupts feature detection
  vmx: Extend struct pi_desc to support VT-d Posted-Interrupts
  vmx: Add some helper functions for Posted-Interrupts
  vmx: Initialize VT-d Posted-Interrupts Descriptor
  vt-d: Extend struct iremap_entry to support VT-d Posted-Interrupts
  vt-d: Add API to update IRTE when VT-d PI is used
  Update IRTE according to guest interrupt config changes
  Add a new per-vCPU tasklet to wakeup the blocked vCPU
  vmx: Define two per-cpu variants
  vmx: Add a global wake-up vector for VT-d Posted-Interrupts
  vmx: Properly handle notification event when vCPU is running
  Update Posted-Interrupts Descriptor during vCPU scheduling
  Suppress posting interrupts when 'SN' is set
  Add a command line parameter for VT-d posted-interrupts

 xen/arch/x86/hvm/vmx/vmcs.c|   6 ++
 xen/arch/x86/hvm/vmx/vmx.c | 185 -
 xen/common/domain.c|  11 ++
 xen/common/schedule.c  |   3 +
 xen/drivers/passthrough/io.c   |  77 +-
 xen/drivers/passthrough/iommu.c|  17 ++-
 xen/drivers/passthrough/vtd/intremap.c |  83 +++
 xen/drivers/passthrough/vtd/iommu.c|  15 ++-
 xen/drivers/passthrough/vtd/iommu.h|  23 
 xen/include/asm-x86/hvm/hvm.h  |   1 +
 xen/include/asm-x86/hvm/vmx/vmcs.h |  16 ++-
 xen/include/asm-x86/hvm/vmx/vmx.h  |  49 -
 xen/include/asm-x86/iommu.h|   2 +
 xen/include/xen/iommu.h|   2 +-
 xen/include/xen/sched.h|   5 +
 15 files changed, 485 insertions(+), 10 deletions(-)

-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [RFC v1 07/15] vt-d: Add API to update IRTE when VT-d PI is used

2015-03-25 Thread Feng Wu
This patch adds an API which is used to update the IRTE
for posted-interrupt when guest changes MSI/MSI-X information.

Signed-off-by: Feng Wu 
---
 xen/drivers/passthrough/vtd/intremap.c | 83 ++
 xen/drivers/passthrough/vtd/iommu.h|  3 ++
 xen/include/asm-x86/iommu.h|  2 +
 3 files changed, 88 insertions(+)

diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index 0333686..f44e74d 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -898,3 +898,86 @@ void iommu_disable_x2apic_IR(void)
 for_each_drhd_unit ( drhd )
 disable_qinval(drhd->iommu);
 }
+
+/*
+ * This function is used to update the IRTE for posted-interrupt
+ * when guest changes MSI/MSI-X information
+ */
+int pi_update_irte(struct vcpu *v, struct pirq *pirq, uint32_t gvec )
+{
+struct irq_desc *desc;
+struct msi_desc *msi_desc;
+int remap_index, rc = -1;
+struct pci_dev *pci_dev;
+struct acpi_drhd_unit *drhd;
+struct iommu *iommu;
+struct ir_ctrl *ir_ctrl;
+struct iremap_entry *iremap_entries = NULL, *p = NULL;
+struct iremap_entry new_ire;
+struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+unsigned long flags;
+
+desc = pirq_spin_lock_irq_desc(pirq, NULL);
+if ( !desc )
+return -1;
+
+msi_desc = desc->msi_desc;
+if ( !msi_desc )
+goto unlock_out;
+
+remap_index = msi_desc->remap_index;
+pci_dev = msi_desc->dev;
+if ( !pci_dev )
+goto unlock_out;
+
+drhd = acpi_find_matched_drhd_unit(pci_dev);
+if (!drhd)
+{
+dprintk(XENLOG_INFO VTDPREFIX, "failed to get drhd!\n");
+goto unlock_out;
+}
+
+iommu = drhd->iommu;
+ir_ctrl = iommu_ir_ctrl(iommu);
+if ( !ir_ctrl )
+{
+dprintk(XENLOG_INFO VTDPREFIX, "failed to get ir_ctrl!\n");
+goto unlock_out;
+}
+
+spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+
+GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, remap_index, iremap_entries, p);
+
+memcpy(&new_ire, p, sizeof(struct iremap_entry));
+
+/* Setup/Update interrupt remapping table entry */
+new_ire.lo_intpost.urg = 0;
+new_ire.lo_intpost.vector = gvec;
+new_ire.lo_intpost.pda_l = (((u64)virt_to_maddr(pi_desc)) >>
+(32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT);
+new_ire.hi_intpost.pda_h = (((u64)virt_to_maddr(pi_desc)) >>  32) &
+~(-1UL << PDA_HIGH_BIT);
+
+new_ire.lo_intpost.res_1 = 0;
+new_ire.lo_intpost.res_2 = 0;
+new_ire.lo_intpost.res_3 = 0;
+new_ire.hi_intpost.res_1 = 0;
+
+new_ire.lo_intpost.im = 1;
+
+memcpy(p, &new_ire, sizeof(struct iremap_entry));
+iommu_flush_cache_entry(p, sizeof(struct iremap_entry));
+iommu_flush_iec_index(iommu, 0, remap_index);
+
+if ( iremap_entries )
+unmap_vtd_domain_page(iremap_entries);
+
+spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
+
+rc = 0;
+ unlock_out:
+spin_unlock_irq(&desc->lock);
+
+return rc;
+}
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index cd61e12..ffa72c8 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -334,6 +334,9 @@ struct iremap_entry {
   };
 };
 
+#define PDA_LOW_BIT26
+#define PDA_HIGH_BIT   32
+
 /* Max intr remapping table page order is 8, as max number of IRTEs is 64K */
 #define IREMAP_PAGE_ORDER  8
 
diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
index e7a65da..d233621 100644
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -32,6 +32,8 @@ int iommu_supports_eim(void);
 int iommu_enable_x2apic_IR(void);
 void iommu_disable_x2apic_IR(void);
 
+int pi_update_irte(struct vcpu *v, struct pirq *pirq, uint32_t gvec);
+
 #endif /* !__ARCH_X86_IOMMU_H__ */
 /*
  * Local variables:
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v12 1/2] vmx: VT-d posted-interrupt core logic handling

2016-02-18 Thread Feng Wu
This is the core logic handling for VT-d posted-interrupts. Basically it
deals with how and when to update posted-interrupts during the following
scenarios:
- vCPU is preempted
- vCPU is slept
- vCPU is blocked

When vCPU is preempted/slept, we update the posted-interrupts during
scheduling by introducing two new architectural scheduler hooks:
vmx_pi_switch_from() and vmx_pi_switch_to(). When vCPU is blocked, we
introduce a new architectural hook: arch_vcpu_block() to update
posted-interrupts descriptor.

Besides that, before VM-entry, we will make sure the 'NV' field is set
to 'posted_intr_vector' and the vCPU is not in any blocking lists, which
is needed when vCPU is running in non-root mode. The reason we do this check
is because we change the posted-interrupts descriptor in vcpu_block(),
however, we don't change it back in vcpu_unblock() or when vcpu_block()
directly returns due to event delivery (in fact, we don't need to do it
in the two places, that is why we do it before VM-Entry).

When we handle the lazy context switch for the following two scenarios:
- Preempted by a tasklet, which runs in an idle context.
- The prev vcpu is offline and there are no new available vcpus in the
  run queue.
we don't change the 'SN' bit in the posted-interrupt descriptor. This
may incur spurious PI notification events, but a PI notification event
is only sent when 'ON' is clear, and once the PI notification is sent,
'ON' is set by hardware, hence no more notification events are sent
until 'ON' is cleared again. Besides that, spurious PI notification
events are going to happen from time to time in the Xen hypervisor
anyway: for example, when a guest traps to Xen and a PI notification
event happens, there is nothing Xen actually needs to do about it; the
interrupts will be delivered to the guest the next time we do a VMENTRY.

CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
CC: Kevin Tian 
CC: George Dunlap 
CC: Dario Faggioli 
Suggested-by: Yang Zhang 
Suggested-by: Dario Faggioli 
Suggested-by: George Dunlap 
Suggested-by: Jan Beulich 
Signed-off-by: Feng Wu 
---
v12:
- Move the ASSERT to the locked region in vmx_vcpu_block()
- Add barrier() before using the local variable in vmx_pi_do_resume()
- Split vmx_pi_hooks_reassign() to two functions:
  * vmx_pi_hooks_assign()
  * vmx_pi_hooks_deassign()
- Add more comments about how PI works during vCPU state transition
- coding style

v11:
- Add ASSERT() in vmx_vcpu_block()
- Add some comments in vmx_pi_switch_from()
- Remove some comments which should have been removed when the
  related code was removed during v9 -> v10
- Rename 'vmx_pi_state_to_normal' to 'vmx_pi_do_resume'
- Coding style
- Make arch_vcpu_block() a macro
- Make 'pi_wakeup_vector' static
- Move hook 'vcpu_block' to 'struct hvm_vcpu'
- Initial hook 'vcpu_block' when assigning the first pci device
  and zap it on removal of the last device
- Save pointer to the block list lock instead of the processor
  id in 'struct arch_vmx_struct'
- Implement the following functions as hooks, so we
  can eliminate lots of checks and spinlocks in scheduling
  related code path, which is good for performance.
vmx_pi_switch_from
vmx_pi_switch_to
vmx_pi_do_resume

v10:
- Check iommu_intpost first
- Remove pointless checking of has_hvm_container_vcpu(v)
- Rename 'vmx_pi_state_change' to 'vmx_pi_state_to_normal'
- Since vcpu_unblock() doesn't acquire 'pi_blocked_vcpu_lock', we
  don't need use another list to save the vCPUs with 'ON' set, just
  directly call vcpu_unblock(v).

v9:
- Remove arch_vcpu_block_cancel() and arch_vcpu_wake_prepare()
- Add vmx_pi_state_change() and call it before VM Entry

v8:
- Remove the lazy context switch handling for PI state transition
- Change PI state in vcpu_block() and do_poll() when the vCPU
  is going to be blocked

v7:
- Merge [PATCH v6 16/18] vmx: Add some scheduler hooks for VT-d posted 
interrupts
  and "[PATCH v6 14/18] vmx: posted-interrupt handling when vCPU is blocked"
  into this patch, so it is self-contained and more convenient
  for code review.
- Make 'pi_blocked_vcpu' and 'pi_blocked_vcpu_lock' static
- Coding style
- Use per_cpu() instead of this_cpu() in pi_wakeup_interrupt()
- Move ack_APIC_irq() to the beginning of pi_wakeup_interrupt()
- Rename 'pi_ctxt_switch_from' to 'ctxt_switch_prepare'
- Rename 'pi_ctxt_switch_to' to 'ctxt_switch_cancel'
- Use 'has_hvm_container_vcpu' instead of 'is_hvm_vcpu'
- Use 'spin_lock' and 'spin_unlock' when the interrupt has been
  already disabled.
- Rename arch_vcpu_wake_prepare to vmx_vcpu_wake_prepare
- Define vmx_vcpu_wake_prepare in xen/arch/x86/hvm/hvm.c
- Call .pi_ctxt_switch_to() __context_switch() instead of directly
  calling vmx_post_ctx_

[Xen-devel] [PATCH v12 0/2] Add VT-d Posted-Interrupts support

2016-02-18 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

Feng Wu (2):
  vmx: VT-d posted-interrupt core logic handling
  Add a command line parameter for VT-d posted-interrupts

 docs/misc/xen-command-line.markdown |   9 +-
 xen/arch/x86/hvm/vmx/vmcs.c |   2 +
 xen/arch/x86/hvm/vmx/vmx.c  | 185 
 xen/common/schedule.c   |   4 +
 xen/drivers/passthrough/iommu.c |   3 +
 xen/drivers/passthrough/vtd/iommu.c |   7 ++
 xen/include/asm-arm/domain.h|   2 +
 xen/include/asm-x86/hvm/hvm.h   |   5 +
 xen/include/asm-x86/hvm/vmx/vmcs.h  |  67 +
 xen/include/asm-x86/hvm/vmx/vmx.h   |   5 +
 10 files changed, 288 insertions(+), 1 deletion(-)

-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v12 2/2] Add a command line parameter for VT-d posted-interrupts

2016-02-18 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

CC: Jan Beulich 
Signed-off-by: Feng Wu 
Reviewed-by: Kevin Tian 
Acked-by: Jan Beulich 
---
 docs/misc/xen-command-line.markdown | 9 -
 xen/drivers/passthrough/iommu.c | 3 +++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 467dc8f..ea1d60d 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -868,7 +868,7 @@ debug hypervisor only).
 > Default: `new` unless directed-EOI is supported
 
 ### iommu
-> `= List of [  | force | required | intremap | qinval | snoop | 
sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | igfx | verbose | debug ]`
+> `= List of [  | force | required | intremap | intpost | qinval | 
snoop | sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | igfx | verbose | debug ]`
 
 > Sub-options:
 
@@ -895,6 +895,13 @@ debug hypervisor only).
 >> Control the use of interrupt remapping (DMA remapping will always be enabled
 >> if IOMMU functionality is enabled).
 
+> `intpost`
+
+> Default: `false`
+
+>> Control the use of interrupt posting, which depends on the availability of
+>> interrupt remapping.
+
 > `qinval` (VT-d)
 
 > Default: `true`
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 0b2abf4..50d74a5 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -32,6 +32,7 @@ static void iommu_dump_p2m_table(unsigned char key);
  *   off|no|false|disable   Disable IOMMU (default)
  *   force|required Don't boot unless IOMMU is enabled
  *   no-intremapDisable interrupt remapping
+ *   no-intpost Disable VT-d Interrupt posting
  *   verboseBe more verbose
  *   debug  Enable debugging messages and checks
  *   workaround_bios_bugWorkaround some bios issue to still enable
@@ -105,6 +106,8 @@ static void __init parse_iommu_param(char *s)
 iommu_qinval = val;
 else if ( !strcmp(s, "intremap") )
 iommu_intremap = val;
+else if ( !strcmp(s, "intpost") )
+iommu_intpost = val;
 else if ( !strcmp(s, "debug") )
 {
 iommu_debug = val;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v13 2/2] Add a command line parameter for VT-d posted-interrupts

2016-02-23 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

CC: Jan Beulich 
Signed-off-by: Feng Wu 
Reviewed-by: Kevin Tian 
Acked-by: Jan Beulich 
---
 docs/misc/xen-command-line.markdown | 9 -
 xen/drivers/passthrough/iommu.c | 3 +++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 467dc8f..ea1d60d 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -868,7 +868,7 @@ debug hypervisor only).
 > Default: `new` unless directed-EOI is supported
 
 ### iommu
-> `= List of [  | force | required | intremap | qinval | snoop | 
sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | igfx | verbose | debug ]`
+> `= List of [  | force | required | intremap | intpost | qinval | 
snoop | sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | igfx | verbose | debug ]`
 
 > Sub-options:
 
@@ -895,6 +895,13 @@ debug hypervisor only).
 >> Control the use of interrupt remapping (DMA remapping will always be enabled
 >> if IOMMU functionality is enabled).
 
+> `intpost`
+
+> Default: `false`
+
+>> Control the use of interrupt posting, which depends on the availability of
+>> interrupt remapping.
+
 > `qinval` (VT-d)
 
 > Default: `true`
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 0b2abf4..50d74a5 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -32,6 +32,7 @@ static void iommu_dump_p2m_table(unsigned char key);
  *   off|no|false|disable   Disable IOMMU (default)
  *   force|required Don't boot unless IOMMU is enabled
  *   no-intremapDisable interrupt remapping
+ *   no-intpost Disable VT-d Interrupt posting
  *   verboseBe more verbose
  *   debug  Enable debugging messages and checks
  *   workaround_bios_bugWorkaround some bios issue to still enable
@@ -105,6 +106,8 @@ static void __init parse_iommu_param(char *s)
 iommu_qinval = val;
 else if ( !strcmp(s, "intremap") )
 iommu_intremap = val;
+else if ( !strcmp(s, "intpost") )
+iommu_intpost = val;
 else if ( !strcmp(s, "debug") )
 {
 iommu_debug = val;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v13 0/2] Add VT-d Posted-Interrupts support

2016-02-23 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

Feng Wu (2):
  vmx: VT-d posted-interrupt core logic handling
  Add a command line parameter for VT-d posted-interrupts

 docs/misc/xen-command-line.markdown |   9 +-
 xen/arch/x86/hvm/vmx/vmcs.c |   2 +
 xen/arch/x86/hvm/vmx/vmx.c  | 193 
 xen/common/schedule.c   |   4 +
 xen/drivers/passthrough/iommu.c |   3 +
 xen/drivers/passthrough/vtd/iommu.c |  11 ++
 xen/include/asm-arm/domain.h|   2 +
 xen/include/asm-x86/hvm/hvm.h   |   6 ++
 xen/include/asm-x86/hvm/vmx/vmcs.h  |  77 ++
 xen/include/asm-x86/hvm/vmx/vmx.h   |   5 +
 10 files changed, 311 insertions(+), 1 deletion(-)

-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v13 1/2] vmx: VT-d posted-interrupt core logic handling

2016-02-23 Thread Feng Wu
This is the core logic handling for VT-d posted-interrupts. Basically it
deals with how and when to update posted-interrupts during the following
scenarios:
- vCPU is preempted
- vCPU is slept
- vCPU is blocked

When vCPU is preempted/slept, we update the posted-interrupts during
scheduling by introducing two new architectural scheduler hooks:
vmx_pi_switch_from() and vmx_pi_switch_to(). When vCPU is blocked, we
introduce a new architectural hook: arch_vcpu_block() to update
posted-interrupts descriptor.

Besides that, before VM-entry, we will make sure the 'NV' field is set
to 'posted_intr_vector' and the vCPU is not in any blocking lists, which
is needed when vCPU is running in non-root mode. The reason we do this check
is because we change the posted-interrupts descriptor in vcpu_block(),
however, we don't change it back in vcpu_unblock() or when vcpu_block()
directly returns due to event delivery (in fact, we don't need to do it
in the two places, that is why we do it before VM-Entry).

When we handle the lazy context switch for the following two scenarios:
- Preempted by a tasklet, which runs in an idle context.
- The prev vcpu is offline and there are no new available vcpus in the
  run queue.
we don't change the 'SN' bit in the posted-interrupt descriptor. This
may incur spurious PI notification events, but a PI notification event
is only sent when 'ON' is clear, and once the PI notification is sent,
'ON' is set by hardware, hence no more notification events are sent
until 'ON' is cleared again. Besides that, spurious PI notification
events are going to happen from time to time in the Xen hypervisor
anyway: for example, when a guest traps to Xen and a PI notification
event happens, there is nothing Xen actually needs to do about it; the
interrupts will be delivered to the guest the next time we do a VMENTRY.

CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
CC: Kevin Tian 
CC: George Dunlap 
CC: Dario Faggioli 
Suggested-by: Yang Zhang 
Suggested-by: Dario Faggioli 
Suggested-by: George Dunlap 
Suggested-by: Jan Beulich 
Signed-off-by: Feng Wu 
---
v13:
- Define the blocking vcpu list and lock in a structure
- Define the two local per-CPU variables in a structure
- Some adjustment to vmx_pi_hooks_assign() and vmx_pi_hooks_deassign()
- Use smp_rmb() instead of barrier(), and put it a little earlier
- Minor changes to macro arch_vcpu_block() to make 'v' evaluated only once.
- Remove the pointless parentheses in the function arguments in macro 
arch_vcpu_block()
- coding style

v12:
- Move the ASSERT to the locked region in vmx_vcpu_block()
- Add barrier() before using the local variable in vmx_pi_do_resume()
- Split vmx_pi_hooks_reassign() to two functions:
  * vmx_pi_hooks_assign()
  * vmx_pi_hooks_deassign()
- Add more comments about how PI works during vCPU state transition
- coding style

v11:
- Add ASSERT() in vmx_vcpu_block()
- Add some comments in vmx_pi_switch_from()
- Remove some comments which should have been removed when the
  related code was removed during v9 -> v10
- Rename 'vmx_pi_state_to_normal' to 'vmx_pi_do_resume'
- Coding style
- Make arch_vcpu_block() a macro
- Make 'pi_wakeup_vector' static
- Move hook 'vcpu_block' to 'struct hvm_vcpu'
- Initial hook 'vcpu_block' when assigning the first pci device
  and zap it on removal of the last device
- Save pointer to the block list lock instead of the processor
  id in 'struct arch_vmx_struct'
- Implement the following functions as hooks, so we
  can eliminate lots of checks and spinlocks in scheduling
  related code path, which is good for performance.
vmx_pi_switch_from
vmx_pi_switch_to
vmx_pi_do_resume

v10:
- Check iommu_intpost first
- Remove pointless checking of has_hvm_container_vcpu(v)
- Rename 'vmx_pi_state_change' to 'vmx_pi_state_to_normal'
- Since vcpu_unblock() doesn't acquire 'pi_blocked_vcpu_lock', we
  don't need use another list to save the vCPUs with 'ON' set, just
  directly call vcpu_unblock(v).

v9:
- Remove arch_vcpu_block_cancel() and arch_vcpu_wake_prepare()
- Add vmx_pi_state_change() and call it before VM Entry

v8:
- Remove the lazy context switch handling for PI state transition
- Change PI state in vcpu_block() and do_poll() when the vCPU
  is going to be blocked

v7:
- Merge [PATCH v6 16/18] vmx: Add some scheduler hooks for VT-d posted 
interrupts
  and "[PATCH v6 14/18] vmx: posted-interrupt handling when vCPU is blocked"
  into this patch, so it is self-contained and more convenient
  for code review.
- Make 'pi_blocked_vcpu' and 'pi_blocked_vcpu_lock' static
- Coding style
- Use per_cpu() instead of this_cpu() in pi_wakeup_interrupt()
- Move ack_APIC_irq() to the beginning of pi_wakeup_interrupt()
- Rename 'pi_ctxt_switch_from' to 'ctxt_switch_prepare'
- Rename &

[Xen-devel] [PATCH v14 2/2] Add a command line parameter for VT-d posted-interrupts

2016-02-28 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

CC: Jan Beulich 
Signed-off-by: Feng Wu 
Reviewed-by: Kevin Tian 
Acked-by: Jan Beulich 
---
 docs/misc/xen-command-line.markdown | 9 -
 xen/drivers/passthrough/iommu.c | 3 +++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 467dc8f..ea1d60d 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -868,7 +868,7 @@ debug hypervisor only).
 > Default: `new` unless directed-EOI is supported
 
 ### iommu
-> `= List of [  | force | required | intremap | qinval | snoop | 
sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | igfx | verbose | debug ]`
+> `= List of [  | force | required | intremap | intpost | qinval | 
snoop | sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | igfx | verbose | debug ]`
 
 > Sub-options:
 
@@ -895,6 +895,13 @@ debug hypervisor only).
 >> Control the use of interrupt remapping (DMA remapping will always be enabled
 >> if IOMMU functionality is enabled).
 
+> `intpost`
+
+> Default: `false`
+
+>> Control the use of interrupt posting, which depends on the availability of
+>> interrupt remapping.
+
 > `qinval` (VT-d)
 
 > Default: `true`
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 0b2abf4..50d74a5 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -32,6 +32,7 @@ static void iommu_dump_p2m_table(unsigned char key);
  *   off|no|false|disable   Disable IOMMU (default)
  *   force|required Don't boot unless IOMMU is enabled
  *   no-intremapDisable interrupt remapping
+ *   no-intpost Disable VT-d Interrupt posting
  *   verboseBe more verbose
  *   debug  Enable debugging messages and checks
  *   workaround_bios_bugWorkaround some bios issue to still enable
@@ -105,6 +106,8 @@ static void __init parse_iommu_param(char *s)
 iommu_qinval = val;
 else if ( !strcmp(s, "intremap") )
 iommu_intremap = val;
+else if ( !strcmp(s, "intpost") )
+iommu_intpost = val;
 else if ( !strcmp(s, "debug") )
 {
 iommu_debug = val;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v14 1/2] vmx: VT-d posted-interrupt core logic handling

2016-02-28 Thread Feng Wu
This is the core logic handling for VT-d posted-interrupts. Basically it
deals with how and when to update posted-interrupts during the following
scenarios:
- vCPU is preempted
- vCPU is slept
- vCPU is blocked

When vCPU is preempted/slept, we update the posted-interrupts during
scheduling by introducing two new architectural scheduler hooks:
vmx_pi_switch_from() and vmx_pi_switch_to(). When vCPU is blocked, we
introduce a new architectural hook: arch_vcpu_block() to update
posted-interrupts descriptor.

Besides that, before VM-entry, we will make sure the 'NV' field is set
to 'posted_intr_vector' and the vCPU is not in any blocking lists, which
is needed when vCPU is running in non-root mode. The reason we do this check
is because we change the posted-interrupts descriptor in vcpu_block(),
however, we don't change it back in vcpu_unblock() or when vcpu_block()
directly returns due to event delivery (in fact, we don't need to do it
in the two places, that is why we do it before VM-Entry).

When we handle the lazy context switch for the following two scenarios:
- Preempted by a tasklet, which runs in an idle context.
- The prev vcpu is offline and there are no new available vcpus in the
  run queue.
we don't change the 'SN' bit in the posted-interrupt descriptor. This
may incur spurious PI notification events, but a PI notification event
is only sent when 'ON' is clear, and once the PI notification is sent,
'ON' is set by hardware, hence no more notification events are sent
until 'ON' is cleared again. Besides that, spurious PI notification
events are going to happen from time to time in the Xen hypervisor
anyway: for example, when a guest traps to Xen and a PI notification
event happens, there is nothing Xen actually needs to do about it; the
interrupts will be delivered to the guest the next time we do a VMENTRY.

CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
CC: Kevin Tian 
CC: George Dunlap 
CC: Dario Faggioli 
Suggested-by: Yang Zhang 
Suggested-by: Dario Faggioli 
Suggested-by: George Dunlap 
Suggested-by: Jan Beulich 
Signed-off-by: Feng Wu 
Reviewed-by: George Dunlap 
---
v14:
- Introduce a local variable in macro arch_vcpu_block()
- Shorten some variable name and remove the related macro accordingly
- Only use has_arch_pdevs() to determine whether needs to call
  vmx_pi_hooks_assign()/vmx_pi_hooks_deassign()
- Use local variable for the last parameter in cmpxchg()
- Check has_hvm_container_vcpu() in arch_vcpu_block()

v13:
- Define the blocking vcpu list and lock in a structure
- Define the two local per-CPU variables in a structure
- Some adjustment to vmx_pi_hooks_assign() and vmx_pi_hooks_deassign()
- Use smp_rmb() instead of barrier(), and put it a little earlier
- Minor changes to macro arch_vcpu_block() to make 'v' evaluated only once.
- Remove the pointless parentheses in the function arguments in macro 
arch_vcpu_block()
- coding style

v12:
- Move the ASSERT to the locked region in vmx_vcpu_block()
- Add barrier() before using the local variable in vmx_pi_do_resume()
- Split vmx_pi_hooks_reassign() to two functions:
  * vmx_pi_hooks_assign()
  * vmx_pi_hooks_deassign()
- Add more comments about how PI works during vCPU state transition
- coding style

v11:
- Add ASSERT() in vmx_vcpu_block()
- Add some comments in vmx_pi_switch_from()
- Remove some comments which should have been removed when the
  related code was removed during v9 -> v10
- Rename 'vmx_pi_state_to_normal' to 'vmx_pi_do_resume'
- Coding style
- Make arch_vcpu_block() a macro
- Make 'pi_wakeup_vector' static
- Move hook 'vcpu_block' to 'struct hvm_vcpu'
- Initial hook 'vcpu_block' when assigning the first pci device
  and zap it on removal of the last device
- Save pointer to the block list lock instead of the processor
  id in 'struct arch_vmx_struct'
- Implement the following functions as hooks, so we
  can eliminate lots of checks and spinlocks in scheduling
  related code path, which is good for performance.
vmx_pi_switch_from
vmx_pi_switch_to
vmx_pi_do_resume

v10:
- Check iommu_intpost first
- Remove pointless checking of has_hvm_container_vcpu(v)
- Rename 'vmx_pi_state_change' to 'vmx_pi_state_to_normal'
- Since vcpu_unblock() doesn't acquire 'pi_blocked_vcpu_lock', we
  don't need use another list to save the vCPUs with 'ON' set, just
  directly call vcpu_unblock(v).

v9:
- Remove arch_vcpu_block_cancel() and arch_vcpu_wake_prepare()
- Add vmx_pi_state_change() and call it before VM Entry

v8:
- Remove the lazy context switch handling for PI state transition
- Change PI state in vcpu_block() and do_poll() when the vCPU
  is going to be blocked

v7:
- Merge [PATCH v6 16/18] vmx: Add some scheduler hooks for VT-d posted 
interrupts
  and "[PATCH v6 14/18] vmx: posted-interrupt handling when vCPU is blocked"
  into t

[Xen-devel] [PATCH v14 0/2] Add VT-d Posted-Interrupts support

2016-02-28 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrtups Spec. in the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

Feng Wu (2):
  vmx: VT-d posted-interrupt core logic handling
  Add a command line parameter for VT-d posted-interrupts

 docs/misc/xen-command-line.markdown |   9 +-
 xen/arch/x86/hvm/vmx/vmcs.c |   2 +
 xen/arch/x86/hvm/vmx/vmx.c  | 187 
 xen/common/schedule.c   |   4 +
 xen/drivers/passthrough/iommu.c |   3 +
 xen/drivers/passthrough/vtd/iommu.c |  11 +++
 xen/include/asm-arm/domain.h|   2 +
 xen/include/asm-x86/hvm/hvm.h   |  12 +++
 xen/include/asm-x86/hvm/vmx/vmcs.h  |  71 ++
 xen/include/asm-x86/hvm/vmx/vmx.h   |   5 +
 10 files changed, 305 insertions(+), 1 deletion(-)

-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v11 2/2] Add a command line parameter for VT-d posted-interrupts

2016-01-27 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

CC: Jan Beulich 
Signed-off-by: Feng Wu 
Reviewed-by: Kevin Tian 
Acked-by: Jan Beulich 
---
 docs/misc/xen-command-line.markdown | 9 -
 xen/drivers/passthrough/iommu.c | 3 +++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 467dc8f..ea1d60d 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -868,7 +868,7 @@ debug hypervisor only).
 > Default: `new` unless directed-EOI is supported
 
 ### iommu
-> `= List of [  | force | required | intremap | qinval | snoop | 
sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | igfx | verbose | debug ]`
+> `= List of [  | force | required | intremap | intpost | qinval | 
snoop | sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | igfx | verbose | debug ]`
 
 > Sub-options:
 
@@ -895,6 +895,13 @@ debug hypervisor only).
 >> Control the use of interrupt remapping (DMA remapping will always be enabled
 >> if IOMMU functionality is enabled).
 
+> `intpost`
+
+> Default: `false`
+
+>> Control the use of interrupt posting, which depends on the availability of
+>> interrupt remapping.
+
 > `qinval` (VT-d)
 
 > Default: `true`
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 0b2abf4..50d74a5 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -32,6 +32,7 @@ static void iommu_dump_p2m_table(unsigned char key);
  *   off|no|false|disable   Disable IOMMU (default)
  *   force|required Don't boot unless IOMMU is enabled
  *   no-intremapDisable interrupt remapping
+ *   no-intpost Disable VT-d Interrupt posting
  *   verboseBe more verbose
  *   debug  Enable debugging messages and checks
  *   workaround_bios_bugWorkaround some bios issue to still enable
@@ -105,6 +106,8 @@ static void __init parse_iommu_param(char *s)
 iommu_qinval = val;
 else if ( !strcmp(s, "intremap") )
 iommu_intremap = val;
+else if ( !strcmp(s, "intpost") )
+iommu_intpost = val;
 else if ( !strcmp(s, "debug") )
 {
 iommu_debug = val;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v11 0/2] Add VT-d Posted-Interrupts support

2016-01-27 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. in the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

Feng Wu (2):
  vmx: VT-d posted-interrupt core logic handling
  Add a command line parameter for VT-d posted-interrupts

 docs/misc/xen-command-line.markdown |   9 +-
 xen/arch/x86/hvm/vmx/vmcs.c |   2 +
 xen/arch/x86/hvm/vmx/vmx.c  | 179 
 xen/common/schedule.c   |   4 +
 xen/drivers/passthrough/iommu.c |   3 +
 xen/drivers/passthrough/vtd/iommu.c |   2 +
 xen/include/asm-arm/domain.h|   2 +
 xen/include/asm-x86/hvm/hvm.h   |   5 +
 xen/include/asm-x86/hvm/vmx/vmcs.h  |  14 +++
 xen/include/asm-x86/hvm/vmx/vmx.h   |   4 +
 10 files changed, 223 insertions(+), 1 deletion(-)

-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v11 1/2] vmx: VT-d posted-interrupt core logic handling

2016-01-27 Thread Feng Wu
This is the core logic handling for VT-d posted-interrupts. Basically it
deals with how and when to update posted-interrupts during the following
scenarios:
- vCPU is preempted
- vCPU is slept
- vCPU is blocked

When vCPU is preempted/slept, we update the posted-interrupts during
scheduling by introducing two new architectural scheduler hooks:
vmx_pi_switch_from() and vmx_pi_switch_to(). When vCPU is blocked, we
introduce a new architectural hook: arch_vcpu_block() to update
posted-interrupts descriptor.

Besides that, before VM-entry, we will make sure the 'NV' field is set
to 'posted_intr_vector' and the vCPU is not in any blocking lists, which
is needed when vCPU is running in non-root mode. The reason we do this check
is because we change the posted-interrupts descriptor in vcpu_block(),
however, we don't change it back in vcpu_unblock() or when vcpu_block()
directly returns due to event delivery (in fact, we don't need to do it
in the two places, that is why we do it before VM-Entry).

When we handle the lazy context switch for the following two scenarios:
- Preempted by a tasklet, which runs in an idle context.
- the prev vcpu is in offline and no new available vcpus in run queue.
We don't change the 'SN' bit in posted-interrupt descriptor, this
may incur spurious PI notification events, but since PI notification
event is only sent when 'ON' is clear, and once the PI notification
is sent, ON is set by hardware, hence no more notification events
before 'ON' is clear. Besides that, spurious PI notification events are
going to happen from time to time in Xen hypervisor, such as, when
guests trap to Xen and PI notification event happens, there is
nothing Xen actually needs to do about it, the interrupts will be
delivered to guest at the next time we do a VMENTRY.

CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
CC: Kevin Tian 
CC: George Dunlap 
CC: Dario Faggioli 
Suggested-by: Yang Zhang 
Suggested-by: Dario Faggioli 
Suggested-by: George Dunlap 
Suggested-by: Jan Beulich 
Signed-off-by: Feng Wu 
---
v11:
- Add ASSERT() in vmx_vcpu_block()
- Add some comments in vmx_pi_switch_from()
- Remove some comments which should have been removed when the
  related code was removed during v9 -> v10
- Rename 'vmx_pi_state_to_normal' to 'vmx_pi_do_resume'
- Coding style
- Make arch_vcpu_block() a macro
- Make 'pi_wakeup_vector' static
- Move hook 'vcpu_block' to 'struct hvm_vcpu'
- Initialize hook 'vcpu_block' when assigning the first pci device
  and zap it on removal of the last device
- Save pointer to the block list lock instead of the processor
  id in 'struct arch_vmx_struct'
- Implement the following functions as hooks, so we
  can eliminate lots of checks and spinlocks in scheduling
  related code path, which is good for performance.
vmx_pi_switch_from
vmx_pi_switch_to
vmx_pi_do_resume

v10:
- Check iommu_intpost first
- Remove pointless checking of has_hvm_container_vcpu(v)
- Rename 'vmx_pi_state_change' to 'vmx_pi_state_to_normal'
- Since vcpu_unblock() doesn't acquire 'pi_blocked_vcpu_lock', we
  don't need use another list to save the vCPUs with 'ON' set, just
  directly call vcpu_unblock(v).

v9:
- Remove arch_vcpu_block_cancel() and arch_vcpu_wake_prepare()
- Add vmx_pi_state_change() and call it before VM Entry

v8:
- Remove the lazy context switch handling for PI state transition
- Change PI state in vcpu_block() and do_poll() when the vCPU
  is going to be blocked

v7:
- Merge [PATCH v6 16/18] vmx: Add some scheduler hooks for VT-d posted 
interrupts
  and "[PATCH v6 14/18] vmx: posted-interrupt handling when vCPU is blocked"
  into this patch, so it is self-contained and more convenient
  for code review.
- Make 'pi_blocked_vcpu' and 'pi_blocked_vcpu_lock' static
- Coding style
- Use per_cpu() instead of this_cpu() in pi_wakeup_interrupt()
- Move ack_APIC_irq() to the beginning of pi_wakeup_interrupt()
- Rename 'pi_ctxt_switch_from' to 'ctxt_switch_prepare'
- Rename 'pi_ctxt_switch_to' to 'ctxt_switch_cancel'
- Use 'has_hvm_container_vcpu' instead of 'is_hvm_vcpu'
- Use 'spin_lock' and 'spin_unlock' when the interrupt has been
  already disabled.
- Rename arch_vcpu_wake_prepare to vmx_vcpu_wake_prepare
- Define vmx_vcpu_wake_prepare in xen/arch/x86/hvm/hvm.c
- Call .pi_ctxt_switch_to() __context_switch() instead of directly
  calling vmx_post_ctx_switch_pi() in vmx_ctxt_switch_to()
- Make .pi_block_cpu unsigned int
- Use list_del() instead of list_del_init()
- Coding style

One remaining item in v7:
Jan has concern about calling vcpu_unblock() in vmx_pre_ctx_switch_pi(),
need Dario or George's input about this.

v6:
- Add two static inline functions for pi context swit

[Xen-devel] [PATCH v8 04/17] vt-d: VT-d Posted-Interrupts feature detection

2015-10-12 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

CC: Yang Zhang 
CC: Kevin Tian 
Signed-off-by: Feng Wu 
Reviewed-by: Konrad Rzeszutek Wilk 
Reviewed-by: Jan Beulich 
---
v7:
- Remove pointless "if non iommu_intremap then disable iommu_intpost" logic
- Don't need to check !iommu_intremap or !iommu_intpost when setting 
iommu_intpost to 0

v5:
- Remove blank line

v4:
- Correct a logic error when setting iommu_intpost to 0

v3:
- Remove the "if no intremap then no intpost" logic in
  intel_vtd_setup(), it is covered in the iommu_setup().
- Add "if no intremap then no intpost" logic in the end
  of init_vtd_hw() which is called by vtd_resume().

So the logic exists in the following three places:
- parse_iommu_param()
- iommu_setup()
- init_vtd_hw()

 xen/drivers/passthrough/vtd/iommu.c | 14 --
 xen/drivers/passthrough/vtd/iommu.h |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index 1dffc40..8dee731 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2147,8 +2147,8 @@ int __init intel_vtd_setup(void)
 }
 
 /* We enable the following features only if they are supported by all VT-d
- * engines: Snoop Control, DMA passthrough, Queued Invalidation and
- * Interrupt Remapping.
+ * engines: Snoop Control, DMA passthrough, Queued Invalidation, Interrupt
+ * Remapping, and Posted Interrupt
  */
 for_each_drhd_unit ( drhd )
 {
@@ -2176,6 +2176,14 @@ int __init intel_vtd_setup(void)
 if ( iommu_intremap && !ecap_intr_remap(iommu->ecap) )
 iommu_intremap = 0;
 
+/*
+ * We cannot use posted interrupt if X86_FEATURE_CX16 is
+ * not supported, since we count on this feature to
+ * atomically update 16-byte IRTE in posted format.
+ */
+if ( !cap_intr_post(iommu->cap) || !cpu_has_cx16 )
+iommu_intpost = 0;
+
 if ( !vtd_ept_page_compatible(iommu) )
 iommu_hap_pt_share = 0;
 
@@ -2201,6 +2209,7 @@ int __init intel_vtd_setup(void)
 P(iommu_passthrough, "Dom0 DMA Passthrough");
 P(iommu_qinval, "Queued Invalidation");
 P(iommu_intremap, "Interrupt Remapping");
+P(iommu_intpost, "Posted Interrupt");
 P(iommu_hap_pt_share, "Shared EPT tables");
 #undef P
 
@@ -2220,6 +2229,7 @@ int __init intel_vtd_setup(void)
 iommu_passthrough = 0;
 iommu_qinval = 0;
 iommu_intremap = 0;
+iommu_intpost = 0;
 return ret;
 }
 
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index ac71ed1..22abefe 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -61,6 +61,7 @@
 /*
  * Decoding Capability Register
  */
+#define cap_intr_post(c)   (((c) >> 59) & 1)
 #define cap_read_drain(c)  (((c) >> 55) & 1)
 #define cap_write_drain(c) (((c) >> 54) & 1)
 #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 08/17] vmx: Suppress posting interrupts when 'SN' is set

2015-10-12 Thread Feng Wu
Currently, we don't support urgent interrupt, all interrupts
are recognized as non-urgent interrupt, so we cannot send
posted-interrupt when 'SN' is set.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Reviewed-by: Konrad Rzeszutek Wilk 
Reviewed-by: Jan Beulich 
---
v8:
- Parenthesize '1 << POSTED_INTR_ON' and '1 << POSTED_INTR_SN'

v7:
- Coding style
- Read the current pi_desc.control as the initial value of prev.control

v6:
- Add some comments

v5:
- keep the vcpu_kick() at the end of vmx_deliver_posted_intr()
- Keep the 'return' after calling __vmx_deliver_posted_interrupt()

v4:
- Coding style.
- V3 removes a vcpu_kick() from the eoi_exitmap_changed path
  incorrectly, fix it.

v3:
- use cmpxchg to test SN/ON and set ON

 xen/arch/x86/hvm/vmx/vmx.c | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c32d863..741a271 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1701,8 +1701,35 @@ static void vmx_deliver_posted_intr(struct vcpu *v, u8 
vector)
  */
 pi_set_on(&v->arch.hvm_vmx.pi_desc);
 }
-else if ( !pi_test_and_set_on(&v->arch.hvm_vmx.pi_desc) )
+else
 {
+struct pi_desc old, new, prev;
+
+prev.control = v->arch.hvm_vmx.pi_desc.control;
+
+do {
+/*
+ * Currently, we don't support urgent interrupt, all
+ * interrupts are recognized as non-urgent interrupt,
+ * so we cannot send posted-interrupt when 'SN' is set.
+ * Besides that, if 'ON' is already set, we cannot set
+ * posted-interrupts as well.
+ */
+if ( pi_test_sn(&prev) || pi_test_on(&prev) )
+{
+vcpu_kick(v);
+return;
+}
+
+old.control = v->arch.hvm_vmx.pi_desc.control &
+  ~((1 << POSTED_INTR_ON) | (1 << POSTED_INTR_SN));
+new.control = v->arch.hvm_vmx.pi_desc.control |
+  (1 << POSTED_INTR_ON);
+
+prev.control = cmpxchg(&v->arch.hvm_vmx.pi_desc.control,
+   old.control, new.control);
+} while ( prev.control != old.control );
+
 __vmx_deliver_posted_interrupt(v);
 return;
 }
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 09/17] VT-d: Remove pointless casts

2015-10-12 Thread Feng Wu
Remove pointless casts.

CC: Yang Zhang 
CC: Kevin Tian 
Suggested-by: Jan Beulich 
Signed-off-by: Feng Wu 
Reviewed-by: Konrad Rzeszutek Wilk 
---
v7:
- Remove an 'u32' casting omitted in v5

v5:
- Newly added.

 xen/drivers/passthrough/vtd/utils.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/utils.c 
b/xen/drivers/passthrough/vtd/utils.c
index 44c4ef5..a75059f 100644
--- a/xen/drivers/passthrough/vtd/utils.c
+++ b/xen/drivers/passthrough/vtd/utils.c
@@ -234,10 +234,9 @@ static void dump_iommu_info(unsigned char key)
 continue;
 printk("  %04x:  %x   %x  %04x %08x %02x%x   %x  %x  %x  
%x"
 "   %x %x\n", i,
-(u32)p->hi.svt, (u32)p->hi.sq, (u32)p->hi.sid,
-(u32)p->lo.dst, (u32)p->lo.vector, (u32)p->lo.avail,
-(u32)p->lo.dlm, (u32)p->lo.tm, (u32)p->lo.rh,
-(u32)p->lo.dm, (u32)p->lo.fpd, (u32)p->lo.p);
+p->hi.svt, p->hi.sq, p->hi.sid, p->lo.dst, p->lo.vector,
+p->lo.avail, p->lo.dlm, p->lo.tm, p->lo.rh, p->lo.dm,
+p->lo.fpd, p->lo.p);
 print_cnt++;
 }
 if ( iremap_entries )
@@ -281,11 +280,10 @@ static void dump_iommu_info(unsigned char key)
 
 printk("   %02x:  %04x   %x%x   %x   %x   %x%x"
 "%x %02x\n", i,
-(u32)remap->index_0_14 | ((u32)remap->index_15 << 15),
-(u32)remap->format, (u32)remap->mask, (u32)remap->trigger,
-(u32)remap->irr, (u32)remap->polarity,
-(u32)remap->delivery_status, (u32)remap->delivery_mode,
-(u32)remap->vector);
+remap->index_0_14 | (remap->index_15 << 15),
+remap->format, remap->mask, remap->trigger, remap->irr,
+remap->polarity, remap->delivery_status, 
remap->delivery_mode,
+remap->vector);
 }
 }
 }
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 06/17] vmx: Add some helper functions for Posted-Interrupts

2015-10-12 Thread Feng Wu
This patch adds some helper functions to manipulate the
Posted-Interrupts Descriptor.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Reviewed-by: Konrad Rzeszutek Wilk 
---
v7:
- Use bitfield in pi_test_on() and pi_test_sn()

v4:
- Newly added

 xen/include/asm-x86/hvm/vmx/vmx.h | 21 +
 1 file changed, 21 insertions(+)

diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index 3fbfa44..8d91110 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -101,6 +101,7 @@ void vmx_update_cpu_exec_control(struct vcpu *v);
 void vmx_update_secondary_exec_control(struct vcpu *v);
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
 static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
 {
 return test_and_set_bit(vector, pi_desc->pir);
@@ -121,11 +122,31 @@ static inline int pi_test_and_clear_on(struct pi_desc 
*pi_desc)
 return test_and_clear_bit(POSTED_INTR_ON, &pi_desc->control);
 }
 
+static inline int pi_test_on(struct pi_desc *pi_desc)
+{
+return pi_desc->on;
+}
+
 static inline unsigned long pi_get_pir(struct pi_desc *pi_desc, int group)
 {
 return xchg(&pi_desc->pir[group], 0);
 }
 
+static inline int pi_test_sn(struct pi_desc *pi_desc)
+{
+return pi_desc->sn;
+}
+
+static inline void pi_set_sn(struct pi_desc *pi_desc)
+{
+set_bit(POSTED_INTR_SN, &pi_desc->control);
+}
+
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+clear_bit(POSTED_INTR_SN, &pi_desc->control);
+}
+
 /*
  * Exit Reasons
  */
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 02/17] Add cmpxchg16b support for x86-64

2015-10-12 Thread Feng Wu
This patch adds cmpxchg16b support for x86-64, so software
can perform 128-bit atomic write/read.

CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
---
v8:
- Remove pointless cast when assigning 'new_low'
- properly parenthesize cmpxchg16b()

v7:
- Make the last two parameters of __cmpxchg16b() const
- Remove memory clobber
- Add run-time and build-time check in cmpxchg16b()
- Cast the last two parameter to void * when calling __cmpxchg16b()

v6:
- Fix a typo

v5:
- Change back the parameters of __cmpxchg16b() to __uint128_t *
- Remove pointless cast for 'ptr'
- Remove pointless parentheses
- Use A constraint for the output

v4:
- Use pointer as the parameter of __cmpxchg16b().
- Use gcc's __uint128_t built-in type
- Make the parameters of __cmpxchg16b() void *

v3:
- Newly added.

 xen/include/asm-x86/x86_64/system.h | 33 +
 1 file changed, 33 insertions(+)

diff --git a/xen/include/asm-x86/x86_64/system.h 
b/xen/include/asm-x86/x86_64/system.h
index 662813a..8f87d1e 100644
--- a/xen/include/asm-x86/x86_64/system.h
+++ b/xen/include/asm-x86/x86_64/system.h
@@ -6,6 +6,39 @@
(unsigned long)(n),sizeof(*(ptr
 
 /*
+ * Atomic 16 bytes compare and exchange.  Compare OLD with MEM, if
+ * identical, store NEW in MEM.  Return the initial value in MEM.
+ * Success is indicated by comparing RETURN with OLD.
+ *
+ * This function can only be called when cpu_has_cx16 is true.
+ */
+
+static always_inline __uint128_t __cmpxchg16b(
+volatile void *ptr, const __uint128_t *old, const __uint128_t *new)
+{
+__uint128_t prev;
+uint64_t new_high = *new >> 64;
+uint64_t new_low = *new;
+
+ASSERT(cpu_has_cx16);
+
+asm volatile ( "lock; cmpxchg16b %3"
+   : "=A" (prev)
+   : "c" (new_high), "b" (new_low),
+ "m" (*__xg(ptr)), "0" (*old) );
+
+return prev;
+}
+
+#define cmpxchg16b(ptr, o, n) ({   \
+volatile void *_p = (ptr); \
+ASSERT(!((unsigned long)_p & 0xf));\
+BUILD_BUG_ON(sizeof(*(o)) != sizeof(__uint128_t)); \
+BUILD_BUG_ON(sizeof(*(n)) != sizeof(__uint128_t)); \
+__cmpxchg16b(_p, (void *)(o), (void *)(n));\
+})
+
+/*
  * This function causes value _o to be changed to _n at location _p.
  * If this access causes a fault then we return 1, otherwise we return 0.
  * If no fault occurs then _o is updated to the value we saw at _p. If this
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 03/17] iommu: Add iommu_intpost to control VT-d Posted-Interrupts feature

2015-10-12 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

This patch adds variable 'iommu_intpost' to control whether to enable VT-d
posted-interrupt or not in the generic IOMMU code.

CC: Jan Beulich 
CC: Kevin Tian 
Signed-off-by: Feng Wu 
Reviewed-by: Kevin Tian 
Reviewed-by: Konrad Rzeszutek Wilk 
Acked-by: Jan Beulich 
---
v5:
- Remove the "if no intremap then no intpost" logic in parse_iommu_param(), 
which
  can be covered in iommu_setup()

v3:
- Remove pointless initializer for 'iommu_intpost'.
- Some adjustment for "if no intremap then no intpost" logic.
* For parse_iommu_param(), move it to the end of the function,
  so we don't need to add the same logic when introducing the
  new kernel parameter 'intpost' in later patch.
* Add this logic in iommu_setup() after iommu_hardware_setup()
  is called.

 xen/drivers/passthrough/iommu.c | 13 -
 xen/include/xen/iommu.h |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index fc7831e..36d5cc0 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -51,6 +51,14 @@ bool_t __read_mostly iommu_passthrough;
 bool_t __read_mostly iommu_snoop = 1;
 bool_t __read_mostly iommu_qinval = 1;
 bool_t __read_mostly iommu_intremap = 1;
+
+/*
+ * In the current implementation of VT-d posted interrupts, in some extreme
+ * cases, the per cpu list which saves the blocked vCPU will be very long,
+ * and this will affect the interrupt latency, so let this feature off by
+ * default until we find a good solution to resolve it.
+ */
+bool_t __read_mostly iommu_intpost;
 bool_t __read_mostly iommu_hap_pt_share = 1;
 bool_t __read_mostly iommu_debug;
 bool_t __read_mostly amd_iommu_perdev_intremap = 1;
@@ -307,6 +315,9 @@ int __init iommu_setup(void)
 panic("Couldn't enable %s and iommu=required/force",
   !iommu_enabled ? "IOMMU" : "Interrupt Remapping");
 
+if ( !iommu_intremap )
+iommu_intpost = 0;
+
 if ( !iommu_enabled )
 {
 iommu_snoop = 0;
@@ -374,7 +385,7 @@ void iommu_crash_shutdown(void)
 const struct iommu_ops *ops = iommu_get_ops();
 if ( iommu_enabled )
 ops->crash_shutdown();
-iommu_enabled = iommu_intremap = 0;
+iommu_enabled = iommu_intremap = iommu_intpost = 0;
 }
 
 int iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 8f3a20e..1f5d04a 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -30,7 +30,7 @@
 extern bool_t iommu_enable, iommu_enabled;
 extern bool_t force_iommu, iommu_verbose;
 extern bool_t iommu_workaround_bios_bug, iommu_igfx, iommu_passthrough;
-extern bool_t iommu_snoop, iommu_qinval, iommu_intremap;
+extern bool_t iommu_snoop, iommu_qinval, iommu_intremap, iommu_intpost;
 extern bool_t iommu_hap_pt_share;
 extern bool_t iommu_debug;
 extern bool_t amd_iommu_perdev_intremap;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 12/17] x86: move some APIC related macros to apicdef.h

2015-10-12 Thread Feng Wu
Move some APIC related macros to apicdef.h, so they can be used
outside of vlapic.c.

CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Acked-by: Jan Beulich 
---
v8:
- Minor changes

v7:
- Put the Macros to the right place inside the file.

v6:
- Newly introduced.

 xen/arch/x86/hvm/vlapic.c | 5 -
 xen/include/asm-x86/apicdef.h | 3 +++
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index b893b40..9b7c871 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -65,11 +65,6 @@ static const unsigned int vlapic_lvt_mask[VLAPIC_LVT_NUM] =
  LVT_MASK
 };
 
-/* Following could belong in apicdef.h */
-#define APIC_SHORT_MASK  0xc
-#define APIC_DEST_NOSHORT0x0
-#define APIC_DEST_MASK   0x800
-
 #define vlapic_lvt_vector(vlapic, lvt_type) \
 (vlapic_get_reg(vlapic, lvt_type) & APIC_VECTOR_MASK)
 
diff --git a/xen/include/asm-x86/apicdef.h b/xen/include/asm-x86/apicdef.h
index 6069fce..8752287 100644
--- a/xen/include/asm-x86/apicdef.h
+++ b/xen/include/asm-x86/apicdef.h
@@ -54,9 +54,11 @@
 #defineAPIC_ESR_RECVILL0x00040
 #defineAPIC_ESR_ILLREGA0x00080
 #defineAPIC_ICR0x300
+#defineAPIC_DEST_NOSHORT   0x0
 #defineAPIC_DEST_SELF  0x4
 #defineAPIC_DEST_ALLINC0x8
 #defineAPIC_DEST_ALLBUT0xC
+#defineAPIC_SHORT_MASK 0xC
 #defineAPIC_ICR_RR_MASK0x3
 #defineAPIC_ICR_RR_INVALID 0x0
 #defineAPIC_ICR_RR_INPROG  0x1
@@ -64,6 +66,7 @@
 #defineAPIC_INT_LEVELTRIG  0x08000
 #defineAPIC_INT_ASSERT 0x04000
 #defineAPIC_ICR_BUSY   0x01000
+#defineAPIC_DEST_MASK  0x00800
 #defineAPIC_DEST_LOGICAL   0x00800
 #defineAPIC_DEST_PHYSICAL  0x0
 #defineAPIC_DM_FIXED   0x0
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 13/17] Update IRTE according to guest interrupt config changes

2015-10-12 Thread Feng Wu
When guest changes its interrupt configuration (such as, vector, etc.)
for direct-assigned devices, we need to update the associated IRTE
with the new guest vector, so external interrupts from the assigned
devices can be injected to guests without VM-Exit.

For lowest-priority interrupts, we use vector-hashing mechanism to find
the destination vCPU. This follows the hardware behavior, since modern
Intel CPUs use vector hashing to handle the lowest-priority interrupt.

For multicast/broadcast vCPU, we cannot handle it via interrupt posting,
still use interrupt remapping.

CC: Jan Beulich 
Signed-off-by: Feng Wu 
Acked-by: Jan Beulich 
---
v8:
- Remove local variable 'bitmap_array_size'
- Use switch to replace if-else

v7:
- Remove some pointless debug printk
- Fix a logic error when assigning 'delivery_mode'
- Adjust the definition of local variable 'idx'
- Add a dprintk if we cannot find the vCPU from 'pi_find_dest_vcpu'
- Add 'else if ( delivery_mode == dest_Fixed )' in 'pi_find_dest_vcpu'

v6:
- Use macro to replace plain numbers
- Correct the overflow error in a loop

v5:
- Make 'struct vcpu *vcpu' const

v4:
- Make some 'int' variables 'unsigned int' in pi_find_dest_vcpu()
- Make 'dest_id' uint32_t
- Rename 'size' to 'bitmap_array_size'
- find_next_bit() and find_first_bit() always return unsigned int,
  so no need to check whether the return value is less than 0.
- Message error level XENLOG_G_WARNING -> XENLOG_G_INFO
- Remove useless warning message
- Create a separate function vector_hashing_dest() to find the
  destination of lowest-priority interrupts.
- Change some comments

v3:
- Use bitmap to store the all the possible destination vCPUs of an
  interrupt, then trying to find the right destination from the bitmap
- Typo and some small changes

 xen/drivers/passthrough/io.c | 123 ++-
 1 file changed, 122 insertions(+), 1 deletion(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index bda9374..6b1ee6a 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static DEFINE_PER_CPU(struct list_head, dpci_list);
 
@@ -198,6 +199,108 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
 xfree(dpci);
 }
 
+/*
+ * This routine handles lowest-priority interrupts using vector-hashing
+ * mechanism. As an example, modern Intel CPUs use this method to handle
+ * lowest-priority interrupts.
+ *
+ * Here is the details about the vector-hashing mechanism:
+ * 1. For lowest-priority interrupts, store all the possible destination
+ *vCPUs in an array.
+ * 2. Use "gvec % max number of destination vCPUs" to find the right
+ *destination vCPU in the array for the lowest-priority interrupt.
+ */
+static struct vcpu *vector_hashing_dest(const struct domain *d,
+uint32_t dest_id,
+bool_t dest_mode,
+uint8_t gvec)
+
+{
+unsigned long *dest_vcpu_bitmap;
+unsigned int dest_vcpus = 0;
+struct vcpu *v, *dest = NULL;
+unsigned int i;
+
+dest_vcpu_bitmap = xzalloc_array(unsigned long,
+ BITS_TO_LONGS(d->max_vcpus));
+if ( !dest_vcpu_bitmap )
+return NULL;
+
+for_each_vcpu ( d, v )
+{
+if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, APIC_DEST_NOSHORT,
+dest_id, dest_mode) )
+continue;
+
+__set_bit(v->vcpu_id, dest_vcpu_bitmap);
+dest_vcpus++;
+}
+
+if ( dest_vcpus != 0 )
+{
+unsigned int mod = gvec % dest_vcpus;
+unsigned int idx = 0;
+
+for ( i = 0; i <= mod; i++ )
+{
+idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1;
+BUG_ON(idx >= d->max_vcpus);
+}
+
+dest = d->vcpu[idx - 1];
+}
+
+xfree(dest_vcpu_bitmap);
+
+return dest;
+}
+
+/*
+ * The purpose of this routine is to find the right destination vCPU for
+ * an interrupt which will be delivered by VT-d posted-interrupt. There
+ * are several cases as below:
+ *
+ * - For lowest-priority interrupts, use vector-hashing mechanism to find
+ *   the destination.
+ * - Otherwise, for single destination interrupt, it is straightforward to
+ *   find the destination vCPU and return true.
+ * - For multicast/broadcast vCPU, we cannot handle it via interrupt posting,
+ *   so return NULL.
+ */
+static struct vcpu *pi_find_dest_vcpu(const struct domain *d, uint32_t dest_id,
+  bool_t dest_mode, uint8_t delivery_mode,
+  uint8_t gvec)
+{
+unsigned int dest_vcpus = 0;
+struct vcpu *v, *dest = NULL;
+
+switch ( delivery_mode

[Xen-devel] [PATCH v8 14/17] vmx: Properly handle notification event when vCPU is running

2015-10-12 Thread Feng Wu
When a vCPU is running in Root mode and a notification event
has been injected to it, we need to set VCPU_KICK_SOFTIRQ for
the current cpu, so the pending interrupt in PIRR will be
synced to vIRR before VM-Exit in time.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Acked-by: Kevin Tian 
---
v7:
- Retain 'cli' in the comments to make it more understandable.
- Register another notification event handler when VT-d PI is enabled

v6:
- Ack the interrupt in the beginning of pi_notification_interrupt()

v4:
- Coding style.

v3:
- Make pi_notification_interrupt() static

 xen/arch/x86/hvm/vmx/vmx.c | 54 +-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 741a271..e448b31 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1975,6 +1975,53 @@ static struct hvm_function_table __initdata 
vmx_function_table = {
 .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
 };
 
+/* Handle VT-d posted-interrupt when VCPU is running. */
+static void pi_notification_interrupt(struct cpu_user_regs *regs)
+{
+ack_APIC_irq();
+this_cpu(irq_count)++;
+
+/*
+ * We get here when a vCPU is running in root-mode (such as via hypercall,
+ * or any other reasons which can result in VM-Exit), and before vCPU is
+ * back to non-root, external interrupts from an assigned device happen
+ * and a notification event is delivered to this logical CPU.
+ *
+ * we need to set VCPU_KICK_SOFTIRQ for the current cpu, just like
+ * __vmx_deliver_posted_interrupt(). So the pending interrupt in PIRR will
+ * be synced to vIRR before VM-Exit in time.
+ *
+ * Please refer to the following code fragments from
+ * xen/arch/x86/hvm/vmx/entry.S:
+ *
+ * .Lvmx_do_vmentry
+ *
+ *  ..
+ *
+ *  point 1
+ *
+ *  cli
+ *  cmp  %ecx,(%rdx,%rax,1)
+ *  jnz  .Lvmx_process_softirqs
+ *
+ *  ..
+ *
+ *  je   .Lvmx_launch
+ *
+ *  ..
+ *
+ * .Lvmx_process_softirqs:
+ *  sti
+ *  call do_softirq
+ *  jmp  .Lvmx_do_vmentry
+ *
+ * If VT-d engine issues a notification event at point 1 above, it cannot
+ * be delivered to the guest during this VM-entry without raising the
+ * softirq in this notification handler.
+ */
+raise_softirq(VCPU_KICK_SOFTIRQ);
+}
+
 const struct hvm_function_table * __init start_vmx(void)
 {
 set_in_cr4(X86_CR4_VMXE);
@@ -2012,7 +2059,12 @@ const struct hvm_function_table * __init start_vmx(void)
 }
 
 if ( cpu_has_vmx_posted_intr_processing )
-alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt);
+{
+if ( iommu_intpost )
+alloc_direct_apic_vector(&posted_intr_vector, 
pi_notification_interrupt);
+else
+alloc_direct_apic_vector(&posted_intr_vector, 
event_check_interrupt);
+}
 else
 {
 vmx_function_table.deliver_posted_intr = NULL;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 07/17] vmx: Initialize VT-d Posted-Interrupts Descriptor

2015-10-12 Thread Feng Wu
This patch initializes the VT-d Posted-interrupt Descriptor.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Acked-by: Kevin Tian 
Reviewed-by: Konrad Rzeszutek Wilk 
---
v7:
- Add comments to function 'pi_desc_init' to clarify why we
  update the posted-interrupt descriptor in a non-atomic way
  in it.

v3:
- Move pi_desc_init() to xen/arch/x86/hvm/vmx/vmcs.c
- Remove the 'inline' flag of pi_desc_init()

 xen/arch/x86/hvm/vmx/vmcs.c   | 22 ++
 xen/include/asm-x86/hvm/vmx/vmx.h |  2 ++
 2 files changed, 24 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index a0a97e7..5f67797 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static bool_t __read_mostly opt_vpid_enabled = 1;
 boolean_param("vpid", opt_vpid_enabled);
@@ -951,6 +952,24 @@ void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, 
u64 val)
 virtual_vmcs_exit(vvmcs);
 }
 
+/*
+ * This function is only called in a vCPU's initialization phase,
+ * so we can update the posted-interrupt descriptor in non-atomic way.
+ */
+static void pi_desc_init(struct vcpu *v)
+{
+uint32_t dest;
+
+v->arch.hvm_vmx.pi_desc.nv = posted_intr_vector;
+
+dest = cpu_physical_id(v->processor);
+
+if ( x2apic_enabled )
+v->arch.hvm_vmx.pi_desc.ndst = dest;
+else
+v->arch.hvm_vmx.pi_desc.ndst = MASK_INSR(dest, PI_xAPIC_NDST_MASK);
+}
+
 static int construct_vmcs(struct vcpu *v)
 {
 struct domain *d = v->domain;
@@ -1089,6 +1108,9 @@ static int construct_vmcs(struct vcpu *v)
 
 if ( cpu_has_vmx_posted_intr_processing )
 {
+if ( iommu_intpost )
+pi_desc_init(v);
+
 __vmwrite(PI_DESC_ADDR, virt_to_maddr(&v->arch.hvm_vmx.pi_desc));
 __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
 }
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index 8d91110..70b254f 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -88,6 +88,8 @@ typedef enum {
 #define EPT_EMT_WB  6
 #define EPT_EMT_RSV27
 
+#define PI_xAPIC_NDST_MASK  0xFF00
+
 void vmx_asm_vmexit_handler(struct cpu_user_regs);
 void vmx_asm_do_vmentry(void);
 void vmx_intr_assist(void);
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 11/17] vt-d: Add API to update IRTE when VT-d PI is used

2015-10-12 Thread Feng Wu
This patch adds an API which is used to update the IRTE
for posted-interrupt when guest changes MSI/MSI-X information.

CC: Yang Zhang 
CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Reviewed-by: Jan Beulich 
---
v8:
- Some minor adjustment

v7:
- Remove __uint128_t cast
- Remove Kevin's Ack due to a bug fix for v6
- Reword some comments
- Setup posted IRTE from zeroed structure

v6:
- In some error cases, the desc->lock will be unlocked twice, fix it.
- Coding style fix.
- Add some comments.

v5:
- Make some function parameters const
- Call "spin_unlock_irq(&desc->lock);" a little earlier
- Add "ASSERT(spin_is_locked(&pcidevs_lock))"
- -EBADSLT -> -ENODEV, EBADSLT is removed in the latest Xen

v4:
- Don't inline setup_posted_irte()
- const struct pi_desc *pi_desc for setup_posted_irte()
- return -EINVAL when pirq_spin_lock_irq_desc() fails.
- Make some variables const
- Release irq desc lock earlier in pi_update_irte()
- Remove the pointless do-while() loop when doing cmpxchg16b()

v3:
- Remove "adding PDA_MASK()" when updating 'pda_l' and 'pda_h' for IRTE.
- Change the return type of pi_update_irte() to int.
- Remove some pointless printk message in pi_update_irte().
- Use structure assignment instead of memcpy() for irte copy.

 xen/drivers/passthrough/vtd/intremap.c | 120 +
 xen/drivers/passthrough/vtd/iommu.h|   6 ++
 xen/include/asm-x86/iommu.h|   2 +
 3 files changed, 128 insertions(+)

diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index 8f135e1..67e4f6d 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -899,3 +899,123 @@ void iommu_disable_x2apic_IR(void)
 for_each_drhd_unit ( drhd )
 disable_qinval(drhd->iommu);
 }
+
+static void setup_posted_irte(
+struct iremap_entry *new_ire, const struct iremap_entry *old_ire,
+const struct pi_desc *pi_desc, const uint8_t gvec)
+{
+memset(new_ire, sizeof(*new_ire), 0);
+
+if ( !old_ire->remap.im )
+{
+new_ire->post.p = old_ire->remap.p;
+new_ire->post.fpd = old_ire->remap.fpd;
+new_ire->post.sid = old_ire->remap.sid;
+new_ire->post.sq = old_ire->remap.sq;
+new_ire->post.svt = old_ire->remap.svt;
+}
+else
+{
+new_ire->post.p = old_ire->post.p;
+new_ire->post.fpd = old_ire->post.fpd;
+new_ire->post.sid = old_ire->post.sid;
+new_ire->post.sq = old_ire->post.sq;
+new_ire->post.svt = old_ire->post.svt;
+new_ire->post.urg = old_ire->post.urg;
+}
+
+new_ire->post.im = 1;
+new_ire->post.vector = gvec;
+new_ire->post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
+new_ire->post.pda_h = virt_to_maddr(pi_desc) >> 32;
+}
+
+/*
+ * This function is used to update the IRTE for posted-interrupt
+ * when guest changes MSI/MSI-X information.
+ */
+int pi_update_irte(const struct vcpu *v, const struct pirq *pirq,
+const uint8_t gvec)
+{
+struct irq_desc *desc;
+const struct msi_desc *msi_desc;
+int remap_index;
+int rc = 0;
+const struct pci_dev *pci_dev;
+const struct acpi_drhd_unit *drhd;
+struct iommu *iommu;
+struct ir_ctrl *ir_ctrl;
+struct iremap_entry *iremap_entries = NULL, *p = NULL;
+struct iremap_entry new_ire, old_ire;
+const struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+__uint128_t ret;
+
+desc = pirq_spin_lock_irq_desc(pirq, NULL);
+if ( !desc )
+return -EINVAL;
+
+msi_desc = desc->msi_desc;
+if ( !msi_desc )
+{
+rc = -ENODEV;
+goto unlock_out;
+}
+
+pci_dev = msi_desc->dev;
+if ( !pci_dev )
+{
+rc = -ENODEV;
+goto unlock_out;
+}
+
+remap_index = msi_desc->remap_index;
+
+spin_unlock_irq(&desc->lock);
+
+ASSERT(spin_is_locked(&pcidevs_lock));
+
+/*
+ * FIXME: For performance reasons we should store the 'iommu' pointer in
+ * 'struct msi_desc' in some other place, so we don't need to waste
+ * time searching it here.
+ */
+drhd = acpi_find_matched_drhd_unit(pci_dev);
+if ( !drhd )
+return -ENODEV;
+
+iommu = drhd->iommu;
+ir_ctrl = iommu_ir_ctrl(iommu);
+if ( !ir_ctrl )
+return -ENODEV;
+
+spin_lock_irq(&ir_ctrl->iremap_lock);
+
+GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, remap_index, iremap_entries, p);
+
+old_ire = *p;
+
+/* Setup/Update interrupt remapping table entry. */
+setup_posted_irte(&new_ire, &old_ire, pi_desc, gvec);
+ret = cmpxchg16b(p, &old_ire, &new_ire);
+
+/*
+ * In the above, we use cmpxchg16 to atomically update the 1

[Xen-devel] [PATCH v8 01/17] VT-d Posted-intterrupt (PI) design

2015-10-12 Thread Feng Wu
Add the design doc for VT-d PI.

CC: Kevin Tian 
CC: Yang Zhang 
CC: Jan Beulich 
CC: Keir Fraser 
CC: Andrew Cooper 
CC: George Dunlap 
Signed-off-by: Feng Wu 
Reviewed-by: Kevin Tian 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 docs/misc/vtd-pi.txt | 332 +++
 1 file changed, 332 insertions(+)
 create mode 100644 docs/misc/vtd-pi.txt

diff --git a/docs/misc/vtd-pi.txt b/docs/misc/vtd-pi.txt
new file mode 100644
index 000..af5409a
--- /dev/null
+++ b/docs/misc/vtd-pi.txt
@@ -0,0 +1,332 @@
+Authors: Feng Wu 
+
+VT-d Posted-interrupt (PI) design for XEN
+
+Background
+==
+With the development of virtualization, there are more and more device
+assignment requirements. However, today when a VM is running with
+assigned devices (such as, NIC), external interrupt handling for the assigned
+devices always needs VMM intervention.
+
+VT-d Posted-interrupt is a more enhanced method to handle interrupts
+in the virtualization environment. Interrupt posting is the process by
+which an interrupt request is recorded in a memory-resident
+posted-interrupt-descriptor structure by the root-complex, followed by
+an optional notification event issued to the CPU complex.
+
+With VT-d Posted-interrupt we can get the following advantages:
+- Direct delivery of external interrupts to running vCPUs without VMM
+intervention
+- Decrease the interrupt migration complexity. On vCPU migration, software
+can atomically co-migrate all interrupts targeting the migrating vCPU. For
+virtual machines with assigned devices, migrating a vCPU across pCPUs
+either incurs the overhead of forwarding interrupts in software (e.g. via VMM
+generated IPIs), or complexity to independently migrate each interrupt 
targeting
+the vCPU to the new pCPU. However, after enabling VT-d PI, the destination vCPU
+of an external interrupt from assigned devices is stored in the IRTE (i.e.
+Posted-interrupt Descriptor Address), when vCPU is migrated to another pCPU,
+we will set this new pCPU in the 'NDST' field of Posted-interrupt descriptor, 
this
+makes the interrupt migration automatic.
+
+Here is what Xen currently does for external interrupts from assigned devices:
+
+When a VM is running and an external interrupt from an assigned device occurs
+for it, a VM-Exit happens, then:
+
+vmx_do_extint() --> do_IRQ() --> __do_IRQ_guest() --> hvm_do_IRQ_dpci() -->
+raise_softirq_for(pirq_dpci) --> raise_softirq(HVM_DPCI_SOFTIRQ)
+
+softirq HVM_DPCI_SOFTIRQ is bound to dpci_softirq()
+
+dpci_softirq() --> hvm_dirq_assist() --> vmsi_deliver_pirq() --> 
vmsi_deliver() -->
+vmsi_inj_irq() --> vlapic_set_irq()
+
+vlapic_set_irq() does the following things:
+1. If CPU-side posted-interrupt is supported, call vmx_deliver_posted_intr() 
to deliver
+the virtual interrupt via posted-interrupt infrastructure.
+2. Else if CPU-side posted-interrupt is not supported, set the related vIRR in 
vLAPIC
+page and call vcpu_kick() to kick the related vCPU. Before VM-Entry, 
vmx_intr_assist()
+will help to inject the interrupt to guests.
+
+However, after VT-d PI is supported, when a guest is running in non-root and an
+external interrupt from an assigned device occurs for it, no VM-Exit is needed,
+the guest can handle this totally in non-root mode, thus avoiding all the above
+code flow.
+
+Posted-interrupt Introduction
+
+There are two components to the Posted-interrupt architecture:
+Processor Support and Root-Complex Support
+
+- Processor Support
+Posted-interrupt processing is a feature by which a processor processes
+the virtual interrupts by recording them as pending on the virtual-APIC
+page.
+
+Posted-interrupt processing is enabled by setting the process posted
+interrupts VM-execution control. The processing is performed in response
+to the arrival of an interrupt with the posted-interrupt notification vector.
+In response to such an interrupt, the processor processes virtual interrupts
+recorded in a data structure called a posted-interrupt descriptor.
+
+For more information about APICv and CPU-side Posted-interrupt, please refer
+to Chapter 29, and Section 29.6 in the Intel SDM:
+http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
+
+- Root-Complex Support
+Interrupt posting is the process by which an interrupt request (from IOAPIC
+or MSI/MSIx capable sources) is recorded in a memory-resident
+posted-interrupt-descriptor structure by the root-complex, followed by
+an optional notification event issued to the CPU complex. The interrupt
+request arriving at the root-complex carries the identity of the interrupt
+request source and a 'remapping-index'. The remapping-index is used to
+look-up an entry from the memory-resident interrupt-remap-table. Unlike
+interrupt-remapping, the interrupt-remap-table-entry for a posted-interrupt,
+specifies a virtual-vector and a pointer to th

[Xen-devel] [PATCH v8 17/17] Add a command line parameter for VT-d posted-interrupts

2015-10-12 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

CC: Jan Beulich 
Signed-off-by: Feng Wu 
Reviewed-by: Kevin Tian 
Acked-by: Jan Beulich 
---
v6:
- Change the default value to 'false' in xen-command-line.markdown

 docs/misc/xen-command-line.markdown | 9 -
 xen/drivers/passthrough/iommu.c | 3 +++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index a2e427c..ecaf221 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -855,7 +855,7 @@ debug hypervisor only).
 > Default: `new` unless directed-EOI is supported
 
 ### iommu
-> `= List of [  | force | required | intremap | qinval | snoop | 
sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | igfx | verbose | debug ]`
+> `= List of [  | force | required | intremap | intpost | qinval | 
snoop | sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | igfx | verbose | debug ]`
 
 > Sub-options:
 
@@ -882,6 +882,13 @@ debug hypervisor only).
 >> Control the use of interrupt remapping (DMA remapping will always be enabled
 >> if IOMMU functionality is enabled).
 
+> `intpost`
+
+> Default: `false`
+
+>> Control the use of interrupt posting, which depends on the availability of
+>> interrupt remapping.
+
 > `qinval` (VT-d)
 
 > Default: `true`
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 36d5cc0..8d03076 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -38,6 +38,7 @@ static void iommu_dump_p2m_table(unsigned char key);
  *   no-snoop   Disable VT-d Snoop Control
  *   no-qinval  Disable VT-d Queued Invalidation
  *   no-intremapDisable VT-d Interrupt Remapping
+ *   no-intpost Disable VT-d Interrupt posting
  */
 custom_param("iommu", parse_iommu_param);
 bool_t __initdata iommu_enable = 1;
@@ -105,6 +106,8 @@ static void __init parse_iommu_param(char *s)
 iommu_qinval = val;
 else if ( !strcmp(s, "intremap") )
 iommu_intremap = val;
+else if ( !strcmp(s, "intpost") )
+iommu_intpost = val;
 else if ( !strcmp(s, "debug") )
 {
 iommu_debug = val;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 15/17] vmx: VT-d posted-interrupt core logic handling

2015-10-12 Thread Feng Wu
This patch includes the following aspects:
- Handling logic when vCPU is blocked:
* Add a global vector to wake up the blocked vCPU
  when an interrupt is being posted to it (This part
  was suggested by Yang Zhang ).
* Define two per-cpu variables:
  1. pi_blocked_vcpu:
A list storing the vCPUs which were blocked
on this pCPU.

  2. pi_blocked_vcpu_lock:
The spinlock to protect pi_blocked_vcpu.

- Add the following hooks, this part was suggested
  by George Dunlap  and
  Dario Faggioli .
* arch_vcpu_block()
  Called before the vCPU is blocked, to update the PID
  (posted-interrupt descriptor).

* arch_vcpu_block_cancel()
  Called when interrupts come in during blocking.

* vmx_pi_switch_from()
  Called before context switch, we update the PID when the
  vCPU is preempted or going to sleep.

* vmx_pi_switch_to()
  Called after context switch, we update the PID when the vCPU
  is going to run.

* arch_vcpu_wake_prepare()
  It will be called when waking up the vCPU, we update
  the posted interrupt descriptor when the vCPU is
  unblocked.

CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
CC: Kevin Tian 
CC: George Dunlap 
CC: Dario Faggioli 
Suggested-by: Dario Faggioli 
Signed-off-by: Feng Wu 
---
v8:
- Remove the lazy context switch handling for PI state transition
- Change PI state in vcpu_block() and do_poll() when the vCPU
  is going to be blocked

v7:
- Merge [PATCH v6 16/18] vmx: Add some scheduler hooks for VT-d posted 
interrupts
  and "[PATCH v6 14/18] vmx: posted-interrupt handling when vCPU is blocked"
  into this patch, so it is self-contained and more convenient
  for code review.
- Make 'pi_blocked_vcpu' and 'pi_blocked_vcpu_lock' static
- Coding style
- Use per_cpu() instead of this_cpu() in pi_wakeup_interrupt()
- Move ack_APIC_irq() to the beginning of pi_wakeup_interrupt()
- Rename 'pi_ctxt_switch_from' to 'ctxt_switch_prepare'
- Rename 'pi_ctxt_switch_to' to 'ctxt_switch_cancel'
- Use 'has_hvm_container_vcpu' instead of 'is_hvm_vcpu'
- Use 'spin_lock' and 'spin_unlock' when the interrupt has been
  already disabled.
- Rename arch_vcpu_wake_prepare to vmx_vcpu_wake_prepare
- Define vmx_vcpu_wake_prepare in xen/arch/x86/hvm/hvm.c
- Call .pi_ctxt_switch_to() in __context_switch() instead of directly
  calling vmx_post_ctx_switch_pi() in vmx_ctxt_switch_to()
- Make .pi_block_cpu unsigned int
- Use list_del() instead of list_del_init()
- Coding style

One remaining item in v7:
Jan has concern about calling vcpu_unblock() in vmx_pre_ctx_switch_pi(),
need Dario or George's input about this.

v6:
- Add two static inline functions for pi context switch
- Fix typos

v5:
- Rename arch_vcpu_wake to arch_vcpu_wake_prepare
- Make arch_vcpu_wake_prepare() inline for ARM
- Merge the ARM dummy hooks together
- Changes to some code comments
- Leave 'pi_ctxt_switch_from' and 'pi_ctxt_switch_to' NULL if
  PI is disabled or the vCPU is not in HVM
- Coding style

v4:
- Newly added

Changelog for "vmx: posted-interrupt handling when vCPU is blocked"
v6:
- Fix some typos
- Ack the interrupt right after the spin_unlock in pi_wakeup_interrupt()

v4:
- Use local variables in pi_wakeup_interrupt()
- Remove vcpu from the blocked list when pi_desc.on==1, this
  avoids kicking the vcpu multiple times.
- Remove tasklet

v3:
- This patch is generated by merging the following three patches in v2:
   [RFC v2 09/15] Add a new per-vCPU tasklet to wakeup the blocked vCPU
   [RFC v2 10/15] vmx: Define two per-cpu variables
   [RFC v2 11/15] vmx: Add a global wake-up vector for VT-d Posted-Interrupts
- rename 'vcpu_wakeup_tasklet' to 'pi_vcpu_wakeup_tasklet'
- Move the definition of 'pi_vcpu_wakeup_tasklet' to 'struct arch_vmx_struct'
- rename 'vcpu_wakeup_tasklet_handler' to 'pi_vcpu_wakeup_tasklet_handler'
- Make pi_wakeup_interrupt() static
- Rename 'blocked_vcpu_list' to 'pi_blocked_vcpu_list'
- move 'pi_blocked_vcpu_list' to 'struct arch_vmx_struct'
- Rename 'blocked_vcpu' to 'pi_blocked_vcpu'
- Rename 'blocked_vcpu_lock' to 'pi_blocked_vcpu_lock'

 xen/arch/x86/domain.c  |  12 ++
 xen/arch/x86/hvm/hvm.c |  18 +++
 xen/arch/x86/hvm/vmx/vmcs.c|   2 +
 xen/arch/x86/hvm/vmx/vmx.c | 265 +
 xen/common/schedule.c  |   9 ++
 xen/include/asm-arm/domain.h   |   4 +
 xen/include/asm-x86/domain.h   |   4 +
 xen/include/asm-x86/hvm/hvm.h  |   4 +
 xen/include/asm-x86/hvm/vmx/vmcs.h |  11 ++
 xen/include/asm-x86/hvm/vmx/vmx.h  |   4 +
 10 files changed, 333 insertions(+)

diff --git a/xen/arch/x86/domain.c b/x

[Xen-devel] [PATCH v8 05/17] vmx: Extend struct pi_desc to support VT-d Posted-Interrupts

2015-10-12 Thread Feng Wu
Extend struct pi_desc according to VT-d Posted-Interrupts Spec.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Reviewed-by: Andrew Cooper 
Acked-by: Kevin Tian 
Reviewed-by: Konrad Rzeszutek Wilk 
---
v8:
- Coding style

v7:
- Coding style.

v3:
- Use u32 instead of u64 for the bitfield in 'struct pi_desc'

 xen/include/asm-x86/hvm/vmx/vmcs.h | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
b/xen/include/asm-x86/hvm/vmx/vmcs.h
index f1126d4..81c9e63 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -80,8 +80,18 @@ struct vmx_domain {
 
 struct pi_desc {
 DECLARE_BITMAP(pir, NR_VECTORS);
-u32 control;
-u32 rsvd[7];
+union {
+struct {
+u16 on : 1,  /* bit 256 - Outstanding Notification */
+sn : 1,  /* bit 257 - Suppress Notification */
+rsvd_1 : 14; /* bit 271:258 - Reserved */
+u8  nv;  /* bit 279:272 - Notification Vector */
+u8  rsvd_2;  /* bit 287:280 - Reserved */
+u32 ndst;/* bit 319:288 - Notification Destination */
+};
+u64 control;
+};
+u32 rsvd[6];
 } __attribute__ ((aligned (64)));
 
 #define ept_get_wl(ept)   ((ept)->ept_wl)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 00/17] Add VT-d Posted-Interrupts support

2015-10-12 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. in the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

Feng Wu (17):
  VT-d Posted-intterrupt (PI) design
  Add cmpxchg16b support for x86-64
  iommu: Add iommu_intpost to control VT-d Posted-Interrupts feature
  vt-d: VT-d Posted-Interrupts feature detection
  vmx: Extend struct pi_desc to support VT-d Posted-Interrupts
  vmx: Add some helper functions for Posted-Interrupts
  vmx: Initialize VT-d Posted-Interrupts Descriptor
  vmx: Suppress posting interrupts when 'SN' is set
  VT-d: Remove pointless casts
  vt-d: Extend struct iremap_entry to support VT-d Posted-Interrupts
  vt-d: Add API to update IRTE when VT-d PI is used
  x86: move some APIC related macros to apicdef.h
  Update IRTE according to guest interrupt config changes
  vmx: Properly handle notification event when vCPU is running
  vmx: VT-d posted-interrupt core logic handling
  VT-d: Dump the posted format IRTE
  Add a command line parameter for VT-d posted-interrupts

 docs/misc/vtd-pi.txt   | 332 +++
 docs/misc/xen-command-line.markdown|   9 +-
 xen/arch/x86/domain.c  |  12 ++
 xen/arch/x86/hvm/hvm.c |  18 ++
 xen/arch/x86/hvm/vlapic.c  |   5 -
 xen/arch/x86/hvm/vmx/vmcs.c|  24 +++
 xen/arch/x86/hvm/vmx/vmx.c | 348 -
 xen/common/schedule.c  |   9 +
 xen/drivers/passthrough/io.c   | 123 +++-
 xen/drivers/passthrough/iommu.c|  16 +-
 xen/drivers/passthrough/vtd/intremap.c | 212 +++-
 xen/drivers/passthrough/vtd/iommu.c|  14 +-
 xen/drivers/passthrough/vtd/iommu.h|  51 +++--
 xen/drivers/passthrough/vtd/utils.c|  40 ++--
 xen/include/asm-arm/domain.h   |   4 +
 xen/include/asm-x86/apicdef.h  |   3 +
 xen/include/asm-x86/domain.h   |   4 +
 xen/include/asm-x86/hvm/hvm.h  |   4 +
 xen/include/asm-x86/hvm/vmx/vmcs.h |  25 ++-
 xen/include/asm-x86/hvm/vmx/vmx.h  |  27 +++
 xen/include/asm-x86/iommu.h|   2 +
 xen/include/asm-x86/x86_64/system.h|  33 
 xen/include/xen/iommu.h|   2 +-
 23 files changed, 1229 insertions(+), 88 deletions(-)
 create mode 100644 docs/misc/vtd-pi.txt

-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 16/17] VT-d: Dump the posted format IRTE

2015-10-12 Thread Feng Wu
Add the utility to dump the posted format IRTE.

CC: Yang Zhang 
CC: Kevin Tian 
Signed-off-by: Feng Wu 
---
v8:
- Coding style

v7:
- Remove the two stage loop

v6:
- Fix a typo

v4:
- Newly added

 xen/drivers/passthrough/vtd/utils.c | 28 +---
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/utils.c 
b/xen/drivers/passthrough/vtd/utils.c
index 6daa156..a1b3ebc 100644
--- a/xen/drivers/passthrough/vtd/utils.c
+++ b/xen/drivers/passthrough/vtd/utils.c
@@ -203,6 +203,9 @@ static void dump_iommu_info(unsigned char key)
 ecap_intr_remap(iommu->ecap) ? "" : "not ",
 (status & DMA_GSTS_IRES) ? " and enabled" : "" );
 
+printk("  Interrupt Posting: %ssupported.\n",
+   cap_intr_post(iommu->cap) ? "" : "not ");
+
 if ( status & DMA_GSTS_IRES )
 {
 /* Dump interrupt remapping table. */
@@ -213,8 +216,9 @@ static void dump_iommu_info(unsigned char key)
 
 printk("  Interrupt remapping table (nr_entry=%#x. "
 "Only dump P=1 entries here):\n", nr_entry);
-printk("   SVT  SQ   SID  DST  V  AVL DLM TM RH DM "
-   "FPD P\n");
+printk("R means remapped format, P means posted format.\n");
+printk("R:   SVT  SQ   SID  V  AVL FPD  DST DLM TM RH DM 
P\n");
+printk("P:   SVT  SQ   SID  V  AVL FPD  PDA  URG 
P\n");
 for ( i = 0; i < nr_entry; i++ )
 {
 struct iremap_entry *p;
@@ -232,11 +236,21 @@ static void dump_iommu_info(unsigned char key)
 
 if ( !p->remap.p )
 continue;
-printk("  %04x:  %x   %x  %04x %08x %02x%x   %x  %x  %x  
%x"
-"   %x %x\n", i,
-p->remap.svt, p->remap.sq, p->remap.sid, p->remap.dst,
-p->remap.vector, p->remap.avail, p->remap.dlm, p->remap.tm,
-p->remap.rh, p->remap.dm, p->remap.fpd, p->remap.p);
+if ( !p->remap.im )
+printk("R:  %04x:  %x   %x  %04x %02x%x   %x %08x   %x 
 %x  %x  %x %x\n",
+   i,
+   p->remap.svt, p->remap.sq, p->remap.sid,
+   p->remap.vector, p->remap.avail, p->remap.fpd,
+   p->remap.dst, p->remap.dlm, p->remap.tm, 
p->remap.rh,
+   p->remap.dm, p->remap.p);
+else
+printk("P:  %04x:  %x   %x  %04x %02x%x   %x %16lx
%x %x\n",
+   i,
+   p->post.svt, p->post.sq, p->post.sid, 
p->post.vector,
+   p->post.avail, p->post.fpd,
+   ((u64)p->post.pda_h << 32) | (p->post.pda_l << 6),
+   p->post.urg, p->post.p);
+
 print_cnt++;
 }
 if ( iremap_entries )
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v8 10/17] vt-d: Extend struct iremap_entry to support VT-d Posted-Interrupts

2015-10-12 Thread Feng Wu
Extend struct iremap_entry according to VT-d Posted-Interrupts Spec.

CC: Yang Zhang 
CC: Kevin Tian 
Signed-off-by: Feng Wu 
Acked-by: Kevin Tian 
---
v8:
- Make use of the __uint128_t member in struct iremap_entry when needed

v7:
- Add a __uint128_t member to the union in struct iremap_entry

v4:
- res_4 is not a bitfield, correct it.
- Expose 'im' to remapped irte as well.

v3:
- Use u32 instead of u64 to define the bitfields in 'struct iremap_entry'
- Limit using bitfield if possible

 xen/drivers/passthrough/vtd/intremap.c | 92 +-
 xen/drivers/passthrough/vtd/iommu.h| 44 ++--
 xen/drivers/passthrough/vtd/utils.c|  8 +--
 3 files changed, 81 insertions(+), 63 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index 987bbe9..8f135e1 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -122,9 +122,9 @@ static u16 hpetid_to_bdf(unsigned int hpet_id)
 static void set_ire_sid(struct iremap_entry *ire,
 unsigned int svt, unsigned int sq, unsigned int sid)
 {
-ire->hi.svt = svt;
-ire->hi.sq = sq;
-ire->hi.sid = sid;
+ire->remap.svt = svt;
+ire->remap.sq = sq;
+ire->remap.sid = sid;
 }
 
 static void set_ioapic_source_id(int apic_id, struct iremap_entry *ire)
@@ -219,7 +219,7 @@ static unsigned int alloc_remap_entry(struct iommu *iommu, 
unsigned int nr)
 else
 p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
 
-if ( p->lo_val || p->hi_val ) /* not a free entry */
+if ( p->val ) /* not a free entry */
 found = 0;
 else if ( ++found == nr )
 break;
@@ -253,7 +253,7 @@ static int remap_entry_to_ioapic_rte(
 GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
  iremap_entries, iremap_entry);
 
-if ( iremap_entry->hi_val == 0 && iremap_entry->lo_val == 0 )
+if ( iremap_entry->val == 0 )
 {
 dprintk(XENLOG_ERR VTDPREFIX,
 "%s: index (%d) get an empty entry!\n",
@@ -263,13 +263,13 @@ static int remap_entry_to_ioapic_rte(
 return -EFAULT;
 }
 
-old_rte->vector = iremap_entry->lo.vector;
-old_rte->delivery_mode = iremap_entry->lo.dlm;
-old_rte->dest_mode = iremap_entry->lo.dm;
-old_rte->trigger = iremap_entry->lo.tm;
+old_rte->vector = iremap_entry->remap.vector;
+old_rte->delivery_mode = iremap_entry->remap.dlm;
+old_rte->dest_mode = iremap_entry->remap.dm;
+old_rte->trigger = iremap_entry->remap.tm;
 old_rte->__reserved_2 = 0;
 old_rte->dest.logical.__reserved_1 = 0;
-old_rte->dest.logical.logical_dest = iremap_entry->lo.dst >> 8;
+old_rte->dest.logical.logical_dest = iremap_entry->remap.dst >> 8;
 
 unmap_vtd_domain_page(iremap_entries);
 spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
@@ -317,27 +317,28 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu,
 if ( rte_upper )
 {
 if ( x2apic_enabled )
-new_ire.lo.dst = value;
+new_ire.remap.dst = value;
 else
-new_ire.lo.dst = (value >> 24) << 8;
+new_ire.remap.dst = (value >> 24) << 8;
 }
 else
 {
 *(((u32 *)&new_rte) + 0) = value;
-new_ire.lo.fpd = 0;
-new_ire.lo.dm = new_rte.dest_mode;
-new_ire.lo.tm = new_rte.trigger;
-new_ire.lo.dlm = new_rte.delivery_mode;
+new_ire.remap.fpd = 0;
+new_ire.remap.dm = new_rte.dest_mode;
+new_ire.remap.tm = new_rte.trigger;
+new_ire.remap.dlm = new_rte.delivery_mode;
 /* Hardware require RH = 1 for LPR delivery mode */
-new_ire.lo.rh = (new_ire.lo.dlm == dest_LowestPrio);
-new_ire.lo.avail = 0;
-new_ire.lo.res_1 = 0;
-new_ire.lo.vector = new_rte.vector;
-new_ire.lo.res_2 = 0;
+new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
+new_ire.remap.avail = 0;
+new_ire.remap.res_1 = 0;
+new_ire.remap.vector = new_rte.vector;
+new_ire.remap.res_2 = 0;
 
 set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
-new_ire.hi.res_1 = 0;
-new_ire.lo.p = 1; /* finally, set present bit */
+new_ire.remap.res_3 = 0;
+new_ire.remap.res_4 = 0;
+new_ire.remap.p = 1; /* finally, set present bit */
 
 /* now construct new ioapic rte entry */
 remap_rte->vector = new_rte.vector;
@@ -510,7 +511,7 @@ static int remap_entry_to_msi_msg(
 GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
  iremap_entries, iremap_entry);
 
-if ( iremap_entry->hi_val == 0 && iremap_entry->lo_val == 0 )
+if ( 

[Xen-devel] [PATCH v5 02/17] Add cmpxchg16b support for x86-64

2015-08-11 Thread Feng Wu
This patch adds cmpxchg16b support for x86-64, so software
can perform 128-bit atomic write/read.

CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
---
v5:
- Change back the parameters of __cmpxchg16b() to __uint128_t *
- Remove pointless cast for 'ptr'
- Remove pointless parentheses
- Use A constraint for the output

v4:
- Use pointer as the parameter of __cmpxchg16b().
- Use gcc's __uint128_t built-in type
- Make the parameters of __cmpxchg16b() void *

v3:
- Newly added.

 xen/include/asm-x86/x86_64/system.h | 28 
 1 file changed, 28 insertions(+)

diff --git a/xen/include/asm-x86/x86_64/system.h 
b/xen/include/asm-x86/x86_64/system.h
index 662813a..ebbe4b5 100644
--- a/xen/include/asm-x86/x86_64/system.h
+++ b/xen/include/asm-x86/x86_64/system.h
@@ -6,6 +6,34 @@
(unsigned long)(n),sizeof(*(ptr
 
 /*
+ * Atomic 16 bytes compare and exchange.  Compare OLD with MEM, if
+ * identical, store NEW in MEM.  Return the initial value in MEM.
+ * Success is indicated by comparing RETURN with OLD.
+ *
+ * This function can only be called when cpu_has_cx16 is true.
+ */
+
+static always_inline __uint128_t __cmpxchg16b(
+volatile void *ptr, __uint128_t *old, __uint128_t *new)
+{
+__uint128_t prev;
+uint64_t new_high = *new >> 64;
+uint64_t new_low = (uint64_t)*new;
+
+ASSERT(cpu_has_cx16);
+
+asm volatile ( "lock; cmpxchg16b %3"
+   : "=A" (prev)
+   : "c" (new_high), "b" (new_low), "m" (*__xg(ptr)), "0" 
(*old)
+   : "memory" );
+
+return prev;
+}
+
+#define cmpxchg16b(ptr,o,n) \
+__cmpxchg16b((ptr), (__uint128_t *)(o), (__uint128_t *)(n))
+
+/*
  * This function causes value _o to be changed to _n at location _p.
  * If this access causes a fault then we return 1, otherwise we return 0.
  * If no fault occurs then _o is updated to the value we saw at _p. If this
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 00/17] Add VT-d Posted-Interrupts support

2015-08-11 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

Feng Wu (17):
  VT-d Posted-intterrupt (PI) design
  Add cmpxchg16b support for x86-64
  iommu: Add iommu_intpost to control VT-d Posted-Interrupts feature
  vt-d: VT-d Posted-Interrupts feature detection
  vmx: Extend struct pi_desc to support VT-d Posted-Interrupts
  vmx: Add some helper functions for Posted-Interrupts
  vmx: Initialize VT-d Posted-Interrupts Descriptor
  vmx: Suppress posting interrupts when 'SN' is set
  VT-d: Remove pointless casts
  vt-d: Extend struct iremap_entry to support VT-d Posted-Interrupts
  vt-d: Add API to update IRTE when VT-d PI is used
  Update IRTE according to guest interrupt config changes
  vmx: posted-interrupt handling when vCPU is blocked
  vmx: Properly handle notification event when vCPU is running
  vmx: Add some scheduler hooks for VT-d posted interrupts
  VT-d: Dump the posted format IRTE
  Add a command line parameter for VT-d posted-interrupts

 docs/misc/vtd-pi.txt   | 333 +
 docs/misc/xen-command-line.markdown|   9 +-
 xen/arch/x86/domain.c  |  11 ++
 xen/arch/x86/hvm/vmx/vmcs.c|  21 +++
 xen/arch/x86/hvm/vmx/vmx.c | 286 +++-
 xen/common/schedule.c  |   2 +
 xen/drivers/passthrough/io.c   | 124 +++-
 xen/drivers/passthrough/iommu.c|  16 +-
 xen/drivers/passthrough/vtd/intremap.c | 204 +++-
 xen/drivers/passthrough/vtd/iommu.c|  17 +-
 xen/drivers/passthrough/vtd/iommu.h|  46 +++--
 xen/drivers/passthrough/vtd/utils.c|  57 +-
 xen/include/asm-arm/domain.h   |   2 +
 xen/include/asm-x86/domain.h   |   3 +
 xen/include/asm-x86/hvm/hvm.h  |   2 +
 xen/include/asm-x86/hvm/vmx/vmcs.h |  26 ++-
 xen/include/asm-x86/hvm/vmx/vmx.h  |  28 +++
 xen/include/asm-x86/iommu.h|   2 +
 xen/include/asm-x86/x86_64/system.h|  28 +++
 xen/include/xen/iommu.h|   2 +-
 20 files changed, 1140 insertions(+), 79 deletions(-)
 create mode 100644 docs/misc/vtd-pi.txt

-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 08/17] vmx: Suppress posting interrupts when 'SN' is set

2015-08-11 Thread Feng Wu
Currently, we don't support urgent interrupt, all interrupts
are recognized as non-urgent interrupt, so we cannot send
posted-interrupt when 'SN' is set.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
---
v5:
- keep the vcpu_kick() at the end of vmx_deliver_posted_intr()
- Keep the 'return' after calling __vmx_deliver_posted_interrupt()

v4:
- Coding style.
- V3 removes a vcpu_kick() from the eoi_exitmap_changed path
  incorrectly, fix it.

v3:
- use cmpxchg to test SN/ON and set ON

 xen/arch/x86/hvm/vmx/vmx.c | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c32d863..d2a4cfb 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1701,8 +1701,35 @@ static void vmx_deliver_posted_intr(struct vcpu *v, u8 
vector)
  */
 pi_set_on(&v->arch.hvm_vmx.pi_desc);
 }
-else if ( !pi_test_and_set_on(&v->arch.hvm_vmx.pi_desc) )
+else
 {
+struct pi_desc old, new, prev;
+
+prev.control = 0;
+
+do {
+/*
+ * Currently, we don't support urgent interrupt, all
+ * interrupts are recognized as non-urgent interrupt,
+ * so we cannot send posted-interrupt when 'SN' is set.
+ * Besides that, if 'ON' is already set, we cannot set
+ * posted-interrupts as well.
+ */
+if ( pi_test_sn(&prev) || pi_test_on(&prev) )
+{
+vcpu_kick(v);
+return;
+}
+
+old.control = v->arch.hvm_vmx.pi_desc.control &
+  ~( 1 << POSTED_INTR_ON | 1 << POSTED_INTR_SN );
+new.control = v->arch.hvm_vmx.pi_desc.control |
+  1 << POSTED_INTR_ON;
+
+prev.control = cmpxchg(&v->arch.hvm_vmx.pi_desc.control,
+   old.control, new.control);
+} while ( prev.control != old.control );
+
 __vmx_deliver_posted_interrupt(v);
 return;
 }
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 05/17] vmx: Extend struct pi_desc to support VT-d Posted-Interrupts

2015-08-11 Thread Feng Wu
Extend struct pi_desc according to VT-d Posted-Interrupts Spec.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Reviewed-by: Andrew Cooper 
Acked-by: Kevin Tian 
---
v3:
- Use u32 instead of u64 for the bitfield in 'struct pi_desc'

 xen/include/asm-x86/hvm/vmx/vmcs.h | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
b/xen/include/asm-x86/hvm/vmx/vmcs.h
index f1126d4..7e81752 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -80,8 +80,19 @@ struct vmx_domain {
 
 struct pi_desc {
 DECLARE_BITMAP(pir, NR_VECTORS);
-u32 control;
-u32 rsvd[7];
+union {
+struct
+{
+u16 on : 1,  /* bit 256 - Outstanding Notification */
+sn : 1,  /* bit 257 - Suppress Notification */
+rsvd_1 : 14; /* bit 271:258 - Reserved */
+u8  nv;  /* bit 279:272 - Notification Vector */
+u8  rsvd_2;  /* bit 287:280 - Reserved */
+u32 ndst;/* bit 319:288 - Notification Destination */
+};
+u64 control;
+};
+u32 rsvd[6];
 } __attribute__ ((aligned (64)));
 
 #define ept_get_wl(ept)   ((ept)->ept_wl)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 01/17] VT-d Posted-intterrupt (PI) design

2015-08-11 Thread Feng Wu
Add the design doc for VT-d PI.

CC: Kevin Tian 
CC: Yang Zhang 
CC: Jan Beulich 
CC: Keir Fraser 
CC: Andrew Cooper 
CC: George Dunlap 
Signed-off-by: Feng Wu 
Reviewed-by: Kevin Tian 
---
 docs/misc/vtd-pi.txt | 333 +++
 1 file changed, 333 insertions(+)
 create mode 100644 docs/misc/vtd-pi.txt

diff --git a/docs/misc/vtd-pi.txt b/docs/misc/vtd-pi.txt
new file mode 100644
index 000..98a77ba
--- /dev/null
+++ b/docs/misc/vtd-pi.txt
@@ -0,0 +1,333 @@
+Authors: Feng Wu 
+
+VT-d Posted-interrupt (PI) design for XEN
+
+Background
+==
+With the development of virtualization, there are more and more device
+assignment requirements. However, today when a VM is running with
+assigned devices (such as, NIC), external interrupt handling for the assigned
+devices always needs VMM intervention.
+
+VT-d Posted-interrupt is a more enhanced method to handle interrupts
+in the virtualization environment. Interrupt posting is the process by
+which an interrupt request is recorded in a memory-resident
+posted-interrupt-descriptor structure by the root-complex, followed by
+an optional notification event issued to the CPU complex.
+
+With VT-d Posted-interrupt we can get the following advantages:
+- Direct delivery of external interrupts to running vCPUs without VMM
+intervention
+- Decrease the interrupt migration complexity. On vCPU migration, software
+can atomically co-migrate all interrupts targeting the migrating vCPU. For
+virtual machines with assigned devices, migrating a vCPU across pCPUs
+either incurs the overhead of forwarding interrupts in software (e.g. via VMM
+generated IPIs), or the complexity of independently migrating each interrupt 
targeting
+the vCPU to the new pCPU. However, after enabling VT-d PI, the destination vCPU
+of an external interrupt from assigned devices is stored in the IRTE (i.e.
+Posted-interrupt Descriptor Address), when vCPU is migrated to another pCPU,
+we will set this new pCPU in the 'NDST' field of Posted-interrupt descriptor, 
this
+makes the interrupt migration automatic.
+
+Here is what Xen currently does for external interrupts from assigned devices:
+
+When a VM is running and an external interrupt from an assigned device occurs
+for it, a VM-Exit happens, and then:
+
+vmx_do_extint() --> do_IRQ() --> __do_IRQ_guest() --> hvm_do_IRQ_dpci() -->
+raise_softirq_for(pirq_dpci) --> raise_softirq(HVM_DPCI_SOFTIRQ)
+
+softirq HVM_DPCI_SOFTIRQ is bound to dpci_softirq()
+
+dpci_softirq() --> hvm_dirq_assist() --> vmsi_deliver_pirq() --> 
vmsi_deliver() -->
+vmsi_inj_irq() --> vlapic_set_irq()
+
+vlapic_set_irq() does the following things:
+1. If CPU-side posted-interrupt is supported, call vmx_deliver_posted_intr() 
to deliver
+the virtual interrupt via posted-interrupt infrastructure.
+2. Else if CPU-side posted-interrupt is not supported, set the related vIRR in 
vLAPIC
+page and call vcpu_kick() to kick the related vCPU. Before VM-Entry, 
vmx_intr_assist()
+will help to inject the interrupt to guests.
+
+However, after VT-d PI is supported, when a guest is running in non-root and an
+external interrupt from an assigned device occurs for it, no VM-Exit is needed;
+the guest can handle this totally in non-root mode, thus avoiding all the above
+code flow.
+
+Posted-interrupt Introduction
+
+There are two components to the Posted-interrupt architecture:
+Processor Support and Root-Complex Support
+
+- Processor Support
+Posted-interrupt processing is a feature by which a processor processes
+the virtual interrupts by recording them as pending on the virtual-APIC
+page.
+
+Posted-interrupt processing is enabled by setting the "process posted
+interrupts" VM-execution control. The processing is performed in response
+to the arrival of an interrupt with the posted-interrupt notification vector.
+In response to such an interrupt, the processor processes virtual interrupts
+recorded in a data structure called a posted-interrupt descriptor.
+
+For more information about APICv and CPU-side Posted-interrupt, please refer
+to Chapter 29 and Section 29.6 of the Intel SDM:
+http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
+
+- Root-Complex Support
+Interrupt posting is the process by which an interrupt request (from IOAPIC
+or MSI/MSIx capable sources) is recorded in a memory-resident
+posted-interrupt-descriptor structure by the root-complex, followed by
+an optional notification event issued to the CPU complex. The interrupt
+request arriving at the root-complex carries the identity of the interrupt
+request source and a 'remapping-index'. The remapping-index is used to
+look-up an entry from the memory-resident interrupt-remap-table. Unlike
+with interrupt-remapping, the interrupt-remap-table-entry for a posted-
+interrupt, specifies a virtual-vector and a pointer to the posted-interrupt
+descript

[Xen-devel] [PATCH v5 03/17] iommu: Add iommu_intpost to control VT-d Posted-Interrupts feature

2015-08-11 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

This patch adds the variable 'iommu_intpost' to control whether VT-d
posted-interrupts are enabled in the generic IOMMU code.

CC: Jan Beulich 
CC: Kevin Tian 
Signed-off-by: Feng Wu 
Reviewed-by: Kevin Tian 
---
v5:
- Remove the "if no intremap then no intpost" logic in parse_iommu_param(), 
which
  can be covered in iommu_setup()

v3:
- Remove pointless initializer for 'iommu_intpost'.
- Some adjustment for "if no intremap then no intpost" logic.
* For parse_iommu_param(), move it to the end of the function,
  so we don't need to add the same logic when introducing the
  new kernel parameter 'intpost' in a later patch.
* Add this logic in iommu_setup() after iommu_hardware_setup()
  is called.

 xen/drivers/passthrough/iommu.c | 13 -
 xen/include/xen/iommu.h |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 11d58cc..8eb77f7 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -50,6 +50,14 @@ bool_t __read_mostly iommu_passthrough;
 bool_t __read_mostly iommu_snoop = 1;
 bool_t __read_mostly iommu_qinval = 1;
 bool_t __read_mostly iommu_intremap = 1;
+
+/*
+ * In the current implementation of VT-d posted interrupts, in some extreme
+ * cases, the per cpu list which saves the blocked vCPU will be very long,
+ * and this will affect the interrupt latency, so let this feature off by
+ * default until we find a good solution to resolve it.
+ */
+bool_t __read_mostly iommu_intpost;
 bool_t __read_mostly iommu_hap_pt_share = 1;
 bool_t __read_mostly iommu_debug;
 bool_t __read_mostly amd_iommu_perdev_intremap = 1;
@@ -304,6 +312,9 @@ int __init iommu_setup(void)
 panic("Couldn't enable %s and iommu=required/force",
   !iommu_enabled ? "IOMMU" : "Interrupt Remapping");
 
+if ( !iommu_intremap )
+iommu_intpost = 0;
+
 if ( !iommu_enabled )
 {
 iommu_snoop = 0;
@@ -371,7 +382,7 @@ void iommu_crash_shutdown(void)
 const struct iommu_ops *ops = iommu_get_ops();
 if ( iommu_enabled )
 ops->crash_shutdown();
-iommu_enabled = iommu_intremap = 0;
+iommu_enabled = iommu_intremap = iommu_intpost = 0;
 }
 
 int iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 705969b..da326a1 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -30,7 +30,7 @@
 extern bool_t iommu_enable, iommu_enabled;
 extern bool_t force_iommu, iommu_verbose;
 extern bool_t iommu_workaround_bios_bug, iommu_passthrough;
-extern bool_t iommu_snoop, iommu_qinval, iommu_intremap;
+extern bool_t iommu_snoop, iommu_qinval, iommu_intremap, iommu_intpost;
 extern bool_t iommu_hap_pt_share;
 extern bool_t iommu_debug;
 extern bool_t amd_iommu_perdev_intremap;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 06/17] vmx: Add some helper functions for Posted-Interrupts

2015-08-11 Thread Feng Wu
This patch adds some helper functions to manipulate the
Posted-Interrupts Descriptor.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
---
v4:
- Newly added

 xen/include/asm-x86/hvm/vmx/vmx.h | 21 +
 1 file changed, 21 insertions(+)

diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index 3fbfa44..acd4aec 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -101,6 +101,7 @@ void vmx_update_cpu_exec_control(struct vcpu *v);
 void vmx_update_secondary_exec_control(struct vcpu *v);
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
 static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
 {
 return test_and_set_bit(vector, pi_desc->pir);
@@ -121,11 +122,31 @@ static inline int pi_test_and_clear_on(struct pi_desc 
*pi_desc)
 return test_and_clear_bit(POSTED_INTR_ON, &pi_desc->control);
 }
 
+static inline int pi_test_on(struct pi_desc *pi_desc)
+{
+return test_bit(POSTED_INTR_ON, &pi_desc->control);
+}
+
 static inline unsigned long pi_get_pir(struct pi_desc *pi_desc, int group)
 {
 return xchg(&pi_desc->pir[group], 0);
 }
 
+static inline int pi_test_sn(struct pi_desc *pi_desc)
+{
+return test_bit(POSTED_INTR_SN, &pi_desc->control);
+}
+
+static inline void pi_set_sn(struct pi_desc *pi_desc)
+{
+set_bit(POSTED_INTR_SN, &pi_desc->control);
+}
+
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+clear_bit(POSTED_INTR_SN, &pi_desc->control);
+}
+
 /*
  * Exit Reasons
  */
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 07/17] vmx: Initialize VT-d Posted-Interrupts Descriptor

2015-08-11 Thread Feng Wu
This patch initializes the VT-d Posted-interrupt Descriptor.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Acked-by: Kevin Tian 
---
v3:
- Move pi_desc_init() to xen/arch/x86/hvm/vmx/vmcs.c
- Remove the 'inline' flag of pi_desc_init()

 xen/arch/x86/hvm/vmx/vmcs.c   | 18 ++
 xen/include/asm-x86/hvm/vmx/vmx.h |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index a0a97e7..28c553f 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static bool_t __read_mostly opt_vpid_enabled = 1;
 boolean_param("vpid", opt_vpid_enabled);
@@ -951,6 +952,20 @@ void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, 
u64 val)
 virtual_vmcs_exit(vvmcs);
 }
 
+static void pi_desc_init(struct vcpu *v)
+{
+uint32_t dest;
+
+v->arch.hvm_vmx.pi_desc.nv = posted_intr_vector;
+
+dest = cpu_physical_id(v->processor);
+
+if ( x2apic_enabled )
+v->arch.hvm_vmx.pi_desc.ndst = dest;
+else
+v->arch.hvm_vmx.pi_desc.ndst = MASK_INSR(dest, PI_xAPIC_NDST_MASK);
+}
+
 static int construct_vmcs(struct vcpu *v)
 {
 struct domain *d = v->domain;
@@ -1089,6 +1104,9 @@ static int construct_vmcs(struct vcpu *v)
 
 if ( cpu_has_vmx_posted_intr_processing )
 {
+if ( iommu_intpost )
+pi_desc_init(v);
+
 __vmwrite(PI_DESC_ADDR, virt_to_maddr(&v->arch.hvm_vmx.pi_desc));
 __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
 }
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index acd4aec..03c529c 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -88,6 +88,8 @@ typedef enum {
 #define EPT_EMT_WB  6
 #define EPT_EMT_RSV27
 
+#define PI_xAPIC_NDST_MASK  0xFF00
+
 void vmx_asm_vmexit_handler(struct cpu_user_regs);
 void vmx_asm_do_vmentry(void);
 void vmx_intr_assist(void);
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 09/17] VT-d: Remove pointless casts

2015-08-11 Thread Feng Wu
Remove pointless casts.

Signed-off-by: Feng Wu 
---
v5:
- Newly added.

 xen/drivers/passthrough/vtd/utils.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/utils.c 
b/xen/drivers/passthrough/vtd/utils.c
index 44c4ef5..162b764 100644
--- a/xen/drivers/passthrough/vtd/utils.c
+++ b/xen/drivers/passthrough/vtd/utils.c
@@ -234,10 +234,9 @@ static void dump_iommu_info(unsigned char key)
 continue;
 printk("  %04x:  %x   %x  %04x %08x %02x%x   %x  %x  %x  
%x"
 "   %x %x\n", i,
-(u32)p->hi.svt, (u32)p->hi.sq, (u32)p->hi.sid,
-(u32)p->lo.dst, (u32)p->lo.vector, (u32)p->lo.avail,
-(u32)p->lo.dlm, (u32)p->lo.tm, (u32)p->lo.rh,
-(u32)p->lo.dm, (u32)p->lo.fpd, (u32)p->lo.p);
+p->hi.svt, p->hi.sq, p->hi.sid, p->lo.dst, p->lo.vector,
+p->lo.avail, p->lo.dlm, p->lo.tm, p->lo.rh, p->lo.dm,
+p->lo.fpd, p->lo.p);
 print_cnt++;
 }
 if ( iremap_entries )
@@ -281,11 +280,10 @@ static void dump_iommu_info(unsigned char key)
 
 printk("   %02x:  %04x   %x%x   %x   %x   %x%x"
 "%x %02x\n", i,
-(u32)remap->index_0_14 | ((u32)remap->index_15 << 15),
-(u32)remap->format, (u32)remap->mask, (u32)remap->trigger,
-(u32)remap->irr, (u32)remap->polarity,
-(u32)remap->delivery_status, (u32)remap->delivery_mode,
-(u32)remap->vector);
+remap->index_0_14 | ((u32)remap->index_15 << 15),
+remap->format, remap->mask, remap->trigger, remap->irr,
+remap->polarity, remap->delivery_status, 
remap->delivery_mode,
+remap->vector);
 }
 }
 }
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 04/17] vt-d: VT-d Posted-Interrupts feature detection

2015-08-11 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

CC: Yang Zhang 
CC: Kevin Tian 
Signed-off-by: Feng Wu 
---
v5:
- Remove blank line

v4:
- Correct a logic error when setting iommu_intpost to 0

v3:
- Remove the "if no intremap then no intpost" logic in
  intel_vtd_setup(), it is covered in the iommu_setup().
- Add "if no intremap then no intpost" logic in the end
  of init_vtd_hw() which is called by vtd_resume().

So the logic exists in the following three places:
- parse_iommu_param()
- iommu_setup()
- init_vtd_hw()

 xen/drivers/passthrough/vtd/iommu.c | 17 +++--
 xen/drivers/passthrough/vtd/iommu.h |  1 +
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index 1dffc40..52c7cc9 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2079,6 +2079,9 @@ static int init_vtd_hw(void)
 disable_intremap(drhd->iommu);
 }
 
+if ( !iommu_intremap )
+iommu_intpost = 0;
+
 /*
  * Set root entries for each VT-d engine.  After set root entry,
  * must globally invalidate context cache, and then globally
@@ -2147,8 +2150,8 @@ int __init intel_vtd_setup(void)
 }
 
 /* We enable the following features only if they are supported by all VT-d
- * engines: Snoop Control, DMA passthrough, Queued Invalidation and
- * Interrupt Remapping.
+ * engines: Snoop Control, DMA passthrough, Queued Invalidation, Interrupt
+ * Remapping, and Posted Interrupt
  */
 for_each_drhd_unit ( drhd )
 {
@@ -2176,6 +2179,14 @@ int __init intel_vtd_setup(void)
 if ( iommu_intremap && !ecap_intr_remap(iommu->ecap) )
 iommu_intremap = 0;
 
+/*
+ * We cannot use posted interrupt if X86_FEATURE_CX16 is
+ * not supported, since we count on this feature to
+ * atomically update 16-byte IRTE in posted format.
+ */
+if ( !iommu_intremap || !cap_intr_post(iommu->cap) || !cpu_has_cx16 )
+iommu_intpost = 0;
+
 if ( !vtd_ept_page_compatible(iommu) )
 iommu_hap_pt_share = 0;
 
@@ -2201,6 +2212,7 @@ int __init intel_vtd_setup(void)
 P(iommu_passthrough, "Dom0 DMA Passthrough");
 P(iommu_qinval, "Queued Invalidation");
 P(iommu_intremap, "Interrupt Remapping");
+P(iommu_intpost, "Posted Interrupt");
 P(iommu_hap_pt_share, "Shared EPT tables");
 #undef P
 
@@ -2220,6 +2232,7 @@ int __init intel_vtd_setup(void)
 iommu_passthrough = 0;
 iommu_qinval = 0;
 iommu_intremap = 0;
+iommu_intpost = 0;
 return ret;
 }
 
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index ac71ed1..22abefe 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -61,6 +61,7 @@
 /*
  * Decoding Capability Register
  */
+#define cap_intr_post(c)   (((c) >> 59) & 1)
 #define cap_read_drain(c)  (((c) >> 55) & 1)
 #define cap_write_drain(c) (((c) >> 54) & 1)
 #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 14/17] vmx: Properly handle notification event when vCPU is running

2015-08-11 Thread Feng Wu
When a vCPU is running in Root mode and a notification event
has been injected to it, we need to set VCPU_KICK_SOFTIRQ for
the current cpu, so the pending interrupt in PIRR will be
synced to vIRR before VM-Exit in time.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Acked-by: Kevin Tian 
---
v4:
- Coding style.

v3:
- Make pi_notification_interrupt() static

 xen/arch/x86/hvm/vmx/vmx.c | 47 +-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index e80d888..c8a4371 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2033,6 +2033,51 @@ static void pi_wakeup_interrupt(struct cpu_user_regs 
*regs)
 this_cpu(irq_count)++;
 }
 
+/* Handle VT-d posted-interrupt when VCPU is running. */
+static void pi_notification_interrupt(struct cpu_user_regs *regs)
+{
+/*
+ * We get here when a vCPU is running in root-mode (such as via hypercall,
+ * or any other reasons which can result in VM-Exit), and before vCPU is
+ * back to non-root, external interrupts from an assigned device happen
+ * and a notification event is delivered to this logical CPU.
+ *
+ * we need to set VCPU_KICK_SOFTIRQ for the current cpu, just like
+ * __vmx_deliver_posted_interrupt(). So the pending interrupt in PIRR will
+ * be synced to vIRR before VM-Exit in time.
+ *
+ * Please refer to the following code fragments from
+ * xen/arch/x86/hvm/vmx/entry.S:
+ *
+ * .Lvmx_do_vmentry
+ *
+ *  ..
+ *  point 1
+ *
+ *  cmp  %ecx,(%rdx,%rax,1)
+ *  jnz  .Lvmx_process_softirqs
+ *
+ *  ..
+ *
+ *  je   .Lvmx_launch
+ *
+ *  ..
+ *
+ * .Lvmx_process_softirqs:
+ *  sti
+ *  call do_softirq
+ *  jmp  .Lvmx_do_vmentry
+ *
+ * If VT-d engine issues a notification event at point 1 above, it cannot
+ * be delivered to the guest during this VM-entry without raising the
+ * softirq in this notification handler.
+ */
+raise_softirq(VCPU_KICK_SOFTIRQ);
+
+ack_APIC_irq();
+this_cpu(irq_count)++;
+}
+
 const struct hvm_function_table * __init start_vmx(void)
 {
 set_in_cr4(X86_CR4_VMXE);
@@ -2071,7 +2116,7 @@ const struct hvm_function_table * __init start_vmx(void)
 
 if ( cpu_has_vmx_posted_intr_processing )
 {
-alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt);
+alloc_direct_apic_vector(&posted_intr_vector, 
pi_notification_interrupt);
 
 if ( iommu_intpost )
 alloc_direct_apic_vector(&pi_wakeup_vector, pi_wakeup_interrupt);
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 16/17] VT-d: Dump the posted format IRTE

2015-08-11 Thread Feng Wu
Add the utility to dump the posted format IRTE.

CC: Yang Zhang 
CC: Kevin Tian 
Signed-off-by: Feng Wu 
---
v4:
- Newly added

 xen/drivers/passthrough/vtd/utils.c | 41 -
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/xen/drivers/passthrough/vtd/utils.c 
b/xen/drivers/passthrough/vtd/utils.c
index 9d556da..1848385 100644
--- a/xen/drivers/passthrough/vtd/utils.c
+++ b/xen/drivers/passthrough/vtd/utils.c
@@ -203,6 +203,9 @@ static void dump_iommu_info(unsigned char key)
 ecap_intr_remap(iommu->ecap) ? "" : "not ",
 (status & DMA_GSTS_IRES) ? " and enabled" : "" );
 
+printk("  Interrupt Posting: %ssupported.\n",
+cap_intr_post(iommu->ecap) ? "" : "not ");
+
 if ( status & DMA_GSTS_IRES )
 {
 /* Dump interrupt remapping table. */
@@ -213,6 +216,7 @@ static void dump_iommu_info(unsigned char key)
 
 printk("  Interrupt remapping table (nr_entry=%#x. "
 "Only dump P=1 entries here):\n", nr_entry);
+printk ("Entries for remapped format:\n");
 printk("   SVT  SQ   SID  DST  V  AVL DLM TM RH DM "
"FPD P\n");
 for ( i = 0; i < nr_entry; i++ )
@@ -230,7 +234,7 @@ static void dump_iommu_info(unsigned char key)
 else
 p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
 
-if ( !p->remap.p )
+if ( !p->remap.p || p->remap.im )
 continue;
 printk("  %04x:  %x   %x  %04x %08x %02x%x   %x  %x  %x  
%x"
 "   %x %x\n", i,
@@ -239,6 +243,41 @@ static void dump_iommu_info(unsigned char key)
 p->remap.rh, p->remap.dm, p->remap.fpd, p->remap.p);
 print_cnt++;
 }
+
+if ( iremap_entries )
+unmap_vtd_domain_page(iremap_entries);
+
+iremap_entries = NULL;
+printk ("\nEntries for posted format:\n");
+printk("   SVT  SQ   SID  PDA  V  URG AVL FPD 
P\n");
+for ( i = 0; i < nr_entry; i++ )
+{
+struct iremap_entry *p;
+if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
+{
+/* This entry across page boundry */
+if ( iremap_entries )
+unmap_vtd_domain_page(iremap_entries);
+
+GET_IREMAP_ENTRY(iremap_maddr, i,
+ iremap_entries, p);
+}
+else
+p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
+
+if ( !p->post.p || !p->post.im )
+continue;
+
+printk("  %04x:  %x   %x  %04x %16lx %02x%x   %x  %x  
%x\n",
+i,
+p->post.svt, p->post.sq, p->post.sid,
+((u64)p->post.pda_h << 32) | (p->post.pda_l << 6),
+p->post.vector, p->post.urg, p->post.avail, p->post.fpd,
+p->post.p);
+
+print_cnt++;
+}
+
 if ( iremap_entries )
 unmap_vtd_domain_page(iremap_entries);
 if ( iommu_ir_ctrl(iommu)->iremap_num != print_cnt )
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 12/17] Update IRTE according to guest interrupt config changes

2015-08-11 Thread Feng Wu
When guest changes its interrupt configuration (such as, vector, etc.)
for direct-assigned devices, we need to update the associated IRTE
with the new guest vector, so external interrupts from the assigned
devices can be injected to guests without VM-Exit.

For lowest-priority interrupts, we use vector-hashing mechanism to find
the destination vCPU. This follows the hardware behavior, since modern
Intel CPUs use vector hashing to handle the lowest-priority interrupt.

For multicast/broadcast vCPU, we cannot handle it via interrupt posting,
still use interrupt remapping.

CC: Jan Beulich 
Signed-off-by: Feng Wu 
---
v5:
- Make 'struct vcpu *vcpu' const

v4:
- Make some 'int' variables 'unsigned int' in pi_find_dest_vcpu()
- Make 'dest_id' uint32_t
- Rename 'size' to 'bitmap_array_size'
- find_next_bit() and find_first_bit() always return unsigned int,
  so no need to check whether the return value is less than 0.
- Message error level XENLOG_G_WARNING -> XENLOG_G_INFO
- Remove useless warning message
- Create a separate function vector_hashing_dest() to find the
  destination of lowest-priority interrupts.
- Change some comments

v3:
- Use a bitmap to store all the possible destination vCPUs of an
  interrupt, then trying to find the right destination from the bitmap
- Typo and some small changes

 xen/drivers/passthrough/io.c | 124 ++-
 1 file changed, 123 insertions(+), 1 deletion(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index bda9374..f62f86c 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static DEFINE_PER_CPU(struct list_head, dpci_list);
 
@@ -198,6 +199,108 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
 xfree(dpci);
 }
 
+/*
+ * This routine handles lowest-priority interrupts using vector-hashing
+ * mechanism. As an example, modern Intel CPUs use this method to handle
+ * lowest-priority interrupts.
+ *
+ * Here is the details about the vector-hashing mechanism:
+ * 1. For lowest-priority interrupts, store all the possible destination
+ *vCPUs in an array.
+ * 2. Use "gvec % max number of destination vCPUs" to find the right
+ *destination vCPU in the array for the lowest-priority interrupt.
+ */
+static struct vcpu *vector_hashing_dest(const struct domain *d,
+uint32_t dest_id,
+bool_t dest_mode,
+uint8_t gvec)
+
+{
+unsigned long *dest_vcpu_bitmap;
+unsigned int dest_vcpu_num = 0, idx;
+unsigned int bitmap_array_size = BITS_TO_LONGS(d->max_vcpus);
+struct vcpu *v, *dest = NULL;
+unsigned int i;
+
+dest_vcpu_bitmap = xzalloc_array(unsigned long, bitmap_array_size);
+if ( !dest_vcpu_bitmap )
+{
+dprintk(XENLOG_G_INFO,
+"dom%d: failed to allocate memory\n", d->domain_id);
+return NULL;
+}
+
+for_each_vcpu ( d, v )
+{
+if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, 0,
+dest_id, dest_mode) )
+continue;
+
+__set_bit(v->vcpu_id, dest_vcpu_bitmap);
+dest_vcpu_num++;
+}
+
+if ( dest_vcpu_num != 0 )
+{
+idx = 0;
+
+for ( i = gvec % dest_vcpu_num; i >= 0; i--)
+{
+idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1;
+BUG_ON(idx >= d->max_vcpus);
+}
+idx--;
+
+dest = d->vcpu[idx];
+}
+
+xfree(dest_vcpu_bitmap);
+
+return dest;
+}
+
+/*
+ * The purpose of this routine is to find the right destination vCPU for
+ * an interrupt which will be delivered by VT-d posted-interrupt. There
+ * are several cases as below:
+ *
+ * - For lowest-priority interrupts, use vector-hashing mechanism to find
+ *   the destination.
+ * - Otherwise, for single destination interrupt, it is straightforward to
+ *   find the destination vCPU and return true.
+ * - For multicast/broadcast vCPU, we cannot handle it via interrupt posting,
+ *   so return NULL.
+ */
+static struct vcpu *pi_find_dest_vcpu(const struct domain *d, uint32_t dest_id,
+  bool_t dest_mode, uint8_t delivery_mode,
+  uint8_t gvec)
+{
+unsigned int dest_vcpu_num = 0;
+struct vcpu *v, *dest = NULL;
+
+if ( delivery_mode == dest_LowestPrio )
+return vector_hashing_dest(d, dest_id, dest_mode, gvec);
+
+for_each_vcpu ( d, v )
+{
+if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, 0,
+dest_id, dest_mode) )
+continue;
+
+dest_vcpu_num++;
+dest = v;
+}
+
+/*
+ * For fixed destination, we only handle single-destination
+ * interrupts.
+ 

[Xen-devel] [PATCH v5 11/17] vt-d: Add API to update IRTE when VT-d PI is used

2015-08-11 Thread Feng Wu
This patch adds an API which is used to update the IRTE
for posted-interrupt when guest changes MSI/MSI-X information.

CC: Yang Zhang 
CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Acked-by: Kevin Tian 
---
v5:
- Make some function parameters const
- Call "spin_unlock_irq(&desc->lock);" a little earlier
- Add "ASSERT(spin_is_locked(&pcidevs_lock))"
- -EBADSLT -> -ENODEV, EBADSLT is removed in the latest Xen

v4:
- Don't inline setup_posted_irte()
- const struct pi_desc *pi_desc for setup_posted_irte()
- return -EINVAL when pirq_spin_lock_irq_desc() fails.
- Make some variables const
- Release irq desc lock earlier in pi_update_irte()
- Remove the pointless do-while() loop when doing cmpxchg16b()

v3:
- Remove "adding PDA_MASK()" when updating 'pda_l' and 'pda_h' for IRTE.
- Change the return type of pi_update_irte() to int.
- Remove some pointless printk message in pi_update_irte().
- Use structure assignment instead of memcpy() for irte copy.

 xen/drivers/passthrough/vtd/intremap.c | 112 +
 xen/drivers/passthrough/vtd/iommu.h|   2 +
 xen/include/asm-x86/iommu.h|   2 +
 3 files changed, 116 insertions(+)

diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index e9fffa6..8ec85d3 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -899,3 +899,115 @@ void iommu_disable_x2apic_IR(void)
 for_each_drhd_unit ( drhd )
 disable_qinval(drhd->iommu);
 }
+
+static void setup_posted_irte(
+struct iremap_entry *new_ire,
+const struct pi_desc *pi_desc,
+const uint8_t gvec)
+{
+new_ire->post.urg = 0;
+new_ire->post.vector = gvec;
+new_ire->post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
+new_ire->post.pda_h = virt_to_maddr(pi_desc) >> 32;
+
+new_ire->post.res_1 = 0;
+new_ire->post.res_2 = 0;
+new_ire->post.res_3 = 0;
+new_ire->post.res_4 = 0;
+new_ire->post.res_5 = 0;
+
+new_ire->post.im = 1;
+}
+
+/*
+ * This function is used to update the IRTE for posted-interrupt
+ * when guest changes MSI/MSI-X information.
+ */
+int pi_update_irte(
+const struct vcpu *v,
+const struct pirq *pirq,
+const uint8_t gvec)
+{
+struct irq_desc *desc;
+const struct msi_desc *msi_desc;
+int remap_index;
+int rc = 0;
+const struct pci_dev *pci_dev;
+const struct acpi_drhd_unit *drhd;
+struct iommu *iommu;
+struct ir_ctrl *ir_ctrl;
+struct iremap_entry *iremap_entries = NULL, *p = NULL;
+struct iremap_entry new_ire, old_ire;
+const struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+__uint128_t ret;
+
+desc = pirq_spin_lock_irq_desc(pirq, NULL);
+if ( !desc )
+return -EINVAL;
+
+msi_desc = desc->msi_desc;
+if ( !msi_desc )
+{
+rc = -ENODEV;
+goto unlock_out;
+}
+
+pci_dev = msi_desc->dev;
+if ( !pci_dev )
+{
+rc = -ENODEV;
+goto unlock_out;
+}
+
+remap_index = msi_desc->remap_index;
+
+spin_unlock_irq(&desc->lock);
+
+ASSERT(spin_is_locked(&pcidevs_lock));
+
+/*
+ * For performance concern, we will store the 'iommu' pointer in
+ * 'struct msi_desc' in some other place, so we don't need to waste
+ * time searching it here. I will fix this later.
+ */
+drhd = acpi_find_matched_drhd_unit(pci_dev);
+if ( !drhd )
+{
+rc = -ENODEV;
+goto unlock_out;
+}
+
+iommu = drhd->iommu;
+ir_ctrl = iommu_ir_ctrl(iommu);
+if ( !ir_ctrl )
+{
+rc = -ENODEV;
+goto unlock_out;
+}
+
+spin_lock_irq(&ir_ctrl->iremap_lock);
+
+GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, remap_index, iremap_entries, p);
+
+old_ire = new_ire = *p;
+
+/* Setup/Update interrupt remapping table entry. */
+setup_posted_irte(&new_ire, pi_desc, gvec);
+ret = cmpxchg16b(p, &old_ire, &new_ire);
+
+ASSERT(ret == *(__uint128_t *)&old_ire);
+
+iommu_flush_cache_entry(p, sizeof(*p));
+iommu_flush_iec_index(iommu, 0, remap_index);
+
+unmap_vtd_domain_page(iremap_entries);
+
+spin_unlock_irq(&ir_ctrl->iremap_lock);
+
+return 0;
+
+ unlock_out:
+spin_unlock_irq(&desc->lock);
+
+return rc;
+}
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index 6fca430..ff4ceb6 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -322,6 +322,8 @@ struct iremap_entry {
   };
 };
 
+#define PDA_LOW_BIT26
+
 /* Max intr remapping table page order is 8, as max number of IRTEs is 64K */
 #define IREMAP_PAGE_ORDER  8
 
diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
i

[Xen-devel] [PATCH v5 10/17] vt-d: Extend struct iremap_entry to support VT-d Posted-Interrupts

2015-08-11 Thread Feng Wu
Extend struct iremap_entry according to VT-d Posted-Interrupts Spec.

CC: Yang Zhang 
CC: Kevin Tian 
Signed-off-by: Feng Wu 
Acked-by: Kevin Tian 
---
v4:
- res_4 is not a bitfield, correct it.
- Expose 'im' to remapped irte as well.

v3:
- Use u32 instead of u64 to define the bitfields in 'struct iremap_entry'
- Limit using bitfield if possible

 xen/drivers/passthrough/vtd/intremap.c | 92 +-
 xen/drivers/passthrough/vtd/iommu.h| 43 ++--
 xen/drivers/passthrough/vtd/utils.c|  8 +--
 3 files changed, 80 insertions(+), 63 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index 987bbe9..e9fffa6 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -122,9 +122,9 @@ static u16 hpetid_to_bdf(unsigned int hpet_id)
 static void set_ire_sid(struct iremap_entry *ire,
 unsigned int svt, unsigned int sq, unsigned int sid)
 {
-ire->hi.svt = svt;
-ire->hi.sq = sq;
-ire->hi.sid = sid;
+ire->remap.svt = svt;
+ire->remap.sq = sq;
+ire->remap.sid = sid;
 }
 
 static void set_ioapic_source_id(int apic_id, struct iremap_entry *ire)
@@ -219,7 +219,7 @@ static unsigned int alloc_remap_entry(struct iommu *iommu, 
unsigned int nr)
 else
 p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
 
-if ( p->lo_val || p->hi_val ) /* not a free entry */
+if ( p->lo || p->hi ) /* not a free entry */
 found = 0;
 else if ( ++found == nr )
 break;
@@ -253,7 +253,7 @@ static int remap_entry_to_ioapic_rte(
 GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
  iremap_entries, iremap_entry);
 
-if ( iremap_entry->hi_val == 0 && iremap_entry->lo_val == 0 )
+if ( iremap_entry->hi == 0 && iremap_entry->lo == 0 )
 {
 dprintk(XENLOG_ERR VTDPREFIX,
 "%s: index (%d) get an empty entry!\n",
@@ -263,13 +263,13 @@ static int remap_entry_to_ioapic_rte(
 return -EFAULT;
 }
 
-old_rte->vector = iremap_entry->lo.vector;
-old_rte->delivery_mode = iremap_entry->lo.dlm;
-old_rte->dest_mode = iremap_entry->lo.dm;
-old_rte->trigger = iremap_entry->lo.tm;
+old_rte->vector = iremap_entry->remap.vector;
+old_rte->delivery_mode = iremap_entry->remap.dlm;
+old_rte->dest_mode = iremap_entry->remap.dm;
+old_rte->trigger = iremap_entry->remap.tm;
 old_rte->__reserved_2 = 0;
 old_rte->dest.logical.__reserved_1 = 0;
-old_rte->dest.logical.logical_dest = iremap_entry->lo.dst >> 8;
+old_rte->dest.logical.logical_dest = iremap_entry->remap.dst >> 8;
 
 unmap_vtd_domain_page(iremap_entries);
 spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
@@ -317,27 +317,28 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu,
 if ( rte_upper )
 {
 if ( x2apic_enabled )
-new_ire.lo.dst = value;
+new_ire.remap.dst = value;
 else
-new_ire.lo.dst = (value >> 24) << 8;
+new_ire.remap.dst = (value >> 24) << 8;
 }
 else
 {
 *(((u32 *)&new_rte) + 0) = value;
-new_ire.lo.fpd = 0;
-new_ire.lo.dm = new_rte.dest_mode;
-new_ire.lo.tm = new_rte.trigger;
-new_ire.lo.dlm = new_rte.delivery_mode;
+new_ire.remap.fpd = 0;
+new_ire.remap.dm = new_rte.dest_mode;
+new_ire.remap.tm = new_rte.trigger;
+new_ire.remap.dlm = new_rte.delivery_mode;
 /* Hardware require RH = 1 for LPR delivery mode */
-new_ire.lo.rh = (new_ire.lo.dlm == dest_LowestPrio);
-new_ire.lo.avail = 0;
-new_ire.lo.res_1 = 0;
-new_ire.lo.vector = new_rte.vector;
-new_ire.lo.res_2 = 0;
+new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
+new_ire.remap.avail = 0;
+new_ire.remap.res_1 = 0;
+new_ire.remap.vector = new_rte.vector;
+new_ire.remap.res_2 = 0;
 
 set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
-new_ire.hi.res_1 = 0;
-new_ire.lo.p = 1; /* finally, set present bit */
+new_ire.remap.res_3 = 0;
+new_ire.remap.res_4 = 0;
+new_ire.remap.p = 1; /* finally, set present bit */
 
 /* now construct new ioapic rte entry */
 remap_rte->vector = new_rte.vector;
@@ -510,7 +511,7 @@ static int remap_entry_to_msi_msg(
 GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
  iremap_entries, iremap_entry);
 
-if ( iremap_entry->hi_val == 0 && iremap_entry->lo_val == 0 )
+if ( iremap_entry->hi == 0 && iremap_entry->lo == 0 )
 {
 dprintk(XENLOG_ERR 

[Xen-devel] [PATCH v5 13/17] vmx: posted-interrupt handling when vCPU is blocked

2015-08-11 Thread Feng Wu
This patch includes the following aspects:
- Add a global vector to wake up the blocked vCPU
  when an interrupt is being posted to it (This
  part was suggested by Yang Zhang).
- Adds a new per-vCPU tasklet to wakeup the blocked
  vCPU. It can be used in the case vcpu_unblock
  cannot be called directly.
- Define two per-cpu variables:
  * pi_blocked_vcpu:
  A list storing the vCPUs which were blocked on this pCPU.

  * pi_blocked_vcpu_lock:
  The spinlock to protect pi_blocked_vcpu.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
---
v4:
- Use local variables in pi_wakeup_interrupt()
- Remove vcpu from the blocked list when pi_desc.on==1, this
  avoids kicking the vcpu multiple times.
- Remove tasklet

v3:
- This patch is generated by merging the following three patches in v2:
   [RFC v2 09/15] Add a new per-vCPU tasklet to wakeup the blocked vCPU
   [RFC v2 10/15] vmx: Define two per-cpu variables
   [RFC v2 11/15] vmx: Add a global wake-up vector for VT-d Posted-Interrupts
- rename 'vcpu_wakeup_tasklet' to 'pi_vcpu_wakeup_tasklet'
- Move the definition of 'pi_vcpu_wakeup_tasklet' to 'struct arch_vmx_struct'
- rename 'vcpu_wakeup_tasklet_handler' to 'pi_vcpu_wakeup_tasklet_handler'
- Make pi_wakeup_interrupt() static
- Rename 'blocked_vcpu_list' to 'pi_blocked_vcpu_list'
- move 'pi_blocked_vcpu_list' to 'struct arch_vmx_struct'
- Rename 'blocked_vcpu' to 'pi_blocked_vcpu'
- Rename 'blocked_vcpu_lock' to 'pi_blocked_vcpu_lock'

 xen/arch/x86/hvm/vmx/vmcs.c|  3 ++
 xen/arch/x86/hvm/vmx/vmx.c | 63 ++
 xen/include/asm-x86/hvm/vmx/vmcs.h |  3 ++
 xen/include/asm-x86/hvm/vmx/vmx.h  |  5 +++
 4 files changed, 74 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 28c553f..2dabf16 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -661,6 +661,9 @@ int vmx_cpu_up(void)
 if ( cpu_has_vmx_vpid )
 vpid_sync_all();
 
+INIT_LIST_HEAD(&per_cpu(pi_blocked_vcpu, cpu));
+spin_lock_init(&per_cpu(pi_blocked_vcpu_lock, cpu));
+
 return 0;
 }
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index d2a4cfb..e80d888 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -83,7 +83,15 @@ static int vmx_msr_write_intercept(unsigned int msr, 
uint64_t msr_content);
 static void vmx_invlpg_intercept(unsigned long vaddr);
 static int vmx_vmfunc_intercept(struct cpu_user_regs *regs);
 
+/*
+ * We maintian a per-CPU linked-list of vCPU, so in PI wakeup handler we
+ * can find which vCPU should be waken up.
+ */
+DEFINE_PER_CPU(struct list_head, pi_blocked_vcpu);
+DEFINE_PER_CPU(spinlock_t, pi_blocked_vcpu_lock);
+
 uint8_t __read_mostly posted_intr_vector;
+uint8_t __read_mostly pi_wakeup_vector;
 
 static int vmx_domain_initialise(struct domain *d)
 {
@@ -106,6 +114,9 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 
 spin_lock_init(&v->arch.hvm_vmx.vmcs_lock);
 
+INIT_LIST_HEAD(&v->arch.hvm_vmx.pi_blocked_vcpu_list);
+INIT_LIST_HEAD(&v->arch.hvm_vmx.pi_vcpu_on_set_list);
+
 v->arch.schedule_tail= vmx_do_resume;
 v->arch.ctxt_switch_from = vmx_ctxt_switch_from;
 v->arch.ctxt_switch_to   = vmx_ctxt_switch_to;
@@ -1975,6 +1986,53 @@ static struct hvm_function_table __initdata 
vmx_function_table = {
 .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
 };
 
+/*
+ * Handle VT-d posted-interrupt when VCPU is blocked.
+ */
+static void pi_wakeup_interrupt(struct cpu_user_regs *regs)
+{
+struct arch_vmx_struct *vmx, *tmp;
+struct vcpu *v;
+spinlock_t *lock = &this_cpu(pi_blocked_vcpu_lock);
+struct list_head *blocked_vcpus = &this_cpu(pi_blocked_vcpu);
+LIST_HEAD(list);
+
+spin_lock(lock);
+
+/*
+ * XXX: The length of the list depends on how many vCPU is current
+ * blocked on this specific pCPU. This may hurt the interrupt latency
+ * if the list grows to too many entries.
+ */
+list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocked_vcpu_list)
+{
+if ( pi_test_on(&vmx->pi_desc) )
+{
+list_del_init(&vmx->pi_blocked_vcpu_list);
+
+/*
+ * We cannot call vcpu_unblock here, since it also needs
+ * 'pi_blocked_vcpu_lock', we store the vCPUs with ON
+ * set in another list and unblock them after we release
+ * 'pi_blocked_vcpu_lock'.
+ */
+list_add_tail(&vmx->pi_vcpu_on_set_list, &list);
+}
+}
+
+spin_unlock(lock);
+
+list_for_each_entry_safe(vmx, tmp, &list, pi_vcpu_on_set_list)
+{
+v = container_of(vmx, struct vcpu, arch.hvm_vmx);
+  

[Xen-devel] [PATCH v5 15/17] vmx: Add some scheduler hooks for VT-d posted interrupts

2015-08-11 Thread Feng Wu
This patch adds the following arch hooks in scheduler:
- vmx_pre_ctx_switch_pi():
It is called before context switch; we update the posted
interrupt descriptor when the vCPU is preempted, goes to sleep,
or is blocked.

- vmx_post_ctx_switch_pi()
It is called after context switch, we update the posted
interrupt descriptor when the vCPU is going to run.

- arch_vcpu_wake_prepare()
It will be called when waking up the vCPU, we update
the posted interrupt descriptor when the vCPU is unblocked.

CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
CC: Kevin Tian 
CC: George Dunlap 
CC: Dario Faggioli 
Suggested-by: Dario Faggioli 
Signed-off-by: Feng Wu 
---
v5:
- Rename arch_vcpu_wake to arch_vcpu_wake_prepare
- Make arch_vcpu_wake_prepare() inline for ARM
- Merge the ARM dummy hooks together
- Changes to some code comments
- Leave 'pi_ctxt_switch_from' and 'pi_ctxt_switch_to' NULL if
  PI is disabled or the vCPU is not in HVM
- Coding style

v4:
- Newly added

 xen/arch/x86/domain.c  |  11 +++
 xen/arch/x86/hvm/vmx/vmx.c | 147 +
 xen/common/schedule.c  |   2 +
 xen/include/asm-arm/domain.h   |   2 +
 xen/include/asm-x86/domain.h   |   3 +
 xen/include/asm-x86/hvm/hvm.h  |   2 +
 xen/include/asm-x86/hvm/vmx/vmcs.h |   8 ++
 7 files changed, 175 insertions(+)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 045f6ff..130f859 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1605,9 +1605,20 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
 
 set_current(next);
 
+/*
+ * When switching from non-idle to idle, we only do a lazy context switch.
+ * However, in order for posted interrupt (if available and enabled) to
+ * work properly, we at least need to update the descriptors.
+ */
+if ( prev->arch.pi_ctxt_switch_from && !is_idle_vcpu(prev) )
+prev->arch.pi_ctxt_switch_from(prev);
+
 if ( (per_cpu(curr_vcpu, cpu) == next) ||
  (is_idle_domain(nextd) && cpu_online(cpu)) )
 {
+if ( next->arch.pi_ctxt_switch_to && !is_idle_vcpu(next) )
+next->arch.pi_ctxt_switch_to(next);
+
 local_irq_enable();
 }
 else
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c8a4371..758809a 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -67,6 +67,8 @@ enum handler_return { HNDL_done, HNDL_unhandled, 
HNDL_exception_raised };
 
 static void vmx_ctxt_switch_from(struct vcpu *v);
 static void vmx_ctxt_switch_to(struct vcpu *v);
+static void vmx_pre_ctx_switch_pi(struct vcpu *v);
+static void vmx_post_ctx_switch_pi(struct vcpu *v);
 
 static int  vmx_alloc_vlapic_mapping(struct domain *d);
 static void vmx_free_vlapic_mapping(struct domain *d);
@@ -117,10 +119,20 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 INIT_LIST_HEAD(&v->arch.hvm_vmx.pi_blocked_vcpu_list);
 INIT_LIST_HEAD(&v->arch.hvm_vmx.pi_vcpu_on_set_list);
 
+v->arch.hvm_vmx.pi_block_cpu = -1;
+
+spin_lock_init(&v->arch.hvm_vmx.pi_lock);
+
 v->arch.schedule_tail= vmx_do_resume;
 v->arch.ctxt_switch_from = vmx_ctxt_switch_from;
 v->arch.ctxt_switch_to   = vmx_ctxt_switch_to;
 
+if ( iommu_intpost && is_hvm_vcpu(v) )
+{
+v->arch.pi_ctxt_switch_from = vmx_pre_ctx_switch_pi;
+v->arch.pi_ctxt_switch_to = vmx_post_ctx_switch_pi;
+}
+
 if ( (rc = vmx_create_vmcs(v)) != 0 )
 {
 dprintk(XENLOG_WARNING,
@@ -718,6 +730,140 @@ static void vmx_fpu_leave(struct vcpu *v)
 }
 }
 
+void arch_vcpu_wake_prepare(struct vcpu *v)
+{
+unsigned long gflags;
+
+if ( !iommu_intpost || !is_hvm_vcpu(v) || !has_arch_pdevs(v->domain) )
+return;
+
+spin_lock_irqsave(&v->arch.hvm_vmx.pi_lock, gflags);
+
+if ( likely(vcpu_runnable(v)) ||
+ !test_bit(_VPF_blocked, &v->pause_flags) )
+{
+struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+unsigned long flags;
+
+/*
+ * We don't need to send notification event to a non-running
+ * vcpu, the interrupt information will be delivered to it before
+ * VM-ENTRY when the vcpu is scheduled to run next time.
+ */
+pi_set_sn(pi_desc);
+
+/*
+ * Set 'NV' feild back to posted_intr_vector, so the
+ * Posted-Interrupts can be delivered to the vCPU by
+ * VT-d HW after it is scheduled to run.
+ */
+write_atomic((uint8_t*)&pi_desc->nv, posted_intr_vector);
+
+/*
+ * Delete the vCPU from the related block list
+ * if we are resuming from blocked state
+ */
+if ( v->arch.hvm_vmx.pi_block_cpu != -1 )
+{
+spin_lock_irqsave(&per_cpu(pi_blocked_vcpu_lock,
+

[Xen-devel] [PATCH v5 17/17] Add a command line parameter for VT-d posted-interrupts

2015-08-11 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

Signed-off-by: Feng Wu 
Reviewed-by: Kevin Tian 
---
 docs/misc/xen-command-line.markdown | 9 -
 xen/drivers/passthrough/iommu.c | 3 +++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 204e7a4..d83a292 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -855,7 +855,7 @@ debug hypervisor only).
 > Default: `new` unless directed-EOI is supported
 
 ### iommu
-> `= List of [  | force | required | intremap | qinval | snoop | 
sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | verbose | debug ]`
+> `= List of [  | force | required | intremap | intpost | qinval | 
snoop | sharept | dom0-passthrough | dom0-strict | amd-iommu-perdev-intremap | 
workaround_bios_bug | verbose | debug ]`
 
 > Sub-options:
 
@@ -882,6 +882,13 @@ debug hypervisor only).
 >> Control the use of interrupt remapping (DMA remapping will always be enabled
 >> if IOMMU functionality is enabled).
 
+> `intpost`
+
+> Default: `true`
+
+>> Control the use of interrupt posting, which depends on the availability of
+>> interrupt remapping.
+
 > `qinval` (VT-d)
 
 > Default: `true`
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 8eb77f7..84b1e43 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -38,6 +38,7 @@ static void iommu_dump_p2m_table(unsigned char key);
  *   no-snoop   Disable VT-d Snoop Control
  *   no-qinval  Disable VT-d Queued Invalidation
  *   no-intremapDisable VT-d Interrupt Remapping
+ *   no-intpost Disable VT-d Interrupt posting
  */
 custom_param("iommu", parse_iommu_param);
 bool_t __initdata iommu_enable = 1;
@@ -102,6 +103,8 @@ static void __init parse_iommu_param(char *s)
 iommu_qinval = val;
 else if ( !strcmp(s, "intremap") )
 iommu_intremap = val;
+else if ( !strcmp(s, "intpost") )
+iommu_intpost = val;
 else if ( !strcmp(s, "debug") )
 {
 iommu_debug = val;
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v6 15/18] vmx: Properly handle notification event when vCPU is running

2015-08-24 Thread Feng Wu
When a vCPU is running in Root mode and a notification event
has been injected to it, we need to set VCPU_KICK_SOFTIRQ for
the current cpu, so the pending interrupt in PIRR will be
synced to vIRR before VM-Exit in time.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
Acked-by: Kevin Tian 
---
v6:
- Ack the interrupt in the beginning of pi_notification_interrupt()

v4:
- Coding style.

v3:
- Make pi_notification_interrupt() static

 xen/arch/x86/hvm/vmx/vmx.c | 48 +-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 9cde9a4..5167fae 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2035,6 +2035,52 @@ static void pi_wakeup_interrupt(struct cpu_user_regs 
*regs)
 this_cpu(irq_count)++;
 }
 
+/* Handle VT-d posted-interrupt when VCPU is running. */
+static void pi_notification_interrupt(struct cpu_user_regs *regs)
+{
+ack_APIC_irq();
+
+/*
+ * We get here when a vCPU is running in root-mode (such as via hypercall,
+ * or any other reasons which can result in VM-Exit), and before vCPU is
+ * back to non-root, external interrupts from an assigned device happen
+ * and a notification event is delivered to this logical CPU.
+ *
+ * we need to set VCPU_KICK_SOFTIRQ for the current cpu, just like
+ * __vmx_deliver_posted_interrupt(). So the pending interrupt in PIRR will
+ * be synced to vIRR before VM-Exit in time.
+ *
+ * Please refer to the following code fragments from
+ * xen/arch/x86/hvm/vmx/entry.S:
+ *
+ * .Lvmx_do_vmentry
+ *
+ *  ..
+ *  point 1
+ *
+ *  cmp  %ecx,(%rdx,%rax,1)
+ *  jnz  .Lvmx_process_softirqs
+ *
+ *  ..
+ *
+ *  je   .Lvmx_launch
+ *
+ *  ..
+ *
+ * .Lvmx_process_softirqs:
+ *  sti
+ *  call do_softirq
+ *  jmp  .Lvmx_do_vmentry
+ *
+ * If VT-d engine issues a notification event at point 1 above, it cannot
+ * be delivered to the guest during this VM-entry without raising the
+ * softirq in this notification handler.
+ */
+raise_softirq(VCPU_KICK_SOFTIRQ);
+
+this_cpu(irq_count)++;
+}
+
 const struct hvm_function_table * __init start_vmx(void)
 {
 set_in_cr4(X86_CR4_VMXE);
@@ -2073,7 +2119,7 @@ const struct hvm_function_table * __init start_vmx(void)
 
 if ( cpu_has_vmx_posted_intr_processing )
 {
-alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt);
+alloc_direct_apic_vector(&posted_intr_vector, 
pi_notification_interrupt);
 
 if ( iommu_intpost )
 alloc_direct_apic_vector(&pi_wakeup_vector, pi_wakeup_interrupt);
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v6 14/18] vmx: posted-interrupt handling when vCPU is blocked

2015-08-24 Thread Feng Wu
This patch includes the following aspects:
- Add a global vector to wake up the blocked vCPU
  when an interrupt is being posted to it (This
  part was suggested by Yang Zhang).
- Adds a new per-vCPU tasklet to wakeup the blocked
  vCPU. It can be used in the case vcpu_unblock
  cannot be called directly.
- Define two per-cpu variables:
  * pi_blocked_vcpu:
  A list storing the vCPUs which were blocked on this pCPU.

  * pi_blocked_vcpu_lock:
  The spinlock to protect pi_blocked_vcpu.

CC: Kevin Tian 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Andrew Cooper 
Signed-off-by: Feng Wu 
---
v6:
- Fix some typos
- Ack the interrupt right after the spin_unlock in pi_wakeup_interrupt()

v4:
- Use local variables in pi_wakeup_interrupt()
- Remove vcpu from the blocked list when pi_desc.on==1, this
  avoids kicking the vcpu multiple times.
- Remove tasklet

v3:
- This patch is generated by merging the following three patches in v2:
   [RFC v2 09/15] Add a new per-vCPU tasklet to wakeup the blocked vCPU
   [RFC v2 10/15] vmx: Define two per-cpu variables
   [RFC v2 11/15] vmx: Add a global wake-up vector for VT-d Posted-Interrupts
- rename 'vcpu_wakeup_tasklet' to 'pi_vcpu_wakeup_tasklet'
- Move the definition of 'pi_vcpu_wakeup_tasklet' to 'struct arch_vmx_struct'
- rename 'vcpu_wakeup_tasklet_handler' to 'pi_vcpu_wakeup_tasklet_handler'
- Make pi_wakeup_interrupt() static
- Rename 'blocked_vcpu_list' to 'pi_blocked_vcpu_list'
- move 'pi_blocked_vcpu_list' to 'struct arch_vmx_struct'
- Rename 'blocked_vcpu' to 'pi_blocked_vcpu'
- Rename 'blocked_vcpu_lock' to 'pi_blocked_vcpu_lock'

 xen/arch/x86/hvm/vmx/vmcs.c|  3 ++
 xen/arch/x86/hvm/vmx/vmx.c | 64 ++
 xen/include/asm-x86/hvm/vmx/vmcs.h |  3 ++
 xen/include/asm-x86/hvm/vmx/vmx.h  |  5 +++
 4 files changed, 75 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 28c553f..2dabf16 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -661,6 +661,9 @@ int vmx_cpu_up(void)
 if ( cpu_has_vmx_vpid )
 vpid_sync_all();
 
+INIT_LIST_HEAD(&per_cpu(pi_blocked_vcpu, cpu));
+spin_lock_init(&per_cpu(pi_blocked_vcpu_lock, cpu));
+
 return 0;
 }
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 2c1c770..9cde9a4 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -83,7 +83,15 @@ static int vmx_msr_write_intercept(unsigned int msr, 
uint64_t msr_content);
 static void vmx_invlpg_intercept(unsigned long vaddr);
 static int vmx_vmfunc_intercept(struct cpu_user_regs *regs);
 
+/*
+ * We maintain a per-CPU linked-list of vCPU, so in PI wakeup handler we
+ * can find which vCPU should be woken up.
+ */
+DEFINE_PER_CPU(struct list_head, pi_blocked_vcpu);
+DEFINE_PER_CPU(spinlock_t, pi_blocked_vcpu_lock);
+
 uint8_t __read_mostly posted_intr_vector;
+uint8_t __read_mostly pi_wakeup_vector;
 
 static int vmx_domain_initialise(struct domain *d)
 {
@@ -106,6 +114,9 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 
 spin_lock_init(&v->arch.hvm_vmx.vmcs_lock);
 
+INIT_LIST_HEAD(&v->arch.hvm_vmx.pi_blocked_vcpu_list);
+INIT_LIST_HEAD(&v->arch.hvm_vmx.pi_vcpu_on_set_list);
+
 v->arch.schedule_tail= vmx_do_resume;
 v->arch.ctxt_switch_from = vmx_ctxt_switch_from;
 v->arch.ctxt_switch_to   = vmx_ctxt_switch_to;
@@ -1976,6 +1987,54 @@ static struct hvm_function_table __initdata 
vmx_function_table = {
 .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
 };
 
+/*
+ * Handle VT-d posted-interrupt when VCPU is blocked.
+ */
+static void pi_wakeup_interrupt(struct cpu_user_regs *regs)
+{
+struct arch_vmx_struct *vmx, *tmp;
+struct vcpu *v;
+spinlock_t *lock = &this_cpu(pi_blocked_vcpu_lock);
+struct list_head *blocked_vcpus = &this_cpu(pi_blocked_vcpu);
+LIST_HEAD(list);
+
+spin_lock(lock);
+
+/*
+ * XXX: The length of the list depends on how many vCPU is current
+ * blocked on this specific pCPU. This may hurt the interrupt latency
+ * if the list grows to too many entries.
+ */
+list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocked_vcpu_list)
+{
+if ( pi_test_on(&vmx->pi_desc) )
+{
+list_del_init(&vmx->pi_blocked_vcpu_list);
+
+/*
+ * We cannot call vcpu_unblock here, since it also needs
+ * 'pi_blocked_vcpu_lock', we store the vCPUs with ON
+ * set in another list and unblock them after we release
+ * 'pi_blocked_vcpu_lock'.
+ */
+list_add_tail(&vmx->pi_vcpu_on_set_list, &list);
+}
+}
+
+spin_unlock(lock);
+
+ack_APIC_irq();
+
+list_for_each_entry_saf

[Xen-devel] [PATCH v6 17/18] VT-d: Dump the posted format IRTE

2015-08-24 Thread Feng Wu
Add the utility to dump the posted format IRTE.

CC: Yang Zhang 
CC: Kevin Tian 
Signed-off-by: Feng Wu 
---
v6:
- Fix a typo

v4:
- Newly added

 xen/drivers/passthrough/vtd/utils.c | 43 +++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/utils.c 
b/xen/drivers/passthrough/vtd/utils.c
index 9d556da..0c7ce3f 100644
--- a/xen/drivers/passthrough/vtd/utils.c
+++ b/xen/drivers/passthrough/vtd/utils.c
@@ -203,6 +203,9 @@ static void dump_iommu_info(unsigned char key)
 ecap_intr_remap(iommu->ecap) ? "" : "not ",
 (status & DMA_GSTS_IRES) ? " and enabled" : "" );
 
+printk("  Interrupt Posting: %ssupported.\n",
+cap_intr_post(iommu->ecap) ? "" : "not ");
+
 if ( status & DMA_GSTS_IRES )
 {
 /* Dump interrupt remapping table. */
@@ -213,6 +216,7 @@ static void dump_iommu_info(unsigned char key)
 
 printk("  Interrupt remapping table (nr_entry=%#x. "
 "Only dump P=1 entries here):\n", nr_entry);
+printk ("Entries for remapped format:\n");
 printk("   SVT  SQ   SID  DST  V  AVL DLM TM RH DM "
"FPD P\n");
 for ( i = 0; i < nr_entry; i++ )
@@ -220,7 +224,7 @@ static void dump_iommu_info(unsigned char key)
 struct iremap_entry *p;
 if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
 {
-/* This entry across page boundry */
+/* This entry across page boundary. */
 if ( iremap_entries )
 unmap_vtd_domain_page(iremap_entries);
 
@@ -230,7 +234,7 @@ static void dump_iommu_info(unsigned char key)
 else
 p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
 
-if ( !p->remap.p )
+if ( !p->remap.p || p->remap.im )
 continue;
 printk("  %04x:  %x   %x  %04x %08x %02x%x   %x  %x  %x  
%x"
 "   %x %x\n", i,
@@ -239,6 +243,41 @@ static void dump_iommu_info(unsigned char key)
 p->remap.rh, p->remap.dm, p->remap.fpd, p->remap.p);
 print_cnt++;
 }
+
+if ( iremap_entries )
+unmap_vtd_domain_page(iremap_entries);
+
+iremap_entries = NULL;
+printk ("\nEntries for posted format:\n");
+printk("   SVT  SQ   SID  PDA  V  URG AVL FPD P\n");
+for ( i = 0; i < nr_entry; i++ )
+{
+struct iremap_entry *p;
+if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
+{
+/* This entry across page boundary. */
+if ( iremap_entries )
+unmap_vtd_domain_page(iremap_entries);
+
+GET_IREMAP_ENTRY(iremap_maddr, i,
+ iremap_entries, p);
+}
+else
+p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
+
+if ( !p->post.p || !p->post.im )
+continue;
+
+printk("  %04x:  %x   %x  %04x %16lx %02x%x   %x  %x  %x\n",
+i,
+p->post.svt, p->post.sq, p->post.sid,
+((u64)p->post.pda_h << 32) | (p->post.pda_l << 6),
+p->post.vector, p->post.urg, p->post.avail, p->post.fpd,
+p->post.p);
+
+print_cnt++;
+}
+
 if ( iremap_entries )
 unmap_vtd_domain_page(iremap_entries);
 if ( iommu_ir_ctrl(iommu)->iremap_num != print_cnt )
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v6 12/18] x86: move some APIC related macros to apicdef.h

2015-08-24 Thread Feng Wu
Move some APIC related macros to apicdef.h, so they can be used
outside of vlapic.c.

Signed-off-by: Feng Wu 
---
v6:
- Newly introduced.

 xen/arch/x86/hvm/vlapic.c | 5 -
 xen/include/asm-x86/apicdef.h | 4 
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index b893b40..9b7c871 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -65,11 +65,6 @@ static const unsigned int vlapic_lvt_mask[VLAPIC_LVT_NUM] =
  LVT_MASK
 };
 
-/* Following could belong in apicdef.h */
-#define APIC_SHORT_MASK  0xc
-#define APIC_DEST_NOSHORT0x0
-#define APIC_DEST_MASK   0x800
-
 #define vlapic_lvt_vector(vlapic, lvt_type) \
 (vlapic_get_reg(vlapic, lvt_type) & APIC_VECTOR_MASK)
 
diff --git a/xen/include/asm-x86/apicdef.h b/xen/include/asm-x86/apicdef.h
index 6069fce..6d1fd94 100644
--- a/xen/include/asm-x86/apicdef.h
+++ b/xen/include/asm-x86/apicdef.h
@@ -124,6 +124,10 @@
 
 #define MAX_IO_APICS 128
 
+#define APIC_SHORT_MASK  0xc
+#define APIC_DEST_NOSHORT0x0
+#define APIC_DEST_MASK   0x800
+
 /*
  * the local APIC register structure, memory mapped. Not terribly well
  * tested, but we might eventually use this one in the future - the
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


  1   2   3   >