[PATCH 6/8] KVM: PPC: Book3S: Add support for ibm,int-on/off RTAS calls

2013-04-11 Thread Paul Mackerras
This adds support for the ibm,int-on and ibm,int-off RTAS calls to the
in-kernel XICS emulation and corrects the handling of the saved
priority by the ibm,set-xive RTAS call.  With this, ibm,int-off sets
the specified interrupt's priority in its saved_priority field and
sets the priority to 0xff (the least favoured value).  ibm,int-on
restores the saved_priority to the priority field, and ibm,set-xive
sets both the priority and the saved_priority to the specified
priority value.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_ppc.h |2 +
 arch/powerpc/kvm/book3s_rtas.c |   40 +
 arch/powerpc/kvm/book3s_xics.c |   86 +---
 arch/powerpc/kvm/book3s_xics.h |2 +-
 4 files changed, 114 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 1f7f5f6..e5a0614 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -173,6 +173,8 @@ extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
 extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
 extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 
priority);
 extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 
*priority);
+extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
 
 /*
  * Cuts out inst bits with ordering according to spec.
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 6a6c1fe..fc1a749 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -64,6 +64,44 @@ out:
args->rets[0] = rc;
 }
 
+static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+   u32 irq;
+   int rc;
+
+   if (args->nargs != 1 || args->nret != 1) {
+   rc = -3;
+   goto out;
+   }
+
+   irq = args->args[0];
+
+   rc = kvmppc_xics_int_off(vcpu->kvm, irq);
+   if (rc)
+   rc = -3;
+out:
+   args->rets[0] = rc;
+}
+
+static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+   u32 irq;
+   int rc;
+
+   if (args->nargs != 1 || args->nret != 1) {
+   rc = -3;
+   goto out;
+   }
+
+   irq = args->args[0];
+
+   rc = kvmppc_xics_int_on(vcpu->kvm, irq);
+   if (rc)
+   rc = -3;
+out:
+   args->rets[0] = rc;
+}
+
 struct rtas_handler {
void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
char *name;
@@ -72,6 +110,8 @@ struct rtas_handler {
 static struct rtas_handler rtas_handlers[] = {
{ .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
{ .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
+   { .name = "ibm,int-off",  .handler = kvm_rtas_int_off },
+   { .name = "ibm,int-on",   .handler = kvm_rtas_int_on },
 };
 
 struct rtas_token_definition {
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 278eecc..d1ec4b0 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -120,6 +120,28 @@ static void ics_check_resend(struct kvmppc_xics *xics, 
struct kvmppc_ics *ics,
mutex_unlock(&ics->lock);
 }
 
+static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+  struct ics_irq_state *state,
+  u32 server, u32 priority, u32 saved_priority)
+{
+   bool deliver;
+
+   mutex_lock(&ics->lock);
+
+   state->server = server;
+   state->priority = priority;
+   state->saved_priority = saved_priority;
+   deliver = false;
+   if ((state->masked_pending || state->resend) && priority != MASKED) {
+   state->masked_pending = 0;
+   deliver = true;
+   }
+
+   mutex_unlock(&ics->lock);
+
+   return deliver;
+}
+
 int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
 {
struct kvmppc_xics *xics = kvm->arch.xics;
@@ -127,7 +149,6 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 
server, u32 priority)
struct kvmppc_ics *ics;
struct ics_irq_state *state;
u16 src;
-   bool deliver;
 
if (!xics)
return -ENODEV;
@@ -141,23 +162,11 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 
server, u32 priority)
if (!icp)
return -EINVAL;
 
-   mutex_lock(&ics->lock);
-
	XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
 irq, server, priority,
 state->masked_pending, state->resend);

-   state->server = server;
-   state->priority = priority;
-   deliver = false;
-   if ((state->masked_pending || state->resend) && priority != MASKED) {
-   state->masked_pending = 0;
-   deliver = true;
-   }
-
-   mutex_unlock(&ics->lock);
-
-   if (deliver)
+   if 

[PATCH v4 0/8] In-kernel XICS interrupt controller emulation

2013-04-11 Thread Paul Mackerras
This is a repost of my patch series implementing in-kernel emulation
of the XICS interrupt controller architecture defined in PAPR (Power
Architecture Platform Requirements, the document that defines IBM's
pSeries platform architecture).  This version of the patch series uses
the device API as posted by Scott Wood.  I have structured the series
so that the API is added by the last two patches, so as to be able to
accommodate any future revisions to the device API with minimal
changes.

The series is based on Alex Graf's kvm-ppc-next branch with Scott
Wood's recent patch series applied on top, together with the patch
below to allow it to compile with CONFIG_KVM_MPIC=n.

The API defined here uses KVM_CREATE_DEVICE to create the XICS,
KVM_DEVICE_SET_ATTR/KVM_DEVICE_GET_ATTR to manipulate the interrupt
sources (for initialization and migration), a new KVM_CAP_IRQ_XICS
capability to connect vcpus to the XICS, a new identifier
KVM_REG_PPC_ICP_STATE for the one-reg interface to get and set
per-vcpu state, and the existing KVM_IRQ_LINE ioctl to assert and
deassert interrupt sources.

Paul.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/8] KVM: PPC: Book3S: Facilities to save/restore XICS presentation ctrler state

2013-04-11 Thread Paul Mackerras
This adds the ability for userspace to save and restore the state
of the XICS interrupt presentation controllers (ICPs) via the
KVM_GET/SET_ONE_REG interface.  Since there is one ICP per vcpu, we
simply define a new 64-bit register in the ONE_REG space for the ICP
state.  The state includes the CPU priority setting, the pending IPI
priority, and the priority and source number of any pending external
interrupt.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 Documentation/virtual/kvm/api.txt   |1 +
 arch/powerpc/include/asm/kvm_ppc.h  |2 +
 arch/powerpc/include/uapi/asm/kvm.h |   13 +
 arch/powerpc/kvm/book3s.c   |   19 
 arch/powerpc/kvm/book3s_xics.c  |   90 +++
 5 files changed, 125 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 4247d65..54bb6ad 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1792,6 +1792,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TSR  | 32
   PPC   | KVM_REG_PPC_OR_TSR   | 32
   PPC   | KVM_REG_PPC_CLEAR_TSR| 32
+  PPC   | KVM_REG_PPC_ICP_STATE | 64
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index e5a0614..c1b6150 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -312,6 +312,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
 extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
+extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 #else
 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 6beb876..9781927 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -429,4 +429,17 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TCR(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89)
 #define KVM_REG_PPC_TSR(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a)
 
+/* Per-vcpu interrupt controller state */
+#define KVM_REG_PPC_ICP_STATE  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8b)
+
+/* Layout of above for XICS */
+#define  KVM_REG_PPC_ICP_CPPR_SHIFT56  /* current proc priority */
+#define  KVM_REG_PPC_ICP_CPPR_MASK 0xff
+#define  KVM_REG_PPC_ICP_XISR_SHIFT32  /* interrupt status field */
+#define  KVM_REG_PPC_ICP_XISR_MASK 0xff
+#define  KVM_REG_PPC_ICP_MFRR_SHIFT24  /* pending IPI priority */
+#define  KVM_REG_PPC_ICP_MFRR_MASK 0xff
+#define  KVM_REG_PPC_ICP_PPRI_SHIFT16  /* pending irq priority */
+#define  KVM_REG_PPC_ICP_PPRI_MASK 0xff
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index c5a4478..07d3709 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -529,6 +529,15 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
break;
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_KVM_BOOK3S_64
+   case KVM_REG_PPC_ICP_STATE:
+   if (!vcpu->arch.icp) {
+   r = -ENXIO;
+   break;
+   }
+   val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
+   break;
+#endif /* CONFIG_KVM_BOOK3S_64 */
default:
r = -EINVAL;
break;
@@ -591,6 +600,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
break;
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_KVM_BOOK3S_64
+   case KVM_REG_PPC_ICP_STATE:
+   if (!vcpu->arch.icp) {
+   r = -ENXIO;
+   break;
+   }
+   r = kvmppc_xics_set_icp(vcpu,
+   set_reg_val(reg->id, val));
+   break;
+#endif /* CONFIG_KVM_BOOK3S_64 */
default:
r = -EINVAL;
break;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index d1ec4b0..4eb4f4b 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -953,6 +953,96 @@ int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned 
long server_num)
   

[PATCH 3/8] KVM: PPC: Book3S HV: Speed up wakeups of CPUs on HV KVM

2013-04-11 Thread Paul Mackerras
From: Benjamin Herrenschmidt b...@kernel.crashing.org

Currently, we wake up a CPU by sending a host IPI with
smp_send_reschedule() to thread 0 of that core, which will take all
threads out of the guest, and cause them to re-evaluate their
interrupt status on the way back in.

This adds a mechanism to differentiate real host IPIs from IPIs sent
by KVM for guest threads to poke each other, in order to target the
guest threads precisely when possible and avoid that global switch of
the core to host state.

We then use this new facility in the in-kernel XICS code.

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s_asm.h |8 ++-
 arch/powerpc/include/asm/kvm_ppc.h|   29 
 arch/powerpc/kernel/asm-offsets.c |2 +
 arch/powerpc/kvm/book3s_hv.c  |   26 +++-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  102 -
 arch/powerpc/kvm/book3s_xics.c|2 +-
 arch/powerpc/sysdev/xics/icp-native.c |8 +++
 7 files changed, 158 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h 
b/arch/powerpc/include/asm/kvm_book3s_asm.h
index cdc3d27..9039d3c 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -20,6 +20,11 @@
 #ifndef __ASM_KVM_BOOK3S_ASM_H__
 #define __ASM_KVM_BOOK3S_ASM_H__
 
+/* XICS ICP register offsets */
+#define XICS_XIRR  4
+#define XICS_MFRR  0xc
+#define XICS_IPI   2   /* interrupt source # for IPIs */
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -81,10 +86,11 @@ struct kvmppc_host_state {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
u8 hwthread_req;
u8 hwthread_state;
-
+   u8 host_ipi;
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
unsigned long xics_phys;
+   u32 saved_xirr;
u64 dabr;
u64 host_mmcr[3];
u32 host_pmc[8];
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 5f5821b..1f7f5f6 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -264,6 +264,21 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned 
long addr)
paca[cpu].kvm_hstate.xics_phys = addr;
 }
 
+static inline u32 kvmppc_get_xics_latch(void)
+{
+   u32 xirr = get_paca()->kvm_hstate.saved_xirr;
+
+   get_paca()->kvm_hstate.saved_xirr = 0;
+
+   return xirr;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{
+   paca[cpu].kvm_hstate.host_ipi = host_ipi;
+}
+
+extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
 extern void kvm_linear_init(void);
 
 #else
@@ -273,6 +288,18 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned 
long addr)
 static inline void kvm_linear_init(void)
 {}
 
+static inline u32 kvmppc_get_xics_latch(void)
+{
+   return 0;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{}
+
+static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+   kvm_vcpu_kick(vcpu);
+}
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -363,4 +390,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu 
*vcpu, int ra, int rb)
return ea;
 }
 
+extern void xics_wake_cpu(int cpu);
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index d87c908..75e31ac 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -573,6 +573,8 @@ int main(void)
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+   HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
+   HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
HSTATE_FIELD(HSTATE_PMC, host_pmc);
HSTATE_FIELD(HSTATE_PURR, host_purr);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 506b5ea..ceb3d81 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -66,6 +66,31 @@
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
+void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+   int me;
+   int cpu = vcpu->cpu;
+   wait_queue_head_t *wqp;
+
+   wqp = kvm_arch_vcpu_wq(vcpu);
+   if (waitqueue_active(wqp)) {
+   wake_up_interruptible(wqp);
+   ++vcpu->stat.halt_wakeup;
+   }
+
+   me = get_cpu();
+
+   /* CPU points to the first thread of the core */
+   if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
+   int real_cpu = cpu + vcpu->arch.ptid;
+   if (paca[real_cpu].kvm_hstate.xics_phys)
+   xics_wake_cpu(real_cpu);
+   else if (cpu_online(cpu))
+   

[PATCH 5/8] KVM: PPC: Book3S HV: Improve real-mode handling of external interrupts

2013-04-11 Thread Paul Mackerras
This streamlines our handling of external interrupts that come in
while we're in the guest.  First, when waking up a hardware thread
that was napping, we split off the napping due to H_CEDE case
earlier, and use the code that handles an external interrupt (0x500)
in the guest to handle that too.  Secondly, the code that handles
those external interrupts now checks if any other thread is exiting
to the host before bouncing an external interrupt to the guest, and
also checks that there is actually an external interrupt pending for
the guest before setting the LPCR MER bit (mediated external request).

This also makes sure that we clear the ceded flag when we handle a
wakeup from cede in real mode, and fixes a potential infinite loop
in kvmppc_run_vcpu() which can occur if we ever end up with the ceded
flag set but MSR[EE] off.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/reg.h  |1 +
 arch/powerpc/kvm/book3s_hv.c|5 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  140 +--
 3 files changed, 81 insertions(+), 65 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c9c67fc..7993224 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -290,6 +290,7 @@
 #define LPCR_PECE1 0x2000  /* decrementer can cause exit */
 #define LPCR_PECE2 0x1000  /* machine check etc can cause exit */
 #define   LPCR_MER 0x0800  /* Mediated External Exception */
+#define   LPCR_MER_SH  11
 #define   LPCR_LPES0x000c
 #define   LPCR_LPES0   0x0008  /* LPAR Env selector 0 */
 #define   LPCR_LPES1   0x0004  /* LPAR Env selector 1 */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ceb3d81..c066b77 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1376,9 +1376,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, 
struct kvm_vcpu *vcpu)
break;
vc-runner = vcpu;
n_ceded = 0;
-   list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+   list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
	if (!v->arch.pending_exceptions)
		n_ceded += v->arch.ceded;
+   else
+   v->arch.ceded = 0;
+   }
	if (n_ceded == vc->n_runnable)
kvmppc_vcore_blocked(vc);
else
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 4fa187f..3835963 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -97,50 +97,51 @@ kvm_start_guest:
li  r0,1
stb r0,PACA_NAPSTATELOST(r13)
 
-   /* get vcpu pointer, NULL if we have no vcpu to run */
-   ld  r4,HSTATE_KVM_VCPU(r13)
-   cmpdi   cr1,r4,0
+   /* were we napping due to cede? */
+   lbz r0,HSTATE_NAPPING(r13)
+   cmpwi   r0,0
+   bne kvm_end_cede
+
+   /*
+* We weren't napping due to cede, so this must be a secondary
+* thread being woken up to run a guest, or being woken up due
+* to a stray IPI.  (Or due to some machine check or hypervisor
+* maintenance interrupt while the core is in KVM.)
+*/
 
/* Check the wake reason in SRR1 to see why we got here */
mfspr   r3,SPRN_SRR1
rlwinm  r3,r3,44-31,0x7 /* extract wake reason field */
cmpwi   r3,4/* was it an external interrupt? */
-   bne 27f
-
-   /*
-* External interrupt - for now assume it is an IPI, since we
-* should never get any other interrupts sent to offline threads.
-* Only do this for secondary threads.
-*/
-   beq cr1,25f
-   lwz r3,VCPU_PTID(r4)
-   cmpwi   r3,0
-   beq 27f
-25:ld  r5,HSTATE_XICS_PHYS(r13)
-   li  r0,0xff
-   li  r6,XICS_MFRR
-   li  r7,XICS_XIRR
+   bne 27f /* if not */
+   ld  r5,HSTATE_XICS_PHYS(r13)
+   li  r7,XICS_XIRR/* if it was an external interrupt, */
lwzcix  r8,r5,r7/* get and ack the interrupt */
sync
clrldi. r9,r8,40/* get interrupt source ID. */
-   beq 27f /* none there? */
-   cmpwi   r9,XICS_IPI
-   bne 26f
+   beq 28f /* none there? */
+   cmpwi   r9,XICS_IPI /* was it an IPI? */
+   bne 29f
+   li  r0,0xff
+   li  r6,XICS_MFRR
stbcix  r0,r5,r6/* clear IPI */
-26:stwcix  r8,r5,r7/* EOI the interrupt */
-
-27:/* XXX should handle hypervisor maintenance interrupts etc. here */
+  

[PATCH 2/8] KVM: PPC: Book3S: Add kernel emulation for the XICS interrupt controller

2013-04-11 Thread Paul Mackerras
From: Benjamin Herrenschmidt b...@kernel.crashing.org

This adds in-kernel emulation of the XICS (eXternal Interrupt
Controller Specification) interrupt controller specified by PAPR, for
both HV and PR KVM guests.

The XICS emulation supports up to 1048560 interrupt sources.
Interrupt source numbers below 16 are reserved; 0 is used to mean no
interrupt and 2 is used for IPIs.  Internally these are represented in
blocks of 1024, called ICS (interrupt controller source) entities, but
that is not visible to userspace.

Each vcpu gets one ICP (interrupt controller presentation) entity,
used to store the per-vcpu state such as vcpu priority, pending
interrupt state, IPI request, etc.

This does not include any API or any way to connect vcpus to their
ICP state; that will be added in later patches.

This is based on an initial implementation by Michael Ellerman
mich...@ellerman.id.au reworked by Benjamin Herrenschmidt and
Paul Mackerras.

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h |1 +
 arch/powerpc/include/asm/kvm_host.h   |9 +
 arch/powerpc/include/asm/kvm_ppc.h|   27 +
 arch/powerpc/include/uapi/asm/kvm.h   |1 +
 arch/powerpc/kvm/Kconfig  |8 +
 arch/powerpc/kvm/Makefile |3 +
 arch/powerpc/kvm/book3s.c |2 +-
 arch/powerpc/kvm/book3s_hv.c  |9 +
 arch/powerpc/kvm/book3s_pr_papr.c |   16 +
 arch/powerpc/kvm/book3s_rtas.c|   51 +-
 arch/powerpc/kvm/book3s_xics.c|  942 +
 arch/powerpc/kvm/book3s_xics.h|  113 
 arch/powerpc/kvm/powerpc.c|5 +
 13 files changed, 1185 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_xics.c
 create mode 100644 arch/powerpc/kvm/book3s_xics.h

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 5a56e1c..17c9a15 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -142,6 +142,7 @@ extern int kvmppc_mmu_hv_init(void);
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 
bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 
bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int 
vec);
+extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int 
vec);
 extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
 extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
   bool upper, u32 val);
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 8fe8ef5..c7497dd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -188,6 +188,10 @@ struct kvmppc_linear_info {
int  type;
 };
 
+/* XICS components, defined in boo3s_xics.c */
+struct kvmppc_xics;
+struct kvmppc_icp;
+
 /*
  * The reverse mapping array has one entry for each HPTE,
  * which stores the guest's view of the second word of the HPTE
@@ -256,6 +260,7 @@ struct kvm_arch {
 #ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
struct list_head rtas_tokens;
+   struct kvmppc_xics *xics;
 #endif
 };
 
@@ -378,6 +383,7 @@ struct kvmppc_booke_debug_reg {
 
 #define KVMPPC_IRQ_DEFAULT 0
 #define KVMPPC_IRQ_MPIC1
+#define KVMPPC_IRQ_XICS2
 
 struct openpic;
 
@@ -562,6 +568,9 @@ struct kvm_vcpu_arch {
 
int irq_type;   /* one of KVM_IRQ_* */
struct openpic *mpic;   /* KVM_IRQ_MPIC */
+#ifdef CONFIG_KVM_XICS
+   struct kvmppc_icp *icp; /* XICS presentation controller */
+#endif
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
struct kvm_vcpu_arch_shared shregs;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index f4e66c4..5f5821b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -130,6 +130,8 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot, unsigned long porder);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
@@ -169,6 +171,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct 
kvm_interrupt *irq);
 extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
 extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
 extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+extern int kvmppc_xics_set_xive(struct kvm *kvm, 

[PATCH 8/8] KVM: PPC: Book 3S: Add API for in-kernel XICS emulation

2013-04-11 Thread Paul Mackerras
This adds the API for userspace to instantiate an XICS device in a VM
and connect VCPUs to it.  The API consists of a new device type for
the KVM_CREATE_DEVICE ioctl, a new capability KVM_CAP_IRQ_XICS, which
functions similarly to KVM_CAP_IRQ_MPIC, and the KVM_IRQ_LINE ioctl,
which is used to assert and deassert interrupt inputs of the XICS.

The XICS device has one attribute group, KVM_DEV_XICS_GRP_SOURCES.
Each attribute within this group corresponds to the state of one
interrupt source.  The attribute number is the same as the interrupt
source number.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 Documentation/virtual/kvm/api.txt  |8 ++
 Documentation/virtual/kvm/devices/xics.txt |   66 +
 arch/powerpc/kvm/book3s_xics.c |  206 +++-
 arch/powerpc/kvm/powerpc.c |   31 +
 include/linux/kvm_host.h   |1 +
 include/uapi/linux/kvm.h   |   14 ++
 virt/kvm/kvm_main.c|   14 ++
 7 files changed, 335 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/virtual/kvm/devices/xics.txt

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 54bb6ad..db230f8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2756,3 +2756,11 @@ Parameters: args[0] is the MPIC device fd
 args[1] is the MPIC CPU number for this vcpu
 
 This capability connects the vcpu to an in-kernel MPIC device.
+
+6.7 KVM_CAP_IRQ_XICS
+
+Architectures: ppc
+Parameters: args[0] is the XICS device fd
+args[1] is the XICS CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XICS device.
diff --git a/Documentation/virtual/kvm/devices/xics.txt 
b/Documentation/virtual/kvm/devices/xics.txt
new file mode 100644
index 000..4286493
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/xics.txt
@@ -0,0 +1,66 @@
+XICS interrupt controller
+
+Device type supported: KVM_DEV_TYPE_XICS
+
+Groups:
+  KVM_DEV_XICS_SOURCES
+  Attributes: One per interrupt source, indexed by the source number.
+
+This device emulates the XICS (eXternal Interrupt Controller
+Specification) defined in PAPR.  The XICS has a set of interrupt
+sources, each identified by a 20-bit source number, and a set of
+Interrupt Control Presentation (ICP) entities, also called servers,
+each associated with a virtual CPU.
+
+The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH
+capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and
+the interrupt server number (i.e. the vcpu number from the XICS's
+point of view) in args[1] of the kvm_enable_cap struct.  Each ICP has
+64 bits of state which can be read and written using the
+KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu.  The 64 bit
+state word has the following bitfields, starting at the
+least-significant end of the word:
+
+* Unused, 16 bits
+
+* Pending interrupt priority, 8 bits
+  Zero is the highest priority, 255 means no interrupt is pending.
+
+* Pending IPI (inter-processor interrupt) priority, 8 bits
+  Zero is the highest priority, 255 means no IPI is pending.
+
+* Pending interrupt source number, 24 bits
+  Zero means no interrupt pending, 2 means an IPI is pending
+
+* Current processor priority, 8 bits
+  Zero is the highest priority, meaning no interrupts can be
+  delivered, and 255 is the lowest priority.
+
+Each source has 64 bits of state that can be read and written using
+the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
+KVM_DEV_XICS_SOURCES attribute group, with the attribute number being
+the interrupt source number.  The 64 bit state word has the following
+bitfields, starting from the least-significant end of the word:
+
+* Destination (server number), 32 bits
+  This specifies where the interrupt should be sent, and is the
+  interrupt server number specified for the destination vcpu.
+
+* Priority, 8 bits
+  This is the priority specified for this interrupt source, where 0 is
+  the highest priority and 255 is the lowest.  An interrupt with a
+  priority of 255 will never be delivered.
+
+* Level sensitive flag, 1 bit
+  This bit is 1 for a level-sensitive interrupt source, or 0 for
+  edge-sensitive (or MSI).
+
+* Masked flag, 1 bit
+  This bit is set to 1 if the interrupt is masked (cannot be delivered
+  regardless of its priority), for example by the ibm,int-off RTAS
+  call, or 0 if it is not masked.
+
+* Pending flag, 1 bit
+  This bit is 1 if the source has a pending interrupt, otherwise 0.
+
+Only one XICS instance may be created per VM.
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 4eb4f4b..eb58abf 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -11,6 +11,7 @@
 #include <linux/kvm_host.h>
 #include <linux/err.h>
 #include <linux/gfp.h>
+#include <linux/anon_inodes.h>

 #include <asm/uaccess.h>
 #include 

[PATCH 4/8] KVM: PPC: Book3S HV: Add support for real mode ICP in XICS emulation

2013-04-11 Thread Paul Mackerras
From: Benjamin Herrenschmidt b...@kernel.crashing.org

This adds an implementation of the XICS hypercalls in real mode for HV
KVM, which allows us to avoid exiting the guest MMU context on all
threads for a variety of operations such as fetching a pending
interrupt, EOI of messages, IPIs, etc.

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/Makefile   |1 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c|  402 +++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |   10 +-
 arch/powerpc/kvm/book3s_xics.c  |   64 -
 arch/powerpc/kvm/book3s_xics.h  |   16 ++
 5 files changed, 475 insertions(+), 18 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv_rm_xics.c

diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index cccd85f..24a2896 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -77,6 +77,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv_rm_mmu.o \
book3s_64_vio_hv.o \
book3s_hv_ras.o \
+   book3s_hv_rm_xics.o \
book3s_hv_builtin.o
 
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c 
b/arch/powerpc/kvm/book3s_hv_rm_xics.c
new file mode 100644
index 000..4cb7df8
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include linux/kernel.h
+#include linux/kvm_host.h
+#include linux/err.h
+
+#include asm/kvm_book3s.h
+#include asm/kvm_ppc.h
+#include asm/hvcall.h
+#include asm/xics.h
+#include asm/debug.h
+#include asm/synch.h
+#include asm/ppc-opcode.h
+
+#include book3s_xics.h
+
+#define DEBUG_PASSUP
+
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+   __asm__ __volatile__(sync; stbcix %0,0,%1
+   : : r (val), r (paddr) : memory);
+}
+
+static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, struct kvm_vcpu 
*this_vcpu)
+{
+   struct kvmppc_icp *this_icp = this_vcpu-arch.icp;
+   unsigned long xics_phys;
+   int cpu;
+
+   /* Mark the target VCPU as having an interrupt pending */
+   vcpu-stat.queue_intr++;
+   set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, vcpu-arch.pending_exceptions);
+
+   /* Kick self ? Just set MER and return */
+   if (vcpu == this_vcpu) {
+   mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
+   return;
+   }
+
+   /* Check if the core is loaded, if not, too hard */
+   cpu = vcpu-cpu;
+   if (cpu  0 || cpu = nr_cpu_ids) {
+   this_icp-rm_action |= XICS_RM_KICK_VCPU;
+   this_icp-rm_kick_target = vcpu;
+   return;
+   }
+   /* In SMT cpu will always point to thread 0, we adjust it */
+   cpu += vcpu-arch.ptid;
+
+   /* Not too hard, then poke the target */
+   xics_phys = paca[cpu].kvm_hstate.xics_phys;
+   rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+}
+
+static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
+{
+   /* Note: Only called on self ! */
+   clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 
vcpu-arch.pending_exceptions);
+   mtspr(SPRN_LPCR, mfspr(SPRN_LPCR)  ~LPCR_MER);
+}
+
+static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
+union kvmppc_icp_state old,
+union kvmppc_icp_state new)
+{
+   struct kvm_vcpu *this_vcpu = local_paca-kvm_hstate.kvm_vcpu;
+   bool success;
+
+   /* Calculate new output value */
+   new.out_ee = (new.xisr  (new.pending_pri  new.cppr));
+
+   /* Attempt atomic update */
+   success = cmpxchg64(icp-state.raw, old.raw, new.raw) == old.raw;
+   if (!success)
+   goto bail;
+
+   /*
+* Check for output state update
+*
+* Note that this is racy since another processor could be updating
+* the state already. This is why we never clear the interrupt output
+* here, we only ever set it. The clear only happens prior to doing
+* an update and only by the processor itself. Currently we do it
+* in Accept (H_XIRR) and Up_Cppr (H_XPPR).
+*
+* We also do not try to figure out whether the EE state has changed,
+* we unconditionally set it if the new state calls for it. The reason
+* for that is that we opportunistically remove the pending interrupt
+* flag when raising CPPR, so we need to set it back here if an
+* interrupt is still pending.
+*/
+   if (new.out_ee)
+   icp_rm_set_vcpu_irq(icp-vcpu, this_vcpu);
+
+   /* Expose the 

[PATCH 1/8] KVM: PPC: Book3S: Add infrastructure to implement kernel-side RTAS calls

2013-04-11 Thread Paul Mackerras
From: Michael Ellerman mich...@ellerman.id.au

For pseries machine emulation, in order to move the interrupt
controller code to the kernel, we need to intercept some RTAS
calls in the kernel itself.  This adds an infrastructure to allow
in-kernel handlers to be registered for RTAS services by name.
A new ioctl, KVM_PPC_RTAS_DEFINE_TOKEN, then allows userspace to
associate token values with those service names.  Then, when the
guest requests an RTAS service with one of those token values, it
will be handled by the relevant in-kernel handler rather than being
passed up to userspace as at present.

Signed-off-by: Michael Ellerman mich...@ellerman.id.au
Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
Signed-off-by: Paul Mackerras pau...@samba.org
---
 Documentation/virtual/kvm/api.txt   |   19 
 arch/powerpc/include/asm/hvcall.h   |3 +
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/include/asm/kvm_ppc.h  |4 +
 arch/powerpc/include/uapi/asm/kvm.h |6 ++
 arch/powerpc/kvm/Makefile   |1 +
 arch/powerpc/kvm/book3s_hv.c|   18 +++-
 arch/powerpc/kvm/book3s_pr.c|1 +
 arch/powerpc/kvm/book3s_pr_papr.c   |7 ++
 arch/powerpc/kvm/book3s_rtas.c  |  182 +++
 arch/powerpc/kvm/powerpc.c  |8 ++
 include/uapi/linux/kvm.h|3 +
 12 files changed, 252 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/kvm/book3s_rtas.c

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 4c326ae..4247d65 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2325,6 +2325,25 @@ and distributor interface, the ioctl must be called 
after calling
 KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs.  Calling
 this ioctl twice for any of the base addresses will return -EEXIST.
 
+4.82 KVM_PPC_RTAS_DEFINE_TOKEN
+
+Capability: KVM_CAP_PPC_RTAS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_rtas_token_args
+Returns: 0 on success, -1 on error
+
+Defines a token value for a RTAS (Run Time Abstraction Services)
+service in order to allow it to be handled in the kernel.  The
+argument struct gives the name of the service, which must be the name
+of a service that has a kernel-side implementation.  If the token
+value is non-zero, it will be associated with that service, and
+subsequent RTAS calls by the guest specifying that token will be
+handled by the kernel.  If the token value is 0, then any token
+associated with the service will be forgotten, and subsequent RTAS
+calls by the guest for that service will be passed to userspace to be
+handled.
+
 
 5. The kvm_run structure
 
diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 4bc2c3d..cf4df8e 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -270,6 +270,9 @@
 #define H_SET_MODE 0x31C
 #define MAX_HCALL_OPCODE   H_SET_MODE
 
+/* Platform specific hcalls, used by KVM */
+#define H_RTAS 0xf000
+
 #ifndef __ASSEMBLY__
 
 /**
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 2a2e235..8fe8ef5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -255,6 +255,7 @@ struct kvm_arch {
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
+   struct list_head rtas_tokens;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index f54707f..f4e66c4 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -166,6 +166,10 @@ extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, 
struct kvm_get_htab_fd *);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
 
+extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
+extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
+extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index ef072b1..a599ea5 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -299,6 +299,12 @@ struct kvm_allocate_rma {
__u64 rma_size;
 };
 
+/* for KVM_CAP_PPC_RTAS */
+struct kvm_rtas_token_args {
+   char name[120];
+   __u64 token;/* Use a token of 0 to undefine a mapping */
+};
+
 struct kvm_book3e_206_tlb_entry {
__u32 mas8;
__u32 mas1;
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 4a2277a..d2c8a88 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -86,6 +86,7 @@ kvm-book3s_64-module-objs := \
   

RE: [PATCH v9 0/7] KVM: VMX: Add Posted Interrupt supporting

2013-04-11 Thread Zhang, Yang Z
Gleb Natapov wrote on 2013-04-11:
 On Thu, Apr 11, 2013 at 01:03:30AM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-04-10:
 On Wed, Apr 10, 2013 at 09:22:50PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 The follwoing patches are adding the Posted Interrupt supporting to KVM:
 The following patches are adding the Posted Interrupt supporting to KVM:
 The first patch enables the feature 'acknowledge interrupt on vmexit'.Since
 it is required by Posted interrupt, we need to enable it firstly.
 
 And the subsequent patches are adding the posted interrupt supporting:
 Posted Interrupt allows APIC interrupts to inject into guest directly
 without any vmexit.
 
 - When delivering a interrupt to guest, if target vcpu is running,
   update Posted-interrupt requests bitmap and send a notification
   event to the vcpu. Then the vcpu will handle this interrupt
   automatically, without any software involvement. - If target vcpu is
   not running or there already a notification event pending in the
   vcpu, do nothing. The interrupt will be handled by next vm entry
 Changes from v8 to v9:
 * Add tracing in PI case when deliver interrupt.
 * Scan ioapic when updating SPIV register.
 Do not see it at the patch series. Have I missed it?
 The change is in forth patch:
 
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 6796218..4ccdc94 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -134,11 +134,7 @@ static inline void apic_set_spiv(struct kvm_lapic *apic,
 u32 val)
  static_key_slow_inc(apic_sw_disabled.key);
  }
  apic_set_reg(apic, APIC_SPIV, val);
 -}
 -
 -static inline int apic_enabled(struct kvm_lapic *apic)
 -{
 -return kvm_apic_sw_enabled(apic)  kvm_apic_hw_enabled(apic);
 +kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic-vcpu);
  }
 OK, see it now. Thanks.
 
 As you mentioned, since it will call apic_enabled() to check whether apic is
 enabled in vcpu_scan_ioapic. So we must ensure rescan ioapic when apic state
 changed.
 And I found recalculate_apic_map() doesn't track the enable/disable apic by
 software approach. So make_scan_ioapic_request in recalculate_apic_map() is
 not enough.
 We also should force rescan ioapic when apic state is changed via
 software approach(update spiv reg).
 
 10.4.7.2 Local APIC State After It Has Been Software Disabled says:
 
   Pending interrupts in the IRR and ISR registers are held and require
   masking or handling by the CPU.
 My understanding is that we should treat software disabled APIC as a
 valid target for an interrupt. vcpu_scan_ioapic() should check
 kvm_apic_hw_enabled() only.
Indeed. kvm_apic_hw_enabled() is the right one.

Best regards,
Yang


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/11] KVM: nVMX: Stats counters for nVMX

2013-04-11 Thread Gleb Natapov
On Wed, Apr 10, 2013 at 10:08:56PM +0300, Abel Gordon wrote:
 
 
 Gleb Natapov g...@redhat.com wrote on 08/04/2013 01:27:28 PM:
 
 
  stat counters are deprecated in favor of trace points. Adding kvmnested
  trace system is very welcome though.
 
 So, should I keep or remove this patch ?
Remove.

 If a kvmnested trace system is added then I'll add shadow-vmcs related
 events
Adding it is as simple as adding arch/x86/kvm/nestedtrace.h file similar
to arch/x86/kvm/mmutrace.h. Doing #define TRACE_SYSTEM kvmnested there
and adding trace events that you want to trace. The advantage is that
you can provide additional information with each trace point. For
example name and value of a field that was vmread.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] KVM: nVMX: Synchronize VMCS12 content with the shadow vmcs

2013-04-11 Thread Gleb Natapov
On Wed, Apr 10, 2013 at 10:15:37PM +0300, Abel Gordon wrote:
 
 
 Gleb Natapov g...@redhat.com wrote on 09/04/2013 04:14:35 PM:
 
  On Mon, Mar 11, 2013 at 09:54:47AM +0200, Abel Gordon wrote:
   Nadav Har'El n...@math.technion.ac.il wrote on 11/03/2013 12:43:35
 AM:
  
  
On Sun, Mar 10, 2013, Abel Gordon wrote about [PATCH 10/11] KVM:
nVMX: Synchronize VMCS12 content with the shadow vmcs:
 nested_vmx_vmexit(vcpu);
 +   if (enable_shadow_vmcs)
 +  copy_vmcs12_to_shadow(to_vmx(vcpu));
   
I was curious why your patch adds this call to copy_vmcs12_to_shadow
   after
every nested_vmx_vmexit (3 times), instead of making this call inside
nested_vmx_vmexit(), say right after prepare_vmcs12(). Until I saw:
  
 
   Because nested code sometimes modifies vmcs fields after
   nested_vmx_vmexit (see below). I was afraid nested logic
   may be changed in the future and some field may become out-of-sync.
  
   If we do have to call copy_vmcs12_to_shadow explicitly, then, it will
 be
   more difficult to miss some field.
  
 
 
  I think the patch already miss some fields. What if nested_vmx_run()
  fails and calls nested_vmx_entry_failure(). nested_vmx_entry_failure()
  sets vmcs12-vm_exit_reason and vmcs12-exit_qualification, but where do
  we copy them back to shadow before going back to L1?
 
 Good catch! :)
 
 Note that the entry path is easy to handle because we copy the fields
 as part of nested_vmx_entry. This is not like exit path where
 KVM(L0) code can modify fields after nested_vmx_vmexit is called.
 
 So here, we could simple call copy_vmcs12_to_shadow if the entry fails
 (as part of nested_vmx_entry_failure or nested_vmx). We could optimize
 the code by updating these specific fields directly, but I don't think
 we really need to optimize code that is part of the error path.
 
We needn't.

  May be we need to introduce vmcs12 accessors to track what is changes
  and if something need to be copied to shadow before going back to L1.
 
 That means we will need to modify all the lines of code that uses
 vmcs12- with an inline nested_vmcs_read or nested_vmcs_write function.
 Inside these inline functions we could access the shadow vmcs directly.
 However, to access the shadow vmcs we need to vmptrld first and this will
 force
 unnecessary vmptrlds (between shadow vmcs 12 and current vmcs 01) each time
 the code accesses a vmcs12 field. Alternatively, if we want to avoid
 unnecessary vmptrlds each time we access vmcs12 we could simple set a
 flag that indicates when a shadow field was changed. In this case, we will
 need to find all the places to check the flag and copy the fields,
 considering both success and error paths.
That's not how I see it. nested_vmcs_write() will set a request bit in
vcpu (this is the flag you mention above). The bit will be checked during
a guest entry and vmcs12 will be synced to shadow at this point. Later
we can track what fields were written and sync only them.

 Finally, I am afraid that these directions will introduce new issues,
 will force us to modify too many lines and they may create a merge/rebase
 mess...
Conflicts should be trivial and I do not expect many iterations for the
patch series. I like the approach you take to use vmcs shadowing and most
comment are nitpicks.  I promise to review the next version as soon as
it is posted :)

 
 Maybe we should simple fix nested_vmx_entry_failure (as well as the
 other fixes you suggested in other patches) and apply the code.
 Do you agree ?
 
The approach is error prone. Even if we will fix all bugs in the current
code each new nested vmx modification will have to be reviewed with shadowing
in mind and sometimes it is hard to see global picture just from a
patch. It is better to hide shadow details from occasional nested vmx hacker.
Are you expecting EPT patches to make it right for instance?

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] vhost_net: remove tx polling state

2013-04-11 Thread Jason Wang
After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
errors when setting backend), we in fact track the polling state through
poll-wqh, so there's no need to duplicate the work with an extra
vhost_net_polling_state. So this patch removes this and makes the code simpler.

This patch also removes the all tx starting/stopping code in tx path according
to Michael's suggestion.

Netperf test shows almost the same result in stream test, but gets improvements
on TCP_RR tests (both zerocopy or copy) especially on low load cases.

Tested between multiqueue kvm guest and external host with two direct
connected 82599s.

zerocopy disabled:

sessions|transaction rates|normalize|
before/after/+improvements
1 | 9510.24/11727.29/+23.3%| 693.54/887.68/+28.0%   |
25| 192931.50/241729.87/+25.3% | 2376.80/2771.70/+16.6% |
50| 277634.64/291905.76/+5%| 3118.36/3230.11/+3.6%  |

zerocopy enabled:

sessions|transaction rates|normalize|
before/after/+improvements
1 | 7318.33/11929.76/+63.0%| 521.86/843.30/+61.6%   |
25| 167264.88/242422.15/+44.9% | 2181.60/2788.16/+27.8% |
50| 272181.02/294347.04/+8.1%  | 3071.56/3257.85/+6.1%  |

Signed-off-by: Jason Wang jasow...@redhat.com
---
 drivers/vhost/net.c   |   74 -
 drivers/vhost/vhost.c |3 ++
 2 files changed, 9 insertions(+), 68 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index ec6fb3f..87c216c 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -64,20 +64,10 @@ enum {
VHOST_NET_VQ_MAX = 2,
 };
 
-enum vhost_net_poll_state {
-   VHOST_NET_POLL_DISABLED = 0,
-   VHOST_NET_POLL_STARTED = 1,
-   VHOST_NET_POLL_STOPPED = 2,
-};
-
 struct vhost_net {
struct vhost_dev dev;
struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
struct vhost_poll poll[VHOST_NET_VQ_MAX];
-   /* Tells us whether we are polling a socket for TX.
-* We only do this when socket buffer fills up.
-* Protected by tx vq lock. */
-   enum vhost_net_poll_state tx_poll_state;
/* Number of TX recently submitted.
 * Protected by tx vq lock. */
unsigned tx_packets;
@@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, 
struct iovec *to,
}
 }
 
-/* Caller must have TX VQ lock */
-static void tx_poll_stop(struct vhost_net *net)
-{
-   if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED))
-   return;
-   vhost_poll_stop(net-poll + VHOST_NET_VQ_TX);
-   net-tx_poll_state = VHOST_NET_POLL_STOPPED;
-}
-
-/* Caller must have TX VQ lock */
-static int tx_poll_start(struct vhost_net *net, struct socket *sock)
-{
-   int ret;
-
-   if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED))
-   return 0;
-   ret = vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file);
-   if (!ret)
-   net-tx_poll_state = VHOST_NET_POLL_STARTED;
-   return ret;
-}
-
 /* In case of DMA done not in order in lower device driver for some reason.
  * upend_idx is used to track end of used idx, done_idx is used to track head
  * of used idx. Once lower device DMA done contiguously, we will signal KVM
@@ -242,7 +210,7 @@ static void handle_tx(struct vhost_net *net)
.msg_flags = MSG_DONTWAIT,
};
size_t len, total_len = 0;
-   int err, wmem;
+   int err;
size_t hdr_size;
struct socket *sock;
struct vhost_ubuf_ref *uninitialized_var(ubufs);
@@ -253,19 +221,9 @@ static void handle_tx(struct vhost_net *net)
if (!sock)
return;
 
-   wmem = atomic_read(sock-sk-sk_wmem_alloc);
-   if (wmem = sock-sk-sk_sndbuf) {
-   mutex_lock(vq-mutex);
-   tx_poll_start(net, sock);
-   mutex_unlock(vq-mutex);
-   return;
-   }
-
mutex_lock(vq-mutex);
vhost_disable_notify(net-dev, vq);
 
-   if (wmem  sock-sk-sk_sndbuf / 2)
-   tx_poll_stop(net);
hdr_size = vq-vhost_hlen;
zcopy = vq-ubufs;
 
@@ -285,23 +243,14 @@ static void handle_tx(struct vhost_net *net)
if (head == vq-num) {
int num_pends;
 
-   wmem = atomic_read(sock-sk-sk_wmem_alloc);
-   if (wmem = sock-sk-sk_sndbuf * 3 / 4) {
-   tx_poll_start(net, sock);
-   set_bit(SOCK_ASYNC_NOSPACE, sock-flags);
-   break;
-   }
/* If more outstanding DMAs, queue the work.
 * Handle upend_idx wrap around
 */
num_pends = likely(vq-upend_idx = vq-done_idx) ?
(vq-upend_idx - vq-done_idx) :
(vq-upend_idx + UIO_MAXIOV - vq-done_idx);
-   if (unlikely(num_pends  VHOST_MAX_PEND)) 

Re: [PATCH v9 7/7] KVM: Use eoi to track RTC interrupt delivery status

2013-04-11 Thread Gleb Natapov
On Wed, Apr 10, 2013 at 09:22:20PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Current interrupt coalescing logic which only used by RTC has conflict
 with Posted Interrupt.
 This patch introduces a new mechanism to use eoi to track interrupt:
 When delivering an interrupt to vcpu, the pending_eoi set to number of
 vcpu that received the interrupt. And decrease it when each vcpu writing
 eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
 write eoi.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  virt/kvm/ioapic.c |   39 ++-
  1 files changed, 38 insertions(+), 1 deletions(-)
 
 diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
 index a49fcd5..aeac154 100644
 --- a/virt/kvm/ioapic.c
 +++ b/virt/kvm/ioapic.c
 @@ -147,6 +147,26 @@ static void kvm_rtc_eoi_tracking_restore_all(struct 
 kvm_ioapic *ioapic)
   __rtc_irq_eoi_tracking_restore_one(vcpu);
  }
  
 +static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
 +{
 + if (test_and_clear_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map))
 + --ioapic-rtc_status.pending_eoi;
 +
 + WARN_ON(ioapic-rtc_status.pending_eoi  0);
 +}
 +
 +static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic, int irq,
 + bool line_status)
 +{
 + if (irq != RTC_GSI || !line_status)
 + return false;
Please move the check from rtc_irq_check_coalesced() to
kvm_ioapic_set_irq() like this:
if (irq == RTC_GSI  line_status  rtc_irq_check_coalesced(ioapic, irq, 
line_status))


I was going to fix it myself while applying, but since there will be
new posted interrupt series anyway you can as well fix this one too.

 +
 + if (ioapic-rtc_status.pending_eoi  0)
 + return true; /* coalesced */
 +
 + return false;
 +}
 +
  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
   bool line_status)
  {
 @@ -260,6 +280,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
 irq, bool line_status)
  {
   union kvm_ioapic_redirect_entry *entry = ioapic-redirtbl[irq];
   struct kvm_lapic_irq irqe;
 + int ret;
  
   ioapic_debug(dest=%x dest_mode=%x delivery_mode=%x 
vector=%x trig_mode=%x\n,
 @@ -275,7 +296,15 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
 irq, bool line_status)
   irqe.level = 1;
   irqe.shorthand = 0;
  
 - return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
 + if (irq == RTC_GSI  line_status) {
 + BUG_ON(ioapic-rtc_status.pending_eoi != 0);
 + ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe,
 + ioapic-rtc_status.dest_map);
 + ioapic-rtc_status.pending_eoi = ret;
 + } else
 + ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
 +
 + return ret;
  }
  
  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 @@ -299,6 +328,11 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int 
 irq, int irq_source_id,
   ret = 1;
   } else {
   int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
 +
 + if (rtc_irq_check_coalesced(ioapic, irq, line_status)) {
 + ret = 0; /* coalesced */
 + goto out;
 + }
   ioapic-irr |= mask;
   if ((edge  old_irr != ioapic-irr) ||
   (!edge  !entry.fields.remote_irr))
 @@ -306,6 +340,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int 
 irq, int irq_source_id,
   else
   ret = 0; /* report coalesced interrupt */
   }
 +out:
   trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
   spin_unlock(ioapic-lock);
  
 @@ -333,6 +368,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
   if (ent-fields.vector != vector)
   continue;
  
 + if (i == RTC_GSI)
 + rtc_irq_eoi(ioapic, vcpu);
   /*
* We are dropping lock while calling ack notifiers because ack
* notifier callbacks for assigned devices call into IOAPIC
 -- 
 1.7.1

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v9 7/7] KVM: Use eoi to track RTC interrupt delivery status

2013-04-11 Thread Zhang, Yang Z
Gleb Natapov wrote on 2013-04-11:
 On Wed, Apr 10, 2013 at 09:22:20PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Current interrupt coalescing logci which only used by RTC has conflict
 with Posted Interrupt.
 This patch introduces a new mechinism to use eoi to track interrupt:
 When delivering an interrupt to vcpu, the pending_eoi set to number of
 vcpu that received the interrupt. And decrease it when each vcpu writing
 eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
 write eoi.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  virt/kvm/ioapic.c |   39 ++-
  1 files changed, 38 insertions(+), 1 deletions(-)
 diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
 index a49fcd5..aeac154 100644
 --- a/virt/kvm/ioapic.c
 +++ b/virt/kvm/ioapic.c
 @@ -147,6 +147,26 @@ static void kvm_rtc_eoi_tracking_restore_all(struct
 kvm_ioapic *ioapic)
  __rtc_irq_eoi_tracking_restore_one(vcpu);
  }
 +static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
 +{
 +if (test_and_clear_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map))
 +--ioapic-rtc_status.pending_eoi;
 +
 +WARN_ON(ioapic-rtc_status.pending_eoi  0);
 +}
 +
 +static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic, int irq,
 +bool line_status)
 +{
 +if (irq != RTC_GSI || !line_status)
 +return false;
 Please move the check from rtc_irq_check_coalesced() to
 kvm_ioapic_set_irq() like this: if (irq == RTC_GSI  line_status 
 rtc_irq_check_coalesced(ioapic, irq, line_status)) 
 
 I was going to fix it myself while applying, but since there will be
 new posted interrupt series anyway you can as well fix this one too.
You mean fix it and send out it with posted interrupt series? Or just rebase 
the posted interrupt series on the top of this fix, but needn't to send out it?

 
 +
 +if (ioapic-rtc_status.pending_eoi  0)
 +return true; /* coalesced */
 +
 +return false;
 +}
 +
  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
  bool line_status)
  {
 @@ -260,6 +280,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
 irq,
 bool line_status)
  {
  union kvm_ioapic_redirect_entry *entry = ioapic-redirtbl[irq];
  struct kvm_lapic_irq irqe;
 +int ret;
 
  ioapic_debug(dest=%x dest_mode=%x delivery_mode=%x 
   vector=%x trig_mode=%x\n,
 @@ -275,7 +296,15 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
 irq, bool line_status)
  irqe.level = 1;
  irqe.shorthand = 0;
 -return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
 +if (irq == RTC_GSI  line_status) {
 +BUG_ON(ioapic-rtc_status.pending_eoi != 0);
 +ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe,
 +ioapic-rtc_status.dest_map);
 +ioapic-rtc_status.pending_eoi = ret;
 +} else
 +ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
 +
 +return ret;
  }
  
  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int 
 irq_source_id,
 @@ -299,6 +328,11 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int
 irq, int irq_source_id,
  ret = 1;
  } else {
  int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
 +
 +if (rtc_irq_check_coalesced(ioapic, irq, line_status)) {
 +ret = 0; /* coalesced */
 +goto out;
 +}
  ioapic-irr |= mask;
  if ((edge  old_irr != ioapic-irr) ||
  (!edge  !entry.fields.remote_irr))
 @@ -306,6 +340,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int 
 irq,
 int irq_source_id,
  elseret = 0; /* report coalesced interrupt 
 */   } +out:
  trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
  spin_unlock(ioapic-lock);
 @@ -333,6 +368,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu
 *vcpu,
  if (ent-fields.vector != vector)
  continue;
 +if (i == RTC_GSI)
 +rtc_irq_eoi(ioapic, vcpu);
  /*
   * We are dropping lock while calling ack notifiers because ack
   * notifier callbacks for assigned devices call into IOAPIC
 --
 1.7.1
 
 --
   Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v9 7/7] KVM: Use eoi to track RTC interrupt delivery status

2013-04-11 Thread Gleb Natapov
On Thu, Apr 11, 2013 at 07:54:01AM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-04-11:
  On Wed, Apr 10, 2013 at 09:22:20PM +0800, Yang Zhang wrote:
  From: Yang Zhang yang.z.zh...@intel.com
  
  Current interrupt coalescing logci which only used by RTC has conflict
  with Posted Interrupt.
  This patch introduces a new mechinism to use eoi to track interrupt:
  When delivering an interrupt to vcpu, the pending_eoi set to number of
  vcpu that received the interrupt. And decrease it when each vcpu writing
  eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
  write eoi.
  
  Signed-off-by: Yang Zhang yang.z.zh...@intel.com
  ---
   virt/kvm/ioapic.c |   39 ++-
   1 files changed, 38 insertions(+), 1 deletions(-)
  diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
  index a49fcd5..aeac154 100644
  --- a/virt/kvm/ioapic.c
  +++ b/virt/kvm/ioapic.c
  @@ -147,6 +147,26 @@ static void kvm_rtc_eoi_tracking_restore_all(struct
  kvm_ioapic *ioapic)
 __rtc_irq_eoi_tracking_restore_one(vcpu);
   }
  +static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
  +{
  +  if (test_and_clear_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map))
  +  --ioapic-rtc_status.pending_eoi;
  +
  +  WARN_ON(ioapic-rtc_status.pending_eoi  0);
  +}
  +
  +static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic, int irq,
  +  bool line_status)
  +{
  +  if (irq != RTC_GSI || !line_status)
  +  return false;
  Please move the check from rtc_irq_check_coalesced() to
  kvm_ioapic_set_irq() like this: if (irq == RTC_GSI  line_status 
  rtc_irq_check_coalesced(ioapic, irq, line_status)) 
  
  I was going to fix it myself while applying, but since there will be
  new posted interrupt series anyway you can as well fix this one too.
 You mean fix it and send out it with posted interrupt series? Or just rebase 
 the posted interrupt series on the top of this fix, but needn't to send out 
 it?
 
Send both series. RTC one with this change.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v9 7/7] KVM: Use eoi to track RTC interrupt delivery status

2013-04-11 Thread Zhang, Yang Z
Gleb Natapov wrote on 2013-04-11:
 On Thu, Apr 11, 2013 at 07:54:01AM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-04-11:
 On Wed, Apr 10, 2013 at 09:22:20PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Current interrupt coalescing logci which only used by RTC has conflict
 with Posted Interrupt.
 This patch introduces a new mechinism to use eoi to track interrupt:
 When delivering an interrupt to vcpu, the pending_eoi set to number of
 vcpu that received the interrupt. And decrease it when each vcpu writing
 eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
 write eoi.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  virt/kvm/ioapic.c |   39 ++- 1
  files changed, 38 insertions(+), 1 deletions(-)
 diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
 index a49fcd5..aeac154 100644
 --- a/virt/kvm/ioapic.c
 +++ b/virt/kvm/ioapic.c
 @@ -147,6 +147,26 @@ static void kvm_rtc_eoi_tracking_restore_all(struct
 kvm_ioapic *ioapic)
__rtc_irq_eoi_tracking_restore_one(vcpu);
  }
 +static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
 +{
 +  if (test_and_clear_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map))
 +  --ioapic-rtc_status.pending_eoi;
 +
 +  WARN_ON(ioapic-rtc_status.pending_eoi  0);
 +}
 +
 +static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic, int irq,
 +  bool line_status)
 +{
 +  if (irq != RTC_GSI || !line_status)
 +  return false;
 Please move the check from rtc_irq_check_coalesced() to
 kvm_ioapic_set_irq() like this: if (irq == RTC_GSI && line_status &&
 rtc_irq_check_coalesced(ioapic, irq, line_status)) 
 
 I was going to fix it myself while applying, but since there will be
 new posted interrupt series anyway you can as well fix this one too.
 You mean fix it and send it out with the posted interrupt series? Or just
 rebase the posted interrupt series on top of this fix, without needing
 to send it out?
 
 Send both series. RTC one with this change.
Sure.

Best regards,
Yang


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/4] tcm_vhost fix cmd leak and send bad target

2013-04-11 Thread Michael S. Tsirkin
On Wed, Apr 10, 2013 at 02:19:02PM -0700, Nicholas A. Bellinger wrote:
 On Wed, 2013-04-10 at 15:06 +0800, Asias He wrote:
  v2:
  - Fix the order of out and head parameter.
  
  Asias He (4):
tcm_vhost: Remove double check of response
tcm_vhost: Fix tv_cmd leak in vhost_scsi_handle_vq
tcm_vhost: Add vhost_scsi_send_bad_target() helper
tcm_vhost: Send bad target to guest when cmd fails
  
   drivers/vhost/tcm_vhost.c | 53 
  +--
   1 file changed, 28 insertions(+), 25 deletions(-)
  
 
 Looks good.  MST, care to ACK for 3.9..?
 
 Thanks Asias!
 
 --nab

Sounds like a reasonable thing to apply for 3.9.

Acked-by: Michael S. Tsirkin m...@redhat.com


-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] vhost_net: remove tx polling state

2013-04-11 Thread Michael S. Tsirkin
On Thu, Apr 11, 2013 at 02:50:48PM +0800, Jason Wang wrote:
 After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle 
 polling
 errors when setting backend), we in fact track the polling state through
 poll->wqh, so there's no need to duplicate the work with an extra
 vhost_net_polling_state. So this patch removes it and makes the code simpler.
 
 This patch also removes the all tx starting/stopping code in tx path according
 to Michael's suggestion.
 
 Netperf test shows almost the same result in stream test, but gets 
 improvements
 on TCP_RR tests (both zerocopy or copy) especially on low load cases.
 
 Tested between multiqueue kvm guest and external host with two direct
 connected 82599s.
 
 zerocopy disabled:
 
 sessions|transaction rates|normalize|
 before/after/+improvements
 1 | 9510.24/11727.29/+23.3%| 693.54/887.68/+28.0%   |
 25| 192931.50/241729.87/+25.3% | 2376.80/2771.70/+16.6% |
 50| 277634.64/291905.76/+5%| 3118.36/3230.11/+3.6%  |
 
 zerocopy enabled:
 
 sessions|transaction rates|normalize|
 before/after/+improvements
 1 | 7318.33/11929.76/+63.0%| 521.86/843.30/+61.6%   |
 25| 167264.88/242422.15/+44.9% | 2181.60/2788.16/+27.8% |
 50| 272181.02/294347.04/+8.1%  | 3071.56/3257.85/+6.1%  |
 
 Signed-off-by: Jason Wang jasow...@redhat.com

Less code and better speed, what's not to like.
Davem, could you pick this up for 3.10 please?

Acked-by: Michael S. Tsirkin m...@redhat.com


 ---
  drivers/vhost/net.c   |   74 
 -
  drivers/vhost/vhost.c |3 ++
  2 files changed, 9 insertions(+), 68 deletions(-)
 
 diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
 index ec6fb3f..87c216c 100644
 --- a/drivers/vhost/net.c
 +++ b/drivers/vhost/net.c
 @@ -64,20 +64,10 @@ enum {
   VHOST_NET_VQ_MAX = 2,
  };
  
 -enum vhost_net_poll_state {
 - VHOST_NET_POLL_DISABLED = 0,
 - VHOST_NET_POLL_STARTED = 1,
 - VHOST_NET_POLL_STOPPED = 2,
 -};
 -
  struct vhost_net {
   struct vhost_dev dev;
   struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
   struct vhost_poll poll[VHOST_NET_VQ_MAX];
 - /* Tells us whether we are polling a socket for TX.
 -  * We only do this when socket buffer fills up.
 -  * Protected by tx vq lock. */
 - enum vhost_net_poll_state tx_poll_state;
   /* Number of TX recently submitted.
* Protected by tx vq lock. */
   unsigned tx_packets;
 @@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, 
 struct iovec *to,
   }
  }
  
 -/* Caller must have TX VQ lock */
 -static void tx_poll_stop(struct vhost_net *net)
 -{
 - if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED))
 - return;
 - vhost_poll_stop(net-poll + VHOST_NET_VQ_TX);
 - net-tx_poll_state = VHOST_NET_POLL_STOPPED;
 -}
 -
 -/* Caller must have TX VQ lock */
 -static int tx_poll_start(struct vhost_net *net, struct socket *sock)
 -{
 - int ret;
 -
 - if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED))
 - return 0;
 - ret = vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file);
 - if (!ret)
 - net-tx_poll_state = VHOST_NET_POLL_STARTED;
 - return ret;
 -}
 -
  /* In case of DMA done not in order in lower device driver for some reason.
   * upend_idx is used to track end of used idx, done_idx is used to track head
   * of used idx. Once lower device DMA done contiguously, we will signal KVM
 @@ -242,7 +210,7 @@ static void handle_tx(struct vhost_net *net)
   .msg_flags = MSG_DONTWAIT,
   };
   size_t len, total_len = 0;
 - int err, wmem;
 + int err;
   size_t hdr_size;
   struct socket *sock;
   struct vhost_ubuf_ref *uninitialized_var(ubufs);
 @@ -253,19 +221,9 @@ static void handle_tx(struct vhost_net *net)
   if (!sock)
   return;
  
 - wmem = atomic_read(sock-sk-sk_wmem_alloc);
 - if (wmem = sock-sk-sk_sndbuf) {
 - mutex_lock(vq-mutex);
 - tx_poll_start(net, sock);
 - mutex_unlock(vq-mutex);
 - return;
 - }
 -
   mutex_lock(vq-mutex);
   vhost_disable_notify(net-dev, vq);
  
 - if (wmem  sock-sk-sk_sndbuf / 2)
 - tx_poll_stop(net);
   hdr_size = vq-vhost_hlen;
   zcopy = vq-ubufs;
  
 @@ -285,23 +243,14 @@ static void handle_tx(struct vhost_net *net)
   if (head == vq-num) {
   int num_pends;
  
 - wmem = atomic_read(sock-sk-sk_wmem_alloc);
 - if (wmem = sock-sk-sk_sndbuf * 3 / 4) {
 - tx_poll_start(net, sock);
 - set_bit(SOCK_ASYNC_NOSPACE, sock-flags);
 - break;
 - }
   /* If more outstanding DMAs, queue the work.
* Handle upend_idx wrap around
*/
   num_pends = likely(vq-upend_idx = 

Re: [PATCH v2 0/4] tcm_vhost fix cmd leak and send bad target

2013-04-11 Thread Asias He
On Thu, Apr 11, 2013 at 10:22:33AM +0300, Michael S. Tsirkin wrote:
 On Wed, Apr 10, 2013 at 02:19:02PM -0700, Nicholas A. Bellinger wrote:
  On Wed, 2013-04-10 at 15:06 +0800, Asias He wrote:
   v2:
   - Fix the order of out and head parameter.
   
   Asias He (4):
 tcm_vhost: Remove double check of response
 tcm_vhost: Fix tv_cmd leak in vhost_scsi_handle_vq
 tcm_vhost: Add vhost_scsi_send_bad_target() helper
 tcm_vhost: Send bad target to guest when cmd fails
   
drivers/vhost/tcm_vhost.c | 53 
   +--
1 file changed, 28 insertions(+), 25 deletions(-)
   
  
  Looks good.  MST, care to ACK for 3.9..?
  
  Thanks Asias!
  
  --nab
 
 Sounds like a reasonable thing to apply for 3.9.
 
 Acked-by: Michael S. Tsirkin m...@redhat.com

Thanks!

-- 
Asias
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] reply: reply: qemu crashed when starting vm(kvm) with vnc connect

2013-04-11 Thread Stefan Hajnoczi
On Mon, Apr 08, 2013 at 12:27:06PM +, Zhanghaoyu (A) wrote:
 On Sun, Apr 07, 2013 at 04:58:07AM +, Zhanghaoyu (A) wrote:
   I start a kvm VM with vnc(using the zrle protocol) connect, sometimes 
   qemu program crashed during starting period, received signal SIGABRT.
   Trying about 20 times, this crash may be reproduced.
   I guess the cause is memory corruption or a double free.
  
   Which version of QEMU are you running?
   
   Please try qemu.git/master.
   
   Stefan
  
  I used the QEMU download from qemu.git (http://git.qemu.org/git/qemu.git).
 
  Great, thanks!  Can you please post a backtrace?
  
  The easiest way is:
  
   $ ulimit -c unlimited
   $ qemu-system-x86_64 -enable-kvm -m 1024 ...
   ...crash...
   $ gdb -c qemu-system-x86_64.core
   (gdb) bt
  
  Depending on how your system is configured the core file might have a 
  different filename but there should be a file name *core* the current 
  working directory
 after the crash.
  
  The backtrace will make it possible to find out where the crash occurred.
  
  Thanks,
  Stefan
 
 backtrace from core file is shown as below:
 
 Program received signal SIGABRT, Aborted.
 0x7f32eda3dd95 in raise () from /lib64/libc.so.6
 (gdb) bt
 #0  0x7f32eda3dd95 in raise () from /lib64/libc.so.6
 #1  0x7f32eda3f2ab in abort () from /lib64/libc.so.6
 #2  0x7f32eda77ece in __libc_message () from /lib64/libc.so.6
 #3  0x7f32eda7dc06 in malloc_printerr () from /lib64/libc.so.6
 #4  0x7f32eda7ecda in _int_free () from /lib64/libc.so.6
 #5  0x7f32efd3452c in free_and_trace (mem=0x7f329cd0) at vl.c:2880
 #6  0x7f32efd251a1 in buffer_free (buffer=0x7f32f0c82890) at ui/vnc.c:505
 #7  0x7f32efd20c56 in vnc_zrle_clear (vs=0x7f32f0c762d0)
 at ui/vnc-enc-zrle.c:364
 #8  0x7f32efd26d07 in vnc_disconnect_finish (vs=0x7f32f0c762d0)
 at ui/vnc.c:1050
 #9  0x7f32efd275c5 in vnc_client_read (opaque=0x7f32f0c762d0)
 at ui/vnc.c:1349
 #10 0x7f32efcb397c in qemu_iohandler_poll (readfds=0x7f32f074d020,
 writefds=0x7f32f074d0a0, xfds=0x7f32f074d120, ret=1) at iohandler.c:124
 #11 0x7f32efcb46e8 in main_loop_wait (nonblocking=0) at main-loop.c:417
 #12 0x7f32efd31159 in main_loop () at vl.c:2133
 #13 0x7f32efd38070 in main (argc=46, argv=0x7fff7f5df178,
 envp=0x7fff7f5df2f0) at vl.c:4481

CCing Corentin and Gerd who are more familiar with the VNC code than me.

Stefan
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/8 v3] KVM: PPC: e500: Expose MMU registers via ONE_REG

2013-04-11 Thread Mihai Caraman
MMU registers were exposed to user space using the sregs interface. Add them
to the ONE_REG interface using the kvmppc_get_one_reg/kvmppc_set_one_reg
delegation mechanism.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - Fix case breaks
 
v2:
 - Restrict set_one_reg operation for MMU registers to HW values

 Documentation/virtual/kvm/api.txt   |   11 
 arch/powerpc/include/uapi/asm/kvm.h |   17 ++
 arch/powerpc/kvm/e500.c |6 ++-
 arch/powerpc/kvm/e500.h |4 ++
 arch/powerpc/kvm/e500_mmu.c |   94 +++
 arch/powerpc/kvm/e500mc.c   |6 ++-
 6 files changed, 134 insertions(+), 4 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 976eb65..1a76663 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1792,6 +1792,17 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TSR  | 32
   PPC   | KVM_REG_PPC_OR_TSR   | 32
   PPC   | KVM_REG_PPC_CLEAR_TSR| 32
+  PPC   | KVM_REG_PPC_MAS0 | 32
+  PPC   | KVM_REG_PPC_MAS1 | 32
+  PPC   | KVM_REG_PPC_MAS2 | 64
+  PPC   | KVM_REG_PPC_MAS7_3   | 64
+  PPC   | KVM_REG_PPC_MAS4 | 32
+  PPC   | KVM_REG_PPC_MAS6 | 32
+  PPC   | KVM_REG_PPC_MMUCFG   | 32
+  PPC   | KVM_REG_PPC_TLB0CFG  | 32
+  PPC   | KVM_REG_PPC_TLB1CFG  | 32
+  PPC   | KVM_REG_PPC_TLB2CFG  | 32
+  PPC   | KVM_REG_PPC_TLB3CFG  | 32
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index c2ff99c..93d063f 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -426,4 +426,21 @@ struct kvm_get_htab_header {
 /* Debugging: Special instruction for software breakpoint */
 #define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
 
+/* MMU registers */
+#define KVM_REG_PPC_MAS0   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c)
+#define KVM_REG_PPC_MAS1   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d)
+#define KVM_REG_PPC_MAS2   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e)
+#define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f)
+#define KVM_REG_PPC_MAS4   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90)
+#define KVM_REG_PPC_MAS6   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91)
+#define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92)
+/*
+ * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using
+ * KVM_CAP_SW_TLB ioctl
+ */
+#define KVM_REG_PPC_TLB0CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93)
+#define KVM_REG_PPC_TLB1CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
+#define KVM_REG_PPC_TLB2CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
+#define KVM_REG_PPC_TLB3CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 576010f..ce6b73c 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -428,13 +428,15 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct 
kvm_sregs *sregs)
 int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
 {
-   return -EINVAL;
+   int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+   return r;
 }
 
 int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
   union kvmppc_one_reg *val)
 {
-   return -EINVAL;
+   int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+   return r;
 }
 
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 33db48a..b73ca7a 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -131,6 +131,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 
*vcpu_e500);
 void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+   union kvmppc_one_reg *val);
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+  union kvmppc_one_reg *val);
 
 #ifdef CONFIG_KVM_E500V2
 unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 5c44759..44f7762 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -596,6 +596,100 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, 
struct kvm_sregs *sregs)
return 0;
 }
 
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+   union kvmppc_one_reg *val)
+{
+   int r = 0;
+   long int i;
+
+   switch (id) {
+   case KVM_REG_PPC_MAS0:
+   *val = 

[PATCH 3/8 v3] KVM: PPC: e500: Move vcpu's MMU configuration to dedicated functions

2013-04-11 Thread Mihai Caraman
The vcpu's default MMU configuration and geometry update logic were buried in
a chunk of code. Move them to dedicated functions for more clarity.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - No change

v2:
 - Add better patch description

 arch/powerpc/kvm/e500_mmu.c |   60 +++---
 1 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 44f7762..08a5b0d 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -690,6 +690,20 @@ int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 
id,
return r;
 }
 
+static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
+   struct kvm_book3e_206_tlb_params *params)
+{
+   vcpu-arch.tlbcfg[0] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+   if (params-tlb_sizes[0] = 2048)
+   vcpu-arch.tlbcfg[0] |= params-tlb_sizes[0];
+   vcpu-arch.tlbcfg[0] |= params-tlb_ways[0]  TLBnCFG_ASSOC_SHIFT;
+
+   vcpu-arch.tlbcfg[1] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+   vcpu-arch.tlbcfg[1] |= params-tlb_sizes[1];
+   vcpu-arch.tlbcfg[1] |= params-tlb_ways[1]  TLBnCFG_ASSOC_SHIFT;
+   return 0;
+}
+
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
  struct kvm_config_tlb *cfg)
 {
@@ -786,16 +800,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
vcpu_e500-gtlb_offset[0] = 0;
vcpu_e500-gtlb_offset[1] = params.tlb_sizes[0];
 
-   vcpu-arch.mmucfg = mfspr(SPRN_MMUCFG)  ~MMUCFG_LPIDSIZE;
-
-   vcpu-arch.tlbcfg[0] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-   if (params.tlb_sizes[0] = 2048)
-   vcpu-arch.tlbcfg[0] |= params.tlb_sizes[0];
-   vcpu-arch.tlbcfg[0] |= params.tlb_ways[0]  TLBnCFG_ASSOC_SHIFT;
-
-   vcpu-arch.tlbcfg[1] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-   vcpu-arch.tlbcfg[1] |= params.tlb_sizes[1];
-   vcpu-arch.tlbcfg[1] |= params.tlb_ways[1]  TLBnCFG_ASSOC_SHIFT;
+   /* Update vcpu's MMU geometry based on SW_TLB input */
+   vcpu_mmu_geometry_update(vcpu, params);
 
vcpu_e500-shared_tlb_pages = pages;
vcpu_e500-num_shared_tlb_pages = num_pages;
@@ -831,6 +837,27 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
return 0;
 }
 
+/* Vcpu's MMU default configuration */
+static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
+  struct kvmppc_e500_tlb_params *params)
+{
+   /* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/
+   vcpu-arch.mmucfg = mfspr(SPRN_MMUCFG)  ~MMUCFG_LPIDSIZE;
+
+   /* Initialize TLBnCFG fields with host values and SW_TLB geometry*/
+   vcpu-arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) 
+~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+   vcpu-arch.tlbcfg[0] |= params[0].entries;
+   vcpu-arch.tlbcfg[0] |= params[0].ways  TLBnCFG_ASSOC_SHIFT;
+
+   vcpu-arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) 
+~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+   vcpu-arch.tlbcfg[1] |= params[1].entries;
+   vcpu-arch.tlbcfg[1] |= params[1].ways  TLBnCFG_ASSOC_SHIFT;
+
+   return 0;
+}
+
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
struct kvm_vcpu *vcpu = vcpu_e500-vcpu;
@@ -875,18 +902,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 
*vcpu_e500)
if (!vcpu_e500-g2h_tlb1_map)
goto err;
 
-   /* Init TLB configuration register */
-   vcpu-arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) 
-~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-   vcpu-arch.tlbcfg[0] |= vcpu_e500-gtlb_params[0].entries;
-   vcpu-arch.tlbcfg[0] |=
-   vcpu_e500-gtlb_params[0].ways  TLBnCFG_ASSOC_SHIFT;
-
-   vcpu-arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) 
-~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-   vcpu-arch.tlbcfg[1] |= vcpu_e500-gtlb_params[1].entries;
-   vcpu-arch.tlbcfg[1] |=
-   vcpu_e500-gtlb_params[1].ways  TLBnCFG_ASSOC_SHIFT;
+   vcpu_mmu_init(vcpu, vcpu_e500-gtlb_params);
 
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
-- 
1.7.4.1


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/8 v3] KVM: PPC: e500: Remove E.PT and E.HV.LRAT categories from VCPUs

2013-04-11 Thread Mihai Caraman
The Embedded.Page Table (E.PT) category is not yet supported in the e6500 kernel.
Configure TLBnCFG to remove the E.PT and E.HV.LRAT categories from VCPUs.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - No change

 arch/powerpc/kvm/e500_mmu.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 1c1c5cb..c41a5a9 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -885,8 +885,12 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
vcpu-arch.tlbps[0] = mfspr(SPRN_TLB0PS);
vcpu-arch.tlbps[1] = mfspr(SPRN_TLB1PS);
 
+   vcpu-arch.mmucfg = ~MMUCFG_LRAT;
+
/* Guest mmu emulation currently doesn't handle E.PT */
vcpu-arch.eptcfg = 0;
+   vcpu-arch.tlbcfg[0] = ~TLBnCFG_PT;
+   vcpu-arch.tlbcfg[1] = ~TLBnCFG_IND;
}
 
return 0;
-- 
1.7.4.1


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/8 v3] KVM: PPC: e500: Add support for TLBnPS registers

2013-04-11 Thread Mihai Caraman
Add support for TLBnPS registers available in MMU Architecture Version
(MAV) 2.0.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - Add vcpu_ftr enum
 
v2:
 - Add vcpu generic function has_feature()

 Documentation/virtual/kvm/api.txt   |4 
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/include/uapi/asm/kvm.h |4 
 arch/powerpc/kvm/e500.h |   18 ++
 arch/powerpc/kvm/e500_emulate.c |   10 ++
 arch/powerpc/kvm/e500_mmu.c |   22 ++
 6 files changed, 59 insertions(+), 0 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 1a76663..f045377 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1803,6 +1803,10 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TLB1CFG  | 32
   PPC   | KVM_REG_PPC_TLB2CFG  | 32
   PPC   | KVM_REG_PPC_TLB3CFG  | 32
+  PPC   | KVM_REG_PPC_TLB0PS   | 32
+  PPC   | KVM_REG_PPC_TLB1PS   | 32
+  PPC   | KVM_REG_PPC_TLB2PS   | 32
+  PPC   | KVM_REG_PPC_TLB3PS   | 32
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index e34f8fe..3b6cee3 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -502,6 +502,7 @@ struct kvm_vcpu_arch {
spinlock_t wdt_lock;
struct timer_list wdt_timer;
u32 tlbcfg[4];
+   u32 tlbps[4];
u32 mmucfg;
u32 epr;
u32 crit_save;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 93d063f..91341d9 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -442,5 +442,9 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TLB1CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
 #define KVM_REG_PPC_TLB2CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
 #define KVM_REG_PPC_TLB3CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
+#define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97)
+#define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
+#define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
+#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
 
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index b73ca7a..c2e5e98 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -23,6 +23,10 @@
 #include asm/mmu-book3e.h
 #include asm/tlb.h
 
+enum vcpu_ftr {
+   VCPU_FTR_MMU_V2
+};
+
 #define E500_PID_NUM   3
 #define E500_TLB_NUM   2
 
@@ -299,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu 
*vcpu)
 #define get_tlb_sts(gtlbe)  (MAS1_TS)
 #endif /* !BOOKE_HV */
 
+static inline bool has_feature(const struct kvm_vcpu *vcpu,
+  enum vcpu_ftr ftr)
+{
+   bool has_ftr;
+   switch (ftr) {
+   case VCPU_FTR_MMU_V2:
+   has_ftr = ((vcpu-arch.mmucfg  MMUCFG_MAVN) == MMUCFG_MAVN_V2);
+   break;
+   default:
+   return false;
+   }
+   return has_ftr;
+}
+
 #endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e78f353..12b8de2 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int 
sprn, ulong *spr_val)
case SPRN_TLB1CFG:
*spr_val = vcpu-arch.tlbcfg[1];
break;
+   case SPRN_TLB0PS:
+   if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+   return EMULATE_FAIL;
+   *spr_val = vcpu-arch.tlbps[0];
+   break;
+   case SPRN_TLB1PS:
+   if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+   return EMULATE_FAIL;
+   *spr_val = vcpu-arch.tlbps[1];
+   break;
case SPRN_L1CSR0:
*spr_val = vcpu_e500-l1csr0;
break;
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 08a5b0d..a863dc1 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -631,6 +631,13 @@ int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 
id,
i = id - KVM_REG_PPC_TLB0CFG;
*val = get_reg_val(id, vcpu-arch.tlbcfg[i]);
break;
+   case KVM_REG_PPC_TLB0PS:
+   case KVM_REG_PPC_TLB1PS:
+   case KVM_REG_PPC_TLB2PS:
+   case KVM_REG_PPC_TLB3PS:
+   i = id - KVM_REG_PPC_TLB0PS;
+   *val = get_reg_val(id, vcpu-arch.tlbps[i]);
+   break;
default:
r = -EINVAL;
break;
@@ -682,6 +689,16 @@ int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 

[PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig description

2013-04-11 Thread Mihai Caraman
Add e6500 core to Kconfig description.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - No change

 arch/powerpc/kvm/Kconfig |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 63c67ec..4489520 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -136,15 +136,15 @@ config KVM_E500V2
  If unsure, say N.
 
 config KVM_E500MC
-   bool KVM support for PowerPC E500MC/E5500 processors
+   bool KVM support for PowerPC E500MC/E5500/E6500 processors
depends on PPC_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
select MMU_NOTIFIER
---help---
- Support running unmodified E500MC/E5500 (32-bit) guest kernels in
- virtual machines on E500MC/E5500 host processors.
+ Support running unmodified E500MC/E5500/E6500 guest kernels in
+ virtual machines on E500MC/E5500/E6500 host processors.
 
  This module provides access to the hardware capabilities through
  a character device node named /dev/kvm.
-- 
1.7.4.1


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/8 v3] KVM: PPC: e500mc: Enable e6500 cores

2013-04-11 Thread Mihai Caraman
Extend processor compatibility names to e6500 cores.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
Reviewed-by: Alexander Graf ag...@suse.de
---
v3:
 - No change

 arch/powerpc/kvm/e500mc.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index ab073a8..c3bdc0a 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -172,6 +172,8 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
else if (strcmp(cur_cpu_spec-cpu_name, e5500) == 0)
r = 0;
+   else if (strcmp(cur_cpu_spec-cpu_name, e6500) == 0)
+   r = 0;
else
r = -ENOTSUPP;
 
-- 
1.7.4.1


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] KVM: PPC: e500: Add support for EPTCFG register

2013-04-11 Thread Mihai Caraman
The EPTCFG register, defined by E.PT, is accessed unconditionally by Linux guests
in the presence of MAV 2.0. Emulate it now.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - Initialize EPTCFG to 0 since E.PT is not supported now

 Documentation/virtual/kvm/api.txt   |1 +
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/include/uapi/asm/kvm.h |1 +
 arch/powerpc/kvm/e500_emulate.c |9 +
 arch/powerpc/kvm/e500_mmu.c |   12 
 5 files changed, 24 insertions(+), 0 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index f045377..a1f2200 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1807,6 +1807,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TLB1PS   | 32
   PPC   | KVM_REG_PPC_TLB2PS   | 32
   PPC   | KVM_REG_PPC_TLB3PS   | 32
+  PPC   | KVM_REG_PPC_EPTCFG   | 32
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 3b6cee3..8a48e68 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -504,6 +504,7 @@ struct kvm_vcpu_arch {
u32 tlbcfg[4];
u32 tlbps[4];
u32 mmucfg;
+   u32 eptcfg;
u32 epr;
u32 crit_save;
struct kvmppc_booke_debug_reg dbg_reg;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 91341d9..7f4d191 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -446,5 +446,6 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
 #define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
 #define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
+#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
 
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 12b8de2..b10a012 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -317,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int 
sprn, ulong *spr_val)
case SPRN_MMUCFG:
*spr_val = vcpu-arch.mmucfg;
break;
+   case SPRN_EPTCFG:
+   if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+   return EMULATE_FAIL;
+   /*
+* Legacy Linux guests access EPTCFG register even if the E.PT
+* category is disabled in the VM. Give them a chance to live.
+*/
+   *spr_val = vcpu-arch.eptcfg;
+   break;
 
/* extra exceptions */
case SPRN_IVOR32:
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index a863dc1..1c1c5cb 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -624,6 +624,9 @@ int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 
id,
case KVM_REG_PPC_MMUCFG:
*val = get_reg_val(id, vcpu-arch.mmucfg);
break;
+   case KVM_REG_PPC_EPTCFG:
+   *val = get_reg_val(id, vcpu-arch.eptcfg);
+   break;
case KVM_REG_PPC_TLB0CFG:
case KVM_REG_PPC_TLB1CFG:
case KVM_REG_PPC_TLB2CFG:
@@ -678,6 +681,12 @@ int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 
id,
r = -EINVAL;
break;
}
+   case KVM_REG_PPC_EPTCFG: {
+   u32 reg = set_reg_val(id, *val);
+   if (reg != vcpu-arch.eptcfg)
+   r = -EINVAL;
+   break;
+   }
case KVM_REG_PPC_TLB0CFG:
case KVM_REG_PPC_TLB1CFG:
case KVM_REG_PPC_TLB2CFG:
@@ -875,6 +884,9 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
vcpu-arch.tlbps[0] = mfspr(SPRN_TLB0PS);
vcpu-arch.tlbps[1] = mfspr(SPRN_TLB1PS);
+
+   /* Guest mmu emulation currently doesn't handle E.PT */
+   vcpu-arch.eptcfg = 0;
}
 
return 0;
-- 
1.7.4.1


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/8 v3] KVM: PPC: Book3E: Refactor ONE_REG ioctl implementation

2013-04-11 Thread Mihai Caraman
Refactor Book3E ONE_REG ioctl implementation to use kvmppc_get_one_reg/
kvmppc_set_one_reg delegation interface introduced by Book3S. This is
necessary for MMU SPRs, which are platform specific.

Get rid of useless case braces in the process.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - Split ONE_REG ioctl refactoring in its own patch

 arch/powerpc/kvm/44x.c|   12 +
 arch/powerpc/kvm/booke.c  |  102 -
 arch/powerpc/kvm/e500.c   |   12 +
 arch/powerpc/kvm/e500mc.c |   12 +
 4 files changed, 91 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 3d7fd21..2f5c6b6 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct 
kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+   union kvmppc_one_reg *val)
+{
+   return -EINVAL;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+  union kvmppc_one_reg *val)
+{
+   return -EINVAL;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
struct kvmppc_vcpu_44x *vcpu_44x;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index a49a68a..08f6540 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1412,117 +1412,125 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu 
*vcpu,
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-   int r = -EINVAL;
+   int r = 0;
+   union kvmppc_one_reg val;
+   int size;
+   long int i;
+
+   size = one_reg_size(reg-id);
+   if (size  sizeof(val))
+   return -EINVAL;
 
switch (reg-id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
-   case KVM_REG_PPC_IAC4: {
-   int iac = reg-id - KVM_REG_PPC_IAC1;
-   r = copy_to_user((u64 __user *)(long)reg-addr,
-vcpu-arch.dbg_reg.iac[iac], sizeof(u64));
+   case KVM_REG_PPC_IAC4:
+   i = reg-id - KVM_REG_PPC_IAC1;
+   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.iac[i]);
break;
-   }
case KVM_REG_PPC_DAC1:
-   case KVM_REG_PPC_DAC2: {
-   int dac = reg-id - KVM_REG_PPC_DAC1;
-   r = copy_to_user((u64 __user *)(long)reg-addr,
-vcpu-arch.dbg_reg.dac[dac], sizeof(u64));
+   case KVM_REG_PPC_DAC2:
+   i = reg-id - KVM_REG_PPC_DAC1;
+   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.dac[i]);
break;
-   }
case KVM_REG_PPC_EPR: {
u32 epr = get_guest_epr(vcpu);
-   r = put_user(epr, (u32 __user *)(long)reg-addr);
+   val = get_reg_val(reg-id, epr);
break;
}
 #if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR:
-   r = put_user(vcpu-arch.epcr, (u32 __user *)(long)reg-addr);
+   val = get_reg_val(reg-id, vcpu-arch.epcr);
break;
 #endif
case KVM_REG_PPC_TCR:
-   r = put_user(vcpu-arch.tcr, (u32 __user *)(long)reg-addr);
+   val = get_reg_val(reg-id, vcpu-arch.tcr);
break;
case KVM_REG_PPC_TSR:
-   r = put_user(vcpu-arch.tsr, (u32 __user *)(long)reg-addr);
+   val = get_reg_val(reg-id, vcpu-arch.tsr);
break;
-   case KVM_REG_PPC_DEBUG_INST: {
-   u32 opcode = KVMPPC_INST_EHPRIV;
-   r = copy_to_user((u32 __user *)(long)reg-addr,
-opcode, sizeof(u32));
+   case KVM_REG_PPC_DEBUG_INST:
+   val = get_reg_val(reg-id, KVMPPC_INST_EHPRIV);
break;
-   }
default:
+   r = kvmppc_get_one_reg(vcpu, reg-id, val);
break;
}
+
+   if (r)
+   return r;
+
+   if (copy_to_user((char __user *)(unsigned long)reg-addr, val, size))
+   r = -EFAULT;
+
return r;
 }
 
 int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-   int r = -EINVAL;
+   int r = 0;
+   union kvmppc_one_reg val;
+   int size;
+   long int i;
+
+   size = one_reg_size(reg-id);
+   if (size  sizeof(val))
+   return -EINVAL;
+
+   if (copy_from_user(val, (char __user *)(unsigned long)reg-addr, size))
+   return -EFAULT;
 
switch (reg-id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
-   case KVM_REG_PPC_IAC4: {
-   int iac = reg-id - KVM_REG_PPC_IAC1;
-   r = copy_from_user(vcpu-arch.dbg_reg.iac[iac],
-(u64 __user 

[PATCH 0/8 v3] KVM: PPC: e500: Enable FSL e6500 core

2013-04-11 Thread Mihai Caraman
Enable basic support for Freescale e6500 core, adding MAV 2.0 support.
Validated on T4240QDS platform. Altivec, Multithreading and HW Tablewalk
are not addressed by this patchset.

Mihai Caraman (8):
  KVM: PPC: Book3E: Refactor ONE_REG ioctl implementation
  KVM: PPC: e500: Expose MMU registers via ONE_REG
  KVM: PPC: e500: Move vcpu's MMU configuration to dedicated functions
  KVM: PPC: e500: Add support for TLBnPS registers
  KVM: PPC: e500: Add support for EPTCFG register
  KVM: PPC: e500: Remove E.PT and E.HV.LRAT categories from VCPUs
  KVM: PPC: e500mc: Enable e6500 cores
  KVM: PPC: e500: Add e6500 core to Kconfig description

 Documentation/virtual/kvm/api.txt   |   16 +++
 arch/powerpc/include/asm/kvm_host.h |2 +
 arch/powerpc/include/uapi/asm/kvm.h |   22 
 arch/powerpc/kvm/44x.c  |   12 ++
 arch/powerpc/kvm/Kconfig|6 +-
 arch/powerpc/kvm/booke.c|  102 ++-
 arch/powerpc/kvm/e500.c |   14 +++
 arch/powerpc/kvm/e500.h |   22 
 arch/powerpc/kvm/e500_emulate.c |   19 
 arch/powerpc/kvm/e500_mmu.c |  192 +++
 arch/powerpc/kvm/e500mc.c   |   16 +++
 11 files changed, 351 insertions(+), 72 deletions(-)

-- 
1.7.4.1


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] KVM: emulator: Do not fail on emulation of undefined opcode

2013-04-11 Thread Gleb Natapov
Emulation of an undefined opcode should inject #UD instead of causing an
emulation failure. Do that by moving the Undefined flag check to the
emulation stage and injecting #UD there.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/x86/kvm/emulate.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c2b7f33..2f66e98 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4374,7 +4374,7 @@ done_prefixes:
ctxt-intercept = opcode.intercept;
 
/* Unrecognised? */
-   if ((ctxt-d  NotImpl) || (ctxt-d  Undefined))
+   if ((ctxt-d  NotImpl))
return EMULATION_FAILED;
 
if (!(ctxt-d  VendorSpecific)  ctxt-only_vendor_specific_insn)
@@ -4512,7 +4512,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
ctxt-mem_read.pos = 0;
 
-   if (ctxt-mode == X86EMUL_MODE_PROT64  (ctxt-d  No64)) {
+   if ((ctxt-mode == X86EMUL_MODE_PROT64  (ctxt-d  No64)) ||
+   (ctxt-d  Undefined)) {
rc = emulate_ud(ctxt);
goto done;
}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] KVM: emulator: mark 0xff 0x7d opcode as undefined.

2013-04-11 Thread Gleb Natapov

Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/x86/kvm/emulate.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2f66e98..5a44d7f 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3714,7 +3714,7 @@ static const struct opcode group5[] = {
I(SrcMemFAddr | ImplicitOps | Stack,em_call_far),
I(SrcMem | Stack,   em_grp45),
I(SrcMemFAddr | ImplicitOps,em_grp45),
-   I(SrcMem | Stack,   em_grp45), N,
+   I(SrcMem | Stack,   em_grp45), D(Undefined),
 };
 
 static const struct opcode group6[] = {
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] KVM: emulator: fix unimplemented instruction detection

2013-04-11 Thread Gleb Natapov
Unimplemented instruction detection is broken for group instructions
since it relies on the flags field of the opcode being zero, but all
instructions in a group inherit flags from the group encoding. Fix that by
having a separate flag for unimplemented instructions.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/x86/kvm/emulate.c |7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a335cc6..c2b7f33 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -132,8 +132,9 @@
 #define Priv(127) /* instruction generates #GP if current CPL != 0 */
 #define No64   (128)
 #define PageTable   (1  29)   /* instruction used to write page table */
+#define NotImpl (1  30)   /* instruction is not implemented */
 /* Source 2 operand type */
-#define Src2Shift   (30)
+#define Src2Shift   (31)
 #define Src2None(OpNone  Src2Shift)
 #define Src2CL  (OpCL  Src2Shift)
 #define Src2ImmByte (OpImmByte  Src2Shift)
@@ -3615,7 +3616,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
 #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
  .check_perm = (_p) }
-#define ND(0)
+#define ND(NotImpl)
 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
@@ -4373,7 +4374,7 @@ done_prefixes:
ctxt-intercept = opcode.intercept;
 
/* Unrecognised? */
-   if (ctxt-d == 0 || (ctxt-d  Undefined))
+   if ((ctxt-d  NotImpl) || (ctxt-d  Undefined))
return EMULATION_FAILED;
 
if (!(ctxt-d  VendorSpecific)  ctxt-only_vendor_specific_insn)
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] KVM: VMX: do not try to reexecute failed instruction while emulating invalid guest state

2013-04-11 Thread Gleb Natapov
During invalid guest state emulation the vcpu cannot enter guest mode to try
to reexecute an instruction that the emulator failed to emulate, so the
emulation will happen again and again.  Prevent that by telling the emulator
that instruction reexecution should not be attempted.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/x86/include/asm/kvm_host.h |1 +
 arch/x86/kvm/vmx.c  |2 +-
 arch/x86/kvm/x86.c  |   13 +
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b2c7263..82f1dc6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -795,6 +795,7 @@ enum emulation_result {
 #define EMULTYPE_TRAP_UD   (1  1)
 #define EMULTYPE_SKIP  (1  2)
 #define EMULTYPE_RETRY (1  3)
+#define EMULTYPE_NO_REEXECUTE  (1  4)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
int emulation_type, void *insn, int insn_len);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 669b803..d268677 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5189,7 +5189,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu 
*vcpu)
if (test_bit(KVM_REQ_EVENT, vcpu-requests))
return 1;
 
-   err = emulate_instruction(vcpu, 0);
+   err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
 
if (err == EMULATE_DO_MMIO) {
ret = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eb9927e..999d124 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4765,11 +4765,15 @@ static int handle_emulation_failure(struct kvm_vcpu 
*vcpu)
 }
 
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
- bool write_fault_to_shadow_pgtable)
+ bool write_fault_to_shadow_pgtable,
+ int emulation_type)
 {
gpa_t gpa = cr2;
pfn_t pfn;
 
+   if (emulation_type  EMULTYPE_NO_REEXECUTE)
+   return false;
+
if (!vcpu-arch.mmu.direct_map) {
/*
 * Write permission should be allowed since only
@@ -4912,8 +4916,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
if (r != EMULATION_OK)  {
if (emulation_type  EMULTYPE_TRAP_UD)
return EMULATE_FAIL;
-   if (reexecute_instruction(vcpu, cr2,
- write_fault_to_spt))
+   if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+   emulation_type))
return EMULATE_DONE;
if (emulation_type  EMULTYPE_SKIP)
return EMULATE_FAIL;
@@ -4943,7 +4947,8 @@ restart:
return EMULATE_DONE;
 
if (r == EMULATION_FAILED) {
-   if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
+   if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+   emulation_type))
return EMULATE_DONE;
 
return handle_emulation_failure(vcpu);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] tcm_vhost: Wait for pending requests in vhost_scsi_flush()

2013-04-11 Thread Michael S. Tsirkin
On Tue, Apr 09, 2013 at 05:39:43PM +0800, Asias He wrote:
 This patch makes vhost_scsi_flush() wait for all the pending requests
 issued before the flush operation to be finished.
 
 Changes in v3:
 - Rebase
 - Drop 'tcm_vhost: Wait for pending requests in
   vhost_scsi_clear_endpoint()' in this series, we already did that in
   'tcm_vhost: Use vq-private_data to indicate if the endpoint is setup'
 
 Changes in v2:
 - Increase/Decrease inflight requests in
   vhost_scsi_{allocate,free}_cmd and tcm_vhost_{allocate,free}_evt
 
 Signed-off-by: Asias He as...@redhat.com

Nack, let's not do this home-grown here.  Please use a kref.

The array of two trick is also too tricky for my taste.

Please replace during_flush in tcm_vhost_cmd and tcm_vhost_evt
by a kref pointer, allocate a new kref when you flush.

Access can be done with RCU so we won't need any locks.

 ---
  drivers/vhost/tcm_vhost.c | 88 
 ---
  drivers/vhost/tcm_vhost.h |  4 +++
  2 files changed, 87 insertions(+), 5 deletions(-)
 
 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 1f9116c..719ce13 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -91,6 +91,15 @@ struct vhost_scsi {
   struct mutex vs_events_lock; /* protect vs_events_dropped,events_nr */
   bool vs_events_dropped; /* any missed events */
   int vs_events_nr; /* num of pending events */
 +
 + /*
 +  * vs_inflight[0]/[1] are used to track requests issued
 +  * before/during the flush operation
 +  */
 + u64 vs_inflight[2];
 + wait_queue_head_t vs_flush_wait; /* wait queue for flush operation */
 + spinlock_t vs_flush_lock; /* lock to protect vs_during_flush */
 + int vs_during_flush; /* flag to indicate if we are in flush operation */
  };
  
  /* Local pointer to allocated TCM configfs fabric module */
 @@ -108,6 +117,46 @@ static int iov_num_pages(struct iovec *iov)
  ((unsigned long)iov-iov_base  PAGE_MASK))  PAGE_SHIFT;
  }
  
 +static int tcm_vhost_inc_inflight(struct vhost_scsi *vs)
 +{
 + int during_flush;
 +
 + spin_lock(vs-vs_flush_lock);
 + during_flush = vs-vs_during_flush;
 + vs-vs_inflight[during_flush]++;
 + spin_unlock(vs-vs_flush_lock);
 +
 + return during_flush;
 +}
 +
 +static void tcm_vhost_dec_inflight(struct vhost_scsi *vs, int during_flush)
 +{
 + u64 inflight;
 +
 + spin_lock(vs-vs_flush_lock);
 + inflight = vs-vs_inflight[during_flush]--;
 + /*
 +  * Wakeup the waiter when all the requests issued before the flush
 +  * operation are finished and we are during the flush operation.
 +  */
 + if (!inflight  !during_flush  vs-vs_during_flush)
 + wake_up(vs-vs_flush_wait);
 + spin_unlock(vs-vs_flush_lock);
 +}
 +
 +static bool tcm_vhost_done_inflight(struct vhost_scsi *vs)
 +{
 + bool ret = false;
 +
 + /* The requests issued before the flush operation are finished ? */
 + spin_lock(vs-vs_flush_lock);
 + if (!vs-vs_inflight[0])
 + ret = true;
 + spin_unlock(vs-vs_flush_lock);
 +
 + return ret;
 +}
 +
  static bool tcm_vhost_check_feature(struct vhost_scsi *vs, int feature)
  {
   bool ret = false;
 @@ -402,6 +451,7 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd)
  static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt 
 *evt)
  {
   mutex_lock(vs-vs_events_lock);
 + tcm_vhost_dec_inflight(vs, evt-during_flush);
   vs-vs_events_nr--;
   kfree(evt);
   mutex_unlock(vs-vs_events_lock);
 @@ -423,6 +473,7 @@ static struct tcm_vhost_evt 
 *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
   if (evt) {
   evt-event.event = event;
   evt-event.reason = reason;
 + evt-during_flush = tcm_vhost_inc_inflight(vs);
   vs-vs_events_nr++;
   }
   mutex_unlock(vs-vs_events_lock);
 @@ -433,6 +484,7 @@ static struct tcm_vhost_evt 
 *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
  static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
  {
   struct se_cmd *se_cmd = tv_cmd-tvc_se_cmd;
 + struct vhost_scsi *vs = tv_cmd-tvc_vhost;
  
   /* TODO locking against target/backend threads? */
   transport_generic_free_cmd(se_cmd, 1);
 @@ -445,13 +497,16 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd 
 *tv_cmd)
   kfree(tv_cmd-tvc_sgl);
   }
  
 + tcm_vhost_dec_inflight(vs, tv_cmd-during_flush);
 +
   kfree(tv_cmd);
  }
  
  static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
 - struct virtio_scsi_event *event)
 + struct tcm_vhost_evt *evt)
  {
   struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
 + struct virtio_scsi_event *event = evt-event;
   struct virtio_scsi_event __user *eventp;
   unsigned out, in;
   int head, ret;
 @@ -511,7 +566,7 @@ static void tcm_vhost_evt_work(struct vhost_work *work)
   while (llnode) {

Re: [PATCH v3 4/5] KVM: nVMX: Fix conditions for interrupt injection

2013-04-11 Thread Gleb Natapov
On Sun, Mar 24, 2013 at 07:44:47PM +0100, Jan Kiszka wrote:
 From: Jan Kiszka jan.kis...@siemens.com
 
 If we are in guest mode, L0 can only inject events into L2 if L1 has
 nothing pending. Otherwise, L0 would overwrite L1's events and they
 would get lost. But even if no injection of L1 is pending, we do not
 want L0 to interrupt unnecessarily an on going vmentry with all its side
 effects on the vmcs. Therefore, injection shall be disallowed during
 L1-L2 transitions. This check is conceptually independent of
 nested_exit_on_intr.
 
 If L1 traps external interrupts, then we also need to look at L1's
 idt_vectoring_info_field. If it is empty, we can kick the guest from L2
 to L1, just like the previous code worked.
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  arch/x86/kvm/vmx.c |   28 
  1 files changed, 20 insertions(+), 8 deletions(-)
 
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index d1bc834..30aa198 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -4325,16 +4325,28 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
  
  static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
  {
 - if (is_guest_mode(vcpu)  nested_exit_on_intr(vcpu)) {
 + if (is_guest_mode(vcpu)) {
   struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 - if (to_vmx(vcpu)-nested.nested_run_pending ||
 - (vmcs12-idt_vectoring_info_field 
 -  VECTORING_INFO_VALID_MASK))
 +
 + if (to_vmx(vcpu)-nested.nested_run_pending)
   return 0;
 - nested_vmx_vmexit(vcpu);
 - vmcs12-vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
 - vmcs12-vm_exit_intr_info = 0;
 - /* fall through to normal code, but now in L1, not L2 */
 + if (nested_exit_on_intr(vcpu)) {
 + /*
 +  * Check if the idt_vectoring_info_field is free. We
 +  * cannot raise EXIT_REASON_EXTERNAL_INTERRUPT if it
 +  * isn't.
 +  */
 + if (vmcs12-idt_vectoring_info_field 
 + VECTORING_INFO_VALID_MASK)
 + return 0;
After patch 2 I do not see how this can be true. Now this case is
handled by the common code: since event queue is not empty the code will not
get here.

 + nested_vmx_vmexit(vcpu);
 + vmcs12-vm_exit_reason =
 + EXIT_REASON_EXTERNAL_INTERRUPT;
 + vmcs12-vm_exit_intr_info = 0;
 + /*
 +  * fall through to normal code, but now in L1, not L2
 +  */
 + }
   }
  
   return (vmcs_readl(GUEST_RFLAGS)  X86_EFLAGS_IF) 
 -- 
 1.7.3.4

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/5] KVM: nVMX: Rework event injection and recovery

2013-04-11 Thread Gleb Natapov
On Sun, Mar 24, 2013 at 07:44:45PM +0100, Jan Kiszka wrote:
 From: Jan Kiszka jan.kis...@siemens.com
 
 The basic idea is to always transfer the pending event injection on
 vmexit into the architectural state of the VCPU and then drop it from
 there if it turns out that we left L2 to enter L1, i.e. if we enter
 prepare_vmcs12.
 
 vmcs12_save_pending_events takes care to transfer pending L0 events into
 the queue of L1. That is mandatory as L1 may decide to switch the guest
 state completely, invalidating or preserving the pending events for
 later injection (including on a different node, once we support
 migration).
 
 This concept is based on the rule that a pending vmlaunch/vmresume is
 not canceled. Otherwise, we would risk to lose injected events or leak
 them into the wrong queues. Encode this rule via a WARN_ON_ONCE at the
 entry of nested_vmx_vmexit.
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  arch/x86/kvm/vmx.c |   90 +--
  1 files changed, 58 insertions(+), 32 deletions(-)
 
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 8827b3b..9d9ff74 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -6493,8 +6493,6 @@ static void __vmx_complete_interrupts(struct kvm_vcpu 
 *vcpu,
  
  static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
  {
 - if (is_guest_mode(vmx-vcpu))
 - return;
   __vmx_complete_interrupts(vmx-vcpu, vmx-idt_vectoring_info,
 VM_EXIT_INSTRUCTION_LEN,
 IDT_VECTORING_ERROR_CODE);
 @@ -6502,8 +6500,6 @@ static void vmx_complete_interrupts(struct vcpu_vmx 
 *vmx)
  
  static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
  {
 - if (is_guest_mode(vcpu))
 - return;
   __vmx_complete_interrupts(vcpu,
 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
 VM_ENTRY_INSTRUCTION_LEN,
 @@ -6535,21 +6531,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu 
 *vcpu)
   struct vcpu_vmx *vmx = to_vmx(vcpu);
   unsigned long debugctlmsr;
  
 - if (is_guest_mode(vcpu)  !vmx-nested.nested_run_pending) {
 - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 - if (vmcs12-idt_vectoring_info_field 
 - VECTORING_INFO_VALID_MASK) {
 - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 - vmcs12-idt_vectoring_info_field);
 - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
 - vmcs12-vm_exit_instruction_len);
 - if (vmcs12-idt_vectoring_info_field 
 - VECTORING_INFO_DELIVER_CODE_MASK)
 - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
 - vmcs12-idt_vectoring_error_code);
 - }
 - }
 -
   /* Record the guest's net vcpu time for enforced NMI injections. */
   if (unlikely(!cpu_has_virtual_nmis()  vmx-soft_vnmi_blocked))
   vmx-entry_time = ktime_get();
 @@ -6708,17 +6689,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu 
 *vcpu)
  
   vmx-idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
  
 - if (is_guest_mode(vcpu)) {
 - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 - vmcs12-idt_vectoring_info_field = vmx-idt_vectoring_info;
 - if (vmx-idt_vectoring_info  VECTORING_INFO_VALID_MASK) {
 - vmcs12-idt_vectoring_error_code =
 - vmcs_read32(IDT_VECTORING_ERROR_CODE);
 - vmcs12-vm_exit_instruction_len =
 - vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 - }
 - }
 -
   vmx-loaded_vmcs-launched = 1;
  
   vmx-exit_reason = vmcs_read32(VM_EXIT_REASON);
 @@ -7325,6 +7295,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 
 *vmcs12)
   vcpu-arch.cr4_guest_owned_bits));
  }
  
 +static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
 +struct vmcs12 *vmcs12)
 +{
 + u32 idt_vectoring;
 + unsigned int nr;
 +
 + if (vcpu-arch.exception.pending) {
 + nr = vcpu-arch.exception.nr;
 + idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
 +
 + if (kvm_exception_is_soft(nr)) {
 + vmcs12-vm_exit_instruction_len =
 + vcpu-arch.event_exit_inst_len;
 + idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
 + } else
 + idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
 +
 + if (vcpu-arch.exception.has_error_code) {
 + idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
 + vmcs12-idt_vectoring_error_code =
 + vcpu-arch.exception.error_code;
 + }
 +
 + 

[PATCH v10 0/7] Use eoi to track RTC interrupt delivery status

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

The current interrupt coalescing logic, which is only used by RTC, conflicts
with Posted Interrupt.

This patch introduces a new mechanism that uses eoi to track interrupts:
when delivering an interrupt to a vcpu, pending_eoi is set to the number of
vcpus that received the interrupt, and it is decreased as each vcpu writes
eoi. No subsequent RTC interrupt can be delivered to a vcpu until all vcpus
write eoi.

Changes from v9 to v10
* Remove irq check and line_status check from rtc_irq_check_coalesced() to
  kvm_ioapic_set_irq().
* Rebase on top of KVM.

Changes from v8 to v9
* Removing pass vector to __rtc_irq_eoi_tracking_restore_one.
* Rebase on top of KVM.

Changes from v7 to v8
* Revamping restore logic.
* Add BUG_ON to check pending_eoi.
* Rebase on top of KVM.

Yang Zhang (7):
  KVM: Add vcpu info to ioapic_update_eoi()
  KVM: Introduce struct rtc_status
  KVM: Return destination vcpu on interrupt injection
  KVM: Add reset/restore rtc_status support
  KVM: Force vmexit with virtual interrupt delivery
  KVM: Let ioapic know the irq line status
  KVM: Use eoi to track RTC interrupt delivery status

 arch/x86/kvm/i8254.c |4 +-
 arch/x86/kvm/lapic.c |   36 ++
 arch/x86/kvm/lapic.h |7 ++-
 arch/x86/kvm/x86.c   |6 ++-
 include/linux/kvm_host.h |   11 +++--
 virt/kvm/assigned-dev.c  |   13 +++--
 virt/kvm/eventfd.c   |   15 +++--
 virt/kvm/ioapic.c|  126 --
 virt/kvm/ioapic.h|   20 ++-
 virt/kvm/irq_comm.c  |   31 +++-
 virt/kvm/kvm_main.c  |3 +-
 11 files changed, 207 insertions(+), 65 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v10 1/7] KVM: Add vcpu info to ioapic_update_eoi()

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Add vcpu info to ioapic_update_eoi, so we can know which vcpu
issued this EOI.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |2 +-
 virt/kvm/ioapic.c|   12 ++--
 virt/kvm/ioapic.h|3 ++-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e227474..3e22536 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -739,7 +739,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int 
vector)
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
-   kvm_ioapic_update_eoi(apic-vcpu-kvm, vector, trigger_mode);
+   kvm_ioapic_update_eoi(apic-vcpu, vector, trigger_mode);
}
 }
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 914cbe0..1d8906d 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -264,8 +264,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int 
irq_source_id)
spin_unlock(ioapic-lock);
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
-int trigger_mode)
+static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
+   struct kvm_ioapic *ioapic, int vector, int trigger_mode)
 {
int i;
 
@@ -304,12 +304,12 @@ bool kvm_ioapic_handles_vector(struct kvm *kvm, int 
vector)
return test_bit(vector, ioapic-handled_vectors);
 }
 
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
 {
-   struct kvm_ioapic *ioapic = kvm-arch.vioapic;
+   struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;
 
spin_lock(ioapic-lock);
-   __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
+   __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
spin_unlock(ioapic-lock);
 }
 
@@ -407,7 +407,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, 
gpa_t addr, int len,
break;
 #ifdef CONFIG_IA64
case IOAPIC_REG_EOI:
-   __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
+   __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
break;
 #endif
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 0400a46..2fc61a5 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -70,7 +70,8 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
*kvm)
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
+   int trigger_mode);
 bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_destroy(struct kvm *kvm);
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v10 2/7] KVM: Introduce struct rtc_status

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

rtc_status is used to track RTC interrupt delivery status. pending_eoi
is increased by each vcpu that received the RTC interrupt and is decreased
when that vcpu writes EOI for the interrupt.
Also, we use dest_map to record the destination vcpus, to avoid the case where
a vcpu that didn't get the RTC interrupt, but issued EOI with the same vector
as RTC, decreases pending_eoi by mistake.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 virt/kvm/ioapic.h |   12 
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 2fc61a5..87cd94b 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -34,6 +34,17 @@ struct kvm_vcpu;
 #defineIOAPIC_INIT 0x5
 #defineIOAPIC_EXTINT   0x7
 
+#ifdef CONFIG_X86
+#define RTC_GSI 8
+#else
+#define RTC_GSI -1U
+#endif
+
+struct rtc_status {
+   int pending_eoi;
+   DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
+};
+
 struct kvm_ioapic {
u64 base_address;
u32 ioregsel;
@@ -47,6 +58,7 @@ struct kvm_ioapic {
void (*ack_notifier)(void *opaque, int irq);
spinlock_t lock;
DECLARE_BITMAP(handled_vectors, 256);
+   struct rtc_status rtc_status;
 };
 
 #ifdef DEBUG
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v10 3/7] KVM: Return destination vcpu on interrupt injection

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Add a new parameter to know vcpus who received the interrupt.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |   25 -
 arch/x86/kvm/lapic.h |5 +++--
 virt/kvm/ioapic.c|2 +-
 virt/kvm/ioapic.h|2 +-
 virt/kvm/irq_comm.c  |   12 ++--
 5 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3e22536..0b73402 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -384,14 +384,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 }
 
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
-int vector, int level, int trig_mode);
+int vector, int level, int trig_mode,
+unsigned long *dest_map);
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+   unsigned long *dest_map)
 {
struct kvm_lapic *apic = vcpu-arch.apic;
 
return __apic_accept_irq(apic, irq-delivery_mode, irq-vector,
-   irq-level, irq-trig_mode);
+   irq-level, irq-trig_mode, dest_map);
 }
 
 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
@@ -564,7 +566,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct 
kvm_lapic *source,
 }
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-   struct kvm_lapic_irq *irq, int *r)
+   struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
 {
struct kvm_apic_map *map;
unsigned long bitmap = 1;
@@ -575,7 +577,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct 
kvm_lapic *src,
*r = -1;
 
if (irq-shorthand == APIC_DEST_SELF) {
-   *r = kvm_apic_set_irq(src-vcpu, irq);
+   *r = kvm_apic_set_irq(src-vcpu, irq, dest_map);
return true;
}
 
@@ -620,7 +622,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct 
kvm_lapic *src,
continue;
if (*r  0)
*r = 0;
-   *r += kvm_apic_set_irq(dst[i]-vcpu, irq);
+   *r += kvm_apic_set_irq(dst[i]-vcpu, irq, dest_map);
}
 
ret = true;
@@ -634,7 +636,8 @@ out:
  * Return 1 if successfully added and 0 if discarded.
  */
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
-int vector, int level, int trig_mode)
+int vector, int level, int trig_mode,
+unsigned long *dest_map)
 {
int result = 0;
struct kvm_vcpu *vcpu = apic-vcpu;
@@ -647,6 +650,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
delivery_mode,
if (unlikely(!apic_enabled(apic)))
break;
 
+   if (dest_map)
+   __set_bit(vcpu-vcpu_id, dest_map);
+
if (trig_mode) {
apic_debug(level trig mode for vector %d, vector);
apic_set_vector(vector, apic-regs + APIC_TMR);
@@ -805,7 +811,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
   irq.vector);
 
-   kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq);
+   kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq, NULL);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
@@ -1441,7 +1447,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int 
lvt_type)
vector = reg  APIC_VECTOR_MASK;
mode = reg  APIC_MODE_MASK;
trig_mode = reg  APIC_LVT_LEVEL_TRIGGER;
-   return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+   return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
+   NULL);
}
return 0;
 }
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index baa20cf..3e5a431 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -55,11 +55,12 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+   unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-   struct kvm_lapic_irq *irq, int *r);
+   struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void 

[PATCH v10 5/7] KVM: Force vmexit with virtual interrupt delivery

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

We need the EOI to track interrupt delivery status, so force a vmexit
on EOI for the rtc interrupt when enabling virtual interrupt delivery.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 virt/kvm/ioapic.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 9d76baa..76528ff 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -189,7 +189,7 @@ void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
if (!e-fields.mask 
(e-fields.trig_mode == IOAPIC_LEVEL_TRIG ||
 kvm_irq_has_notifier(ioapic-kvm, KVM_IRQCHIP_IOAPIC,
-index))) {
+index) || index == RTC_GSI)) {
if (kvm_apic_match_dest(vcpu, NULL, 0,
e-fields.dest_id, e-fields.dest_mode))
__set_bit(e-fields.vector, (unsigned long 
*)eoi_exit_bitmap);
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v10 6/7] KVM: Let ioapic know the irq line status

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Userspace may deliver an RTC interrupt without querying the status. So we
want to track RTC EOI for this case.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/i8254.c |4 ++--
 arch/x86/kvm/x86.c   |6 --
 include/linux/kvm_host.h |   11 +++
 virt/kvm/assigned-dev.c  |   13 +++--
 virt/kvm/eventfd.c   |   15 +--
 virt/kvm/ioapic.c|   18 ++
 virt/kvm/ioapic.h|2 +-
 virt/kvm/irq_comm.c  |   19 ---
 virt/kvm/kvm_main.c  |3 ++-
 9 files changed, 54 insertions(+), 37 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index c1d30b2..412a5aa 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -290,8 +290,8 @@ static void pit_do_work(struct kthread_work *work)
}
spin_unlock(ps-inject_lock);
if (inject) {
-   kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1);
-   kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 0);
+   kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1, false);
+   kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 0, false);
 
/*
 * Provides NMI watchdog support via Virtual Wire mode.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2aaba81..5e85d8d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3484,13 +3484,15 @@ out:
return r;
 }
 
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+   bool line_status)
 {
if (!irqchip_in_kernel(kvm))
return -ENXIO;
 
irq_event-status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-   irq_event-irq, irq_event-level);
+   irq_event-irq, irq_event-level,
+   line_status);
return 0;
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1c0be23..7bcdb6b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -289,7 +289,8 @@ struct kvm_kernel_irq_routing_entry {
u32 gsi;
u32 type;
int (*set)(struct kvm_kernel_irq_routing_entry *e,
-  struct kvm *kvm, int irq_source_id, int level);
+  struct kvm *kvm, int irq_source_id, int level,
+  bool line_status);
union {
struct {
unsigned irqchip;
@@ -588,7 +589,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 
 int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
   struct kvm_userspace_memory_region *mem);
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level);
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
+   bool line_status);
 long kvm_arch_vm_ioctl(struct file *filp,
   unsigned int ioctl, unsigned long arg);
 
@@ -719,10 +721,11 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic 
*ioapic,
   union kvm_ioapic_redirect_entry *entry,
   unsigned long *deliver_bitmask);
 #endif
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+   bool line_status);
 int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int 
level);
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm 
*kvm,
-   int irq_source_id, int level);
+   int irq_source_id, int level, bool line_status);
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 3642239..f4c7f59 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -80,11 +80,12 @@ kvm_assigned_dev_raise_guest_irq(struct 
kvm_assigned_dev_kernel *assigned_dev,
spin_lock(assigned_dev-intx_mask_lock);
if (!(assigned_dev-flags  KVM_DEV_ASSIGN_MASK_INTX))
kvm_set_irq(assigned_dev-kvm,
-   assigned_dev-irq_source_id, vector, 1);
+   assigned_dev-irq_source_id, vector, 1,
+   false);
spin_unlock(assigned_dev-intx_mask_lock);
} else
kvm_set_irq(assigned_dev-kvm, assigned_dev-irq_source_id,
-   vector, 1);
+   vector, 1, false);
 }
 
 static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
@@ -165,7 +166,7 @@ 

[PATCH v10 7/7] KVM: Use eoi to track RTC interrupt delivery status

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

The current interrupt coalescing logic, which is only used by RTC, conflicts
with Posted Interrupt.
This patch introduces a new mechanism that uses eoi to track interrupts:
when delivering an interrupt to a vcpu, pending_eoi is set to the number of
vcpus that received the interrupt, and it is decreased as each vcpu writes
eoi. No subsequent RTC interrupt can be delivered to a vcpu until all vcpus
have written eoi.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 virt/kvm/ioapic.c |   36 +++-
 1 files changed, 35 insertions(+), 1 deletions(-)

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index a49fcd5..97c67a5 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -147,6 +147,22 @@ static void kvm_rtc_eoi_tracking_restore_all(struct 
kvm_ioapic *ioapic)
__rtc_irq_eoi_tracking_restore_one(vcpu);
 }
 
+static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
+{
+   if (test_and_clear_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map))
+   --ioapic-rtc_status.pending_eoi;
+
+   WARN_ON(ioapic-rtc_status.pending_eoi  0);
+}
+
+static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
+{
+   if (ioapic-rtc_status.pending_eoi  0)
+   return true; /* coalesced */
+
+   return false;
+}
+
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
bool line_status)
 {
@@ -260,6 +276,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
irq, bool line_status)
 {
union kvm_ioapic_redirect_entry *entry = ioapic-redirtbl[irq];
struct kvm_lapic_irq irqe;
+   int ret;
 
ioapic_debug(dest=%x dest_mode=%x delivery_mode=%x 
 vector=%x trig_mode=%x\n,
@@ -275,7 +292,15 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
irq, bool line_status)
irqe.level = 1;
irqe.shorthand = 0;
 
-   return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
+   if (irq == RTC_GSI  line_status) {
+   BUG_ON(ioapic-rtc_status.pending_eoi != 0);
+   ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe,
+   ioapic-rtc_status.dest_map);
+   ioapic-rtc_status.pending_eoi = ret;
+   } else
+   ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
+
+   return ret;
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
@@ -299,6 +324,12 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, 
int irq_source_id,
ret = 1;
} else {
int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+
+   if (irq == RTC_GSI  line_status 
+   rtc_irq_check_coalesced(ioapic)) {
+   ret = 0; /* coalesced */
+   goto out;
+   }
ioapic-irr |= mask;
if ((edge  old_irr != ioapic-irr) ||
(!edge  !entry.fields.remote_irr))
@@ -306,6 +337,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, 
int irq_source_id,
else
ret = 0; /* report coalesced interrupt */
}
+out:
trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
spin_unlock(ioapic-lock);
 
@@ -333,6 +365,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
if (ent-fields.vector != vector)
continue;
 
+   if (i == RTC_GSI)
+   rtc_irq_eoi(ioapic, vcpu);
/*
 * We are dropping lock while calling ack notifiers because ack
 * notifier callbacks for assigned devices call into IOAPIC
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v10 4/7] KVM: Add reset/restore rtc_status support

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

restore rtc_status from migration or save/restore

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |9 +++
 arch/x86/kvm/lapic.h |2 +
 virt/kvm/ioapic.c|   58 ++
 virt/kvm/ioapic.h|1 +
 4 files changed, 70 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0b73402..6796218 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
+{
+   struct kvm_lapic *apic = vcpu-arch.apic;
+
+   return apic_test_vector(vector, apic-regs + APIC_ISR) ||
+   apic_test_vector(vector, apic-regs + APIC_IRR);
+}
+
 static inline void apic_set_vector(int vec, void *bitmap)
 {
set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -1618,6 +1626,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
apic-highest_isr_cache = -1;
kvm_x86_ops-hwapic_isr_update(vcpu-kvm, apic_find_highest_isr(apic));
kvm_make_request(KVM_REQ_EVENT, vcpu);
+   kvm_rtc_eoi_tracking_restore_one(vcpu);
 }
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 3e5a431..16304b1 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -166,4 +166,6 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu 
*vcpu)
return vcpu-arch.apic-pending_events;
 }
 
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
+
 #endif
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 27ae8dd..9d76baa 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -90,6 +90,62 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic 
*ioapic,
return result;
 }
 
+static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
+{
+   ioapic-rtc_status.pending_eoi = 0;
+   bitmap_zero(ioapic-rtc_status.dest_map, KVM_MAX_VCPUS);
+}
+
+static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+{
+   bool new_val, old_val;
+   struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;
+   union kvm_ioapic_redirect_entry *e;
+
+   e = ioapic-redirtbl[RTC_GSI];
+   if (!kvm_apic_match_dest(vcpu, NULL, 0, e-fields.dest_id,
+   e-fields.dest_mode))
+   return;
+
+   new_val = kvm_apic_pending_eoi(vcpu, e-fields.vector);
+   old_val = test_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map);
+
+   if (new_val == old_val)
+   return;
+
+   if (new_val) {
+   __set_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map);
+   ioapic-rtc_status.pending_eoi++;
+   } else {
+   __clear_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map);
+   ioapic-rtc_status.pending_eoi--;
+   }
+
+   WARN_ON(ioapic-rtc_status.pending_eoi  0);
+}
+
+void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+{
+   struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;
+
+   spin_lock(ioapic-lock);
+   __rtc_irq_eoi_tracking_restore_one(vcpu);
+   spin_unlock(ioapic-lock);
+}
+
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
+{
+   struct kvm_vcpu *vcpu;
+   int i;
+
+   if (RTC_GSI = IOAPIC_NUM_PINS)
+   return;
+
+   rtc_irq_eoi_tracking_reset(ioapic);
+   kvm_for_each_vcpu(i, vcpu, ioapic-kvm)
+   __rtc_irq_eoi_tracking_restore_one(vcpu);
+}
+
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
union kvm_ioapic_redirect_entry *pent;
@@ -428,6 +484,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
ioapic-ioregsel = 0;
ioapic-irr = 0;
ioapic-id = 0;
+   rtc_irq_eoi_tracking_reset(ioapic);
update_handled_vectors(ioapic);
 }
 
@@ -494,6 +551,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state)
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
update_handled_vectors(ioapic);
kvm_ioapic_make_eoibitmap_request(kvm);
+   kvm_rtc_eoi_tracking_restore_all(ioapic);
spin_unlock(ioapic-lock);
return 0;
 }
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 761e5b5..313fc4e 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -79,6 +79,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
*kvm)
return kvm-arch.vioapic;
 }
 
+void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to 

[PATCH v10 0/7] KVM: VMX: Add Posted Interrupt supporting

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

The following patches add Posted Interrupt support to KVM:
The first patch enables the feature 'acknowledge interrupt on vmexit'. Since
it is required by Posted Interrupt, we need to enable it first.

And the subsequent patches are adding the posted interrupt supporting:
Posted Interrupt allows APIC interrupts to inject into guest directly
without any vmexit.

- When delivering an interrupt to the guest, if the target vcpu is running,
  update the Posted-interrupt requests bitmap and send a notification event
  to the vcpu. Then the vcpu will handle this interrupt automatically,
  without any software involvement.

- If target vcpu is not running or there already a notification event
  pending in the vcpu, do nothing. The interrupt will be handled by
  next vm entry

Changes from v9 to v10:
* Only check kvm_apic_hw_enabled() in vcpu_scan_ioapic().
* Remove repeated tracing in __apic_accept_irq().
* Rebase on top of KVM upstream + RTC eoi tracking patch.

Changes from v8 to v9:
* Add tracing in PI case when deliver interrupt.
* Scan ioapic when updating SPIV register.
* Rebase on top of KVM upstream + RTC eoi tracking patch.

Changes from v7 to v8:
* Remove unused member 'on' from struct pi_desc.
* Register a dummy function for sync_pir_to_irr if apicv is disabled.
* Minor fixup.
* Rebase on top of KVM upstream + RTC eoi tracking patch.

Yang Zhang (7):
  KVM: VMX: Enable acknowledge interrupt on vmexit
  KVM: VMX: Register a new IPI for posted interrupt
  KVM: VMX: Check the posted interrupt capability
  KVM: Call common update function when ioapic entry changed.
  KVM: Set TMR when programming ioapic entry
  KVM: VMX: Add the algorithm of deliver posted interrupt
  KVM: VMX: Use posted interrupt to deliver virtual interrupt

 arch/ia64/kvm/lapic.h  |6 -
 arch/x86/include/asm/entry_arch.h  |4 +
 arch/x86/include/asm/hardirq.h |3 +
 arch/x86/include/asm/hw_irq.h  |1 +
 arch/x86/include/asm/irq_vectors.h |5 +
 arch/x86/include/asm/kvm_host.h|3 +
 arch/x86/include/asm/vmx.h |4 +
 arch/x86/kernel/entry_64.S |5 +
 arch/x86/kernel/irq.c  |   22 
 arch/x86/kernel/irqinit.c  |4 +
 arch/x86/kvm/lapic.c   |   58 +++---
 arch/x86/kvm/lapic.h   |2 +
 arch/x86/kvm/svm.c |   12 ++
 arch/x86/kvm/vmx.c |  207 +++-
 arch/x86/kvm/x86.c |   19 +++-
 include/linux/kvm_host.h   |4 +-
 virt/kvm/ioapic.c  |   32 --
 virt/kvm/ioapic.h  |7 +-
 virt/kvm/irq_comm.c|4 +-
 virt/kvm/kvm_main.c|5 +-
 20 files changed, 333 insertions(+), 74 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v10 1/7] KVM: VMX: Enable acknowledge interrupt on vmexit

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

The acknowledge interrupt on exit feature controls processor behavior
for external interrupt acknowledgement. When this control is set, the
processor acknowledges the interrupt controller to acquire the
interrupt vector on VM exit.

After enabling this feature, an interrupt which arrives when the target cpu is
running in vmx non-root mode will be handled by the vmx handler instead of the
handler in the idt. Currently, the vmx handler only fakes an interrupt stack and
jumps to the idt table to let the real handler handle it. Further, we will
recognize the interrupt and only deliver interrupts which do not belong to the
current vcpu through the idt table.
Interrupts which belong to the current vcpu will be handled inside the vmx
handler. This will reduce KVM's interrupt handling cost.

Also, the interrupt enable logic is changed if this feature is turned on:
before this patch, the hypervisor called local_irq_enable() to enable interrupts
directly. Now the IF bit is set on the interrupt stack frame, and interrupts
will be enabled on return from the interrupt handler if an external interrupt
exists. If there is no external interrupt, local_irq_enable() is still called.

Refer to Intel SDM volum 3, chapter 33.2.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/include/asm/kvm_host.h |1 +
 arch/x86/kvm/svm.c  |6 
 arch/x86/kvm/vmx.c  |   58 ---
 arch/x86/kvm/x86.c  |4 ++-
 4 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b5a6462..8e95512 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -730,6 +730,7 @@ struct kvm_x86_ops {
int (*check_intercept)(struct kvm_vcpu *vcpu,
   struct x86_instruction_info *info,
   enum x86_intercept_stage stage);
+   void (*handle_external_intr)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7a46c1f..2f8fe3f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4233,6 +4233,11 @@ out:
return ret;
 }
 
+static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+   local_irq_enable();
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -4328,6 +4333,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_tdp_cr3 = set_tdp_cr3,
 
.check_intercept = svm_check_intercept,
+   .handle_external_intr = svm_handle_external_intr,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 03f5746..7408d93 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -378,6 +378,7 @@ struct vcpu_vmx {
struct shared_msr_entry *guest_msrs;
int   nmsrs;
int   save_nmsrs;
+   unsigned long host_idt_base;
 #ifdef CONFIG_X86_64
u64   msr_host_kernel_gs_base;
u64   msr_guest_kernel_gs_base;
@@ -2627,7 +2628,8 @@ static __init int setup_vmcs_config(struct vmcs_config 
*vmcs_conf)
 #ifdef CONFIG_X86_64
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
-   opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
+   opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
+   VM_EXIT_ACK_INTR_ON_EXIT;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
_vmexit_control)  0)
return -EIO;
@@ -3879,7 +3881,7 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 
msr)
  * Note that host-state that does change is set elsewhere. E.g., host-state
  * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
  */
-static void vmx_set_constant_host_state(void)
+static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 {
u32 low32, high32;
unsigned long tmpl;
@@ -3907,6 +3909,7 @@ static void vmx_set_constant_host_state(void)
 
native_store_idt(dt);
vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
+   vmx-host_idt_base = dt.address;
 
vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
 
@@ -4039,7 +4042,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
vmcs_write16(HOST_FS_SELECTOR, 0);/* 22.2.4 */
vmcs_write16(HOST_GS_SELECTOR, 0);/* 22.2.4 */
-   vmx_set_constant_host_state();
+   vmx_set_constant_host_state(vmx);
 #ifdef CONFIG_X86_64
rdmsrl(MSR_FS_BASE, a);
vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
@@ -6400,6 +6403,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx 
*vmx)
}
 }
 
+static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+   u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+   /*
+* If external interrupt exists, IF bit is set in rflags/eflags 

[PATCH v10 2/7] KVM: VMX: Register a new IPI for posted interrupt

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

The Posted Interrupt feature requires a special IPI to deliver posted
interrupts to the guest. It should have a high priority so the interrupt will
not be blocked by others.
Normally, the posted interrupt will be consumed by the vcpu if the target vcpu
is running, transparently to the OS. But in some cases, the interrupt will
arrive when the target vcpu is scheduled out, and the host will see it. So we
need to register a dummy handler to handle it.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
Acked-by: Ingo Molnar mi...@kernel.org
---
 arch/x86/include/asm/entry_arch.h  |4 
 arch/x86/include/asm/hardirq.h |3 +++
 arch/x86/include/asm/hw_irq.h  |1 +
 arch/x86/include/asm/irq_vectors.h |5 +
 arch/x86/kernel/entry_64.S |5 +
 arch/x86/kernel/irq.c  |   22 ++
 arch/x86/kernel/irqinit.c  |4 
 7 files changed, 44 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/entry_arch.h 
b/arch/x86/include/asm/entry_arch.h
index 40afa00..9bd4eca 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
 
 BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
 
+#ifdef CONFIG_HAVE_KVM
+BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
+#endif
+
 /*
  * every pentium local APIC has two 'local interrupts', with a
  * soft-definable vector attached to both interrupts, one of
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 81f04ce..ab0ae1a 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -12,6 +12,9 @@ typedef struct {
unsigned int irq_spurious_count;
unsigned int icr_read_retry_count;
 #endif
+#ifdef CONFIG_HAVE_KVM
+   unsigned int kvm_posted_intr_ipis;
+#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
unsigned int apic_irq_work_irqs;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 10a78c3..1da97ef 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -28,6 +28,7 @@
 /* Interrupt handlers registered during init_IRQ */
 extern void apic_timer_interrupt(void);
 extern void x86_platform_ipi(void);
+extern void kvm_posted_intr_ipi(void);
 extern void error_interrupt(void);
 extern void irq_work_interrupt(void);
 
diff --git a/arch/x86/include/asm/irq_vectors.h 
b/arch/x86/include/asm/irq_vectors.h
index aac5fa6..5702d7e 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,11 @@
  */
 #define X86_PLATFORM_IPI_VECTOR0xf7
 
+/* Vector for KVM to deliver posted interrupt IPI */
+#ifdef CONFIG_HAVE_KVM
+#define POSTED_INTR_VECTOR 0xf2
+#endif
+
 /*
  * IRQ work vector:
  */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c1d01e6..7272089 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \
 apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
 
+#ifdef CONFIG_HAVE_KVM
+apicinterrupt POSTED_INTR_VECTOR \
+   kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
+#endif
+
 apicinterrupt THRESHOLD_APIC_VECTOR \
threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index e4595f1..6ae6ea1 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -228,6 +228,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
set_irq_regs(old_regs);
 }
 
+#ifdef CONFIG_HAVE_KVM
+/*
+ * Handler for POSTED_INTERRUPT_VECTOR.
+ */
+void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
+{
+   struct pt_regs *old_regs = set_irq_regs(regs);
+
+   ack_APIC_irq();
+
+   irq_enter();
+
+   exit_idle();
+
+   inc_irq_stat(kvm_posted_intr_ipis);
+
+   irq_exit();
+
+   set_irq_regs(old_regs);
+}
+#endif
+
 EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 7dc4e45..a2a1fbc 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -172,6 +172,10 @@ static void __init apic_intr_init(void)
 
/* IPI for X86 platform specific use */
alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
+#ifdef CONFIG_HAVE_KVM
+   /* IPI for KVM to deliver posted interrupt */
+   alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
+#endif
 
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  

[PATCH v10 3/7] KVM: VMX: Check the posted interrupt capability

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Detect the posted interrupt feature. If it exists, then set it in vmcs_config.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/include/asm/vmx.h |4 ++
 arch/x86/kvm/vmx.c |   82 +---
 2 files changed, 66 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index fc1c313..6f07f19 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -71,6 +71,7 @@
 #define PIN_BASED_NMI_EXITING   0x0008
 #define PIN_BASED_VIRTUAL_NMIS  0x0020
 #define PIN_BASED_VMX_PREEMPTION_TIMER  0x0040
+#define PIN_BASED_POSTED_INTR   0x0080
 
 #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR0x0016
 
@@ -102,6 +103,7 @@
 /* VMCS Encodings */
 enum vmcs_field {
VIRTUAL_PROCESSOR_ID= 0x,
+   POSTED_INTR_NV  = 0x0002,
GUEST_ES_SELECTOR   = 0x0800,
GUEST_CS_SELECTOR   = 0x0802,
GUEST_SS_SELECTOR   = 0x0804,
@@ -136,6 +138,8 @@ enum vmcs_field {
VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x2013,
APIC_ACCESS_ADDR= 0x2014,
APIC_ACCESS_ADDR_HIGH   = 0x2015,
+   POSTED_INTR_DESC_ADDR   = 0x2016,
+   POSTED_INTR_DESC_ADDR_HIGH  = 0x2017,
EPT_POINTER = 0x201a,
EPT_POINTER_HIGH= 0x201b,
EOI_EXIT_BITMAP0= 0x201c,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7408d93..05da991 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
-static bool __read_mostly enable_apicv_reg_vid;
+static bool __read_mostly enable_apicv;
+module_param(enable_apicv, bool, S_IRUGO);
 
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
@@ -366,6 +367,14 @@ struct nested_vmx {
struct page *apic_access_page;
 };
 
+#define POSTED_INTR_ON  0
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+   u32 pir[8]; /* Posted interrupt requested */
+   u32 control;/* bit 0 of control is outstanding notification bit */
+   u32 rsvd[7];
+} __aligned(64);
+
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
unsigned long host_rsp;
@@ -430,6 +439,9 @@ struct vcpu_vmx {
 
bool rdtscp_enabled;
 
+   /* Posted interrupt descriptor */
+   struct pi_desc pi_desc;
+
/* Support for a guest hypervisor (nested VMX) */
struct nested_vmx nested;
 };
@@ -785,6 +797,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 }
 
+static inline bool cpu_has_vmx_posted_intr(void)
+{
+   return vmcs_config.pin_based_exec_ctrl  PIN_BASED_POSTED_INTR;
+}
+
+static inline bool cpu_has_vmx_apicv(void)
+{
+   return cpu_has_vmx_apic_register_virt() 
+   cpu_has_vmx_virtual_intr_delivery() 
+   cpu_has_vmx_posted_intr();
+}
+
 static inline bool cpu_has_vmx_flexpriority(void)
 {
return cpu_has_vmx_tpr_shadow() 
@@ -2552,12 +2576,6 @@ static __init int setup_vmcs_config(struct vmcs_config 
*vmcs_conf)
u32 _vmexit_control = 0;
u32 _vmentry_control = 0;
 
-   min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-   opt = PIN_BASED_VIRTUAL_NMIS;
-   if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
-   _pin_based_exec_control)  0)
-   return -EIO;
-
min = CPU_BASED_HLT_EXITING |
 #ifdef CONFIG_X86_64
  CPU_BASED_CR8_LOAD_EXITING |
@@ -2634,6 +2652,17 @@ static __init int setup_vmcs_config(struct vmcs_config 
*vmcs_conf)
_vmexit_control)  0)
return -EIO;
 
+   min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+   opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+   if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
+   _pin_based_exec_control)  0)
+   return -EIO;
+
+   if (!(_cpu_based_2nd_exec_control 
+   SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
+   !(_vmexit_control  VM_EXIT_ACK_INTR_ON_EXIT))
+   _pin_based_exec_control = ~PIN_BASED_POSTED_INTR;
+
min = 0;
opt = VM_ENTRY_LOAD_IA32_PAT;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
@@ -2812,11 +2841,10 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_ple())
ple_gap = 0;
 
-   if (!cpu_has_vmx_apic_register_virt() ||
-   !cpu_has_vmx_virtual_intr_delivery())
-   enable_apicv_reg_vid = 0;
+   

[PATCH v10 4/7] KVM: Call common update function when ioapic entry changed.

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Both TMR and EOI exit bitmap need to be updated when ioapic changed
or vcpu's id/ldr/dfr changed. So use common function instead eoi exit
bitmap specific function.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/ia64/kvm/lapic.h|6 --
 arch/x86/kvm/lapic.c |2 +-
 arch/x86/kvm/vmx.c   |3 +++
 arch/x86/kvm/x86.c   |   11 +++
 include/linux/kvm_host.h |4 ++--
 virt/kvm/ioapic.c|   22 +-
 virt/kvm/ioapic.h|6 ++
 virt/kvm/irq_comm.c  |4 ++--
 virt/kvm/kvm_main.c  |4 ++--
 9 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index c3e2935..c5f92a9 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -27,10 +27,4 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct 
kvm_lapic_irq *irq);
 #define kvm_apic_present(x) (true)
 #define kvm_lapic_enabled(x) (true)
 
-static inline bool kvm_apic_vid_enabled(void)
-{
-   /* IA64 has no apicv supporting, do nothing here */
-   return false;
-}
-
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6796218..34a8ca8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -217,7 +217,7 @@ out:
if (old)
kfree_rcu(old, rcu);
 
-   kvm_ioapic_make_eoibitmap_request(kvm);
+   kvm_vcpu_request_scan_ioapic(kvm);
 }
 
 static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 05da991..5637a8a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6415,6 +6415,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, 
int max_irr)
 
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
+   if (!vmx_vm_has_apicv(vcpu-kvm))
+   return;
+
vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5b146d2..d8e9a32 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5649,13 +5649,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
-static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
+static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
u64 eoi_exit_bitmap[4];
 
+   if (!kvm_apic_hw_enabled(vcpu-arch.apic))
+   return;
+
memset(eoi_exit_bitmap, 0, 32);
 
-   kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
+   kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap);
kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap);
 }
 
@@ -5712,8 +5715,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_handle_pmu_event(vcpu);
if (kvm_check_request(KVM_REQ_PMI, vcpu))
kvm_deliver_pmi(vcpu);
-   if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
-   update_eoi_exitmap(vcpu);
+   if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
+   vcpu_scan_ioapic(vcpu);
}
 
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7bcdb6b..6f49d9d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -126,7 +126,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_MASTERCLOCK_UPDATE 19
 #define KVM_REQ_MCLOCK_INPROGRESS 20
 #define KVM_REQ_EPR_EXIT  21
-#define KVM_REQ_EOIBITMAP 22
+#define KVM_REQ_SCAN_IOAPIC   22
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID   1
@@ -572,7 +572,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
-void kvm_make_update_eoibitmap_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request(struct kvm *kvm);
 
 long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 97c67a5..f2157a9 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -193,15 +193,13 @@ static void update_handled_vectors(struct kvm_ioapic 
*ioapic)
smp_wmb();
 }
 
-void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-   u64 *eoi_exit_bitmap)
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;
union kvm_ioapic_redirect_entry *e;
int index;
 
spin_lock(ioapic-lock);
-   /* traverse ioapic entry to set eoi exit bitmap*/
for (index = 0; index  IOAPIC_NUM_PINS; index++) {
e = 

[PATCH v10 5/7] KVM: Set TMR when programming ioapic entry

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

We already know the trigger mode of a given interrupt when programming
the ioapic entry. So it's not necessary to set it in each interrupt
delivery.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |   15 +--
 arch/x86/kvm/lapic.h |1 +
 arch/x86/kvm/x86.c   |5 -
 virt/kvm/ioapic.c|   12 +---
 virt/kvm/ioapic.h|3 ++-
 5 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 34a8ca8..d197579 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -468,6 +468,15 @@ static inline int apic_find_highest_isr(struct kvm_lapic 
*apic)
return result;
 }
 
+void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
+{
+   struct kvm_lapic *apic = vcpu-arch.apic;
+   int i;
+
+   for (i = 0; i  8; i++)
+   apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
+}
+
 static void apic_update_ppr(struct kvm_lapic *apic)
 {
u32 tpr, isrv, ppr, old_ppr;
@@ -661,12 +670,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
delivery_mode,
if (dest_map)
__set_bit(vcpu-vcpu_id, dest_map);
 
-   if (trig_mode) {
-   apic_debug(level trig mode for vector %d, vector);
-   apic_set_vector(vector, apic-regs + APIC_TMR);
-   } else
-   apic_clear_vector(vector, apic-regs + APIC_TMR);
-
result = !apic_test_and_set_irr(vector, apic);
trace_kvm_apic_accept_irq(vcpu-vcpu_id, delivery_mode,
  trig_mode, vector, !result);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 16304b1..7fe0c91 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -53,6 +53,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
+void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d8e9a32..6147d24 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5652,14 +5652,17 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
u64 eoi_exit_bitmap[4];
+   u32 tmr[8];
 
if (!kvm_apic_hw_enabled(vcpu-arch.apic))
return;
 
memset(eoi_exit_bitmap, 0, 32);
+   memset(tmr, 0, 32);
 
-   kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap);
+   kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+   kvm_apic_update_tmr(vcpu, tmr);
 }
 
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index f2157a9..2d68297 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -193,7 +193,8 @@ static void update_handled_vectors(struct kvm_ioapic 
*ioapic)
smp_wmb();
 }
 
-void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+   u32 *tmr)
 {
struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;
union kvm_ioapic_redirect_entry *e;
@@ -207,8 +208,13 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 
*eoi_exit_bitmap)
 kvm_irq_has_notifier(ioapic-kvm, KVM_IRQCHIP_IOAPIC,
 index) || index == RTC_GSI)) {
if (kvm_apic_match_dest(vcpu, NULL, 0,
-   e-fields.dest_id, e-fields.dest_mode))
-   __set_bit(e-fields.vector, (unsigned long 
*)eoi_exit_bitmap);
+   e-fields.dest_id, e-fields.dest_mode)) {
+   __set_bit(e-fields.vector,
+   (unsigned long *)eoi_exit_bitmap);
+   if (e-fields.trig_mode == IOAPIC_LEVEL_TRIG)
+   __set_bit(e-fields.vector,
+   (unsigned long *)tmr);
+   }
}
}
spin_unlock(ioapic-lock);
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 674a388..615d8c9 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -97,6 +97,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
-void kvm_ioapic_scan_entry(struct 

[PATCH v10 6/7] KVM: VMX: Add the algorithm of deliver posted interrupt

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Only deliver the posted interrupt when target vcpu is running
and there is no previous interrupt pending in pir.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/include/asm/kvm_host.h |2 +
 arch/x86/kvm/lapic.c|   13 
 arch/x86/kvm/lapic.h|1 +
 arch/x86/kvm/svm.c  |6 
 arch/x86/kvm/vmx.c  |   64 ++-
 virt/kvm/kvm_main.c |1 +
 6 files changed, 86 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8e95512..842ea5a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -704,6 +704,8 @@ struct kvm_x86_ops {
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+   void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+   void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d197579..dbf74c9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -318,6 +318,19 @@ static u8 count_vectors(void *bitmap)
return count;
 }
 
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+{
+   u32 i, pir_val;
+   struct kvm_lapic *apic = vcpu-arch.apic;
+
+   for (i = 0; i = 7; i++) {
+   pir_val = xchg(pir[i], 0);
+   if (pir_val)
+   *((u32 *)(apic-regs + APIC_IRR + i * 0x10)) |= pir_val;
+   }
+}
+EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
+
 static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
 {
apic-irr_pending = true;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 7fe0c91..c730ac9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -54,6 +54,7 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
 void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2f8fe3f..d6713e1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3577,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int 
isr)
return;
 }
 
+static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+   return;
+}
+
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4305,6 +4310,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.vm_has_apicv = svm_vm_has_apicv,
.load_eoi_exitmap = svm_load_eoi_exitmap,
.hwapic_isr_update = svm_hwapic_isr_update,
+   .sync_pir_to_irr = svm_sync_pir_to_irr,
 
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5637a8a..314b2ed 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -375,6 +375,23 @@ struct pi_desc {
u32 rsvd[7];
 } __aligned(64);
 
+static bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+   return test_and_set_bit(POSTED_INTR_ON,
+   (unsigned long *)pi_desc-control);
+}
+
+static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+   return test_and_clear_bit(POSTED_INTR_ON,
+   (unsigned long *)pi_desc-control);
+}
+
+static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+   return test_and_set_bit(vector, (unsigned long *)pi_desc-pir);
+}
+
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
unsigned long host_rsp;
@@ -639,6 +656,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
 static bool guest_state_valid(struct kvm_vcpu *vcpu);
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2846,8 +2864,11 @@ static __init int hardware_setup(void)
 
if (enable_apicv)
kvm_x86_ops-update_cr8_intercept = NULL;
-   else
+   else {
kvm_x86_ops-hwapic_irr_update = NULL;
+   kvm_x86_ops-deliver_posted_interrupt = NULL;
+   kvm_x86_ops-sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+   }
 
if (nested)
 

[PATCH v10 7/7] KVM: VMX: Use posted interrupt to deliver virtual interrupt

2013-04-11 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

If posted interrupt is available, then use it to inject virtual
interrupt to the guest.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |   30 +++---
 arch/x86/kvm/vmx.c   |2 +-
 arch/x86/kvm/x86.c   |1 +
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index dbf74c9..e29883c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -353,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic 
*apic)
if (!apic-irr_pending)
return -1;
 
+   kvm_x86_ops-sync_pir_to_irr(apic-vcpu);
result = apic_search_irr(apic);
ASSERT(result == -1 || result = 16);
 
@@ -683,18 +684,25 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
delivery_mode,
if (dest_map)
__set_bit(vcpu-vcpu_id, dest_map);
 
-   result = !apic_test_and_set_irr(vector, apic);
-   trace_kvm_apic_accept_irq(vcpu-vcpu_id, delivery_mode,
- trig_mode, vector, !result);
-   if (!result) {
-   if (trig_mode)
-   apic_debug(level trig mode repeatedly for 
-   vector %d, vector);
-   break;
-   }
+   if (kvm_x86_ops-deliver_posted_interrupt) {
+   result = 1;
+   kvm_x86_ops-deliver_posted_interrupt(vcpu, vector);
+   } else {
+   result = !apic_test_and_set_irr(vector, apic);
 
-   kvm_make_request(KVM_REQ_EVENT, vcpu);
-   kvm_vcpu_kick(vcpu);
+   if (!result) {
+   if (trig_mode)
+   apic_debug(level trig mode repeatedly 
+   for vector %d, vector);
+   goto out;
+   }
+
+   kvm_make_request(KVM_REQ_EVENT, vcpu);
+   kvm_vcpu_kick(vcpu);
+   }
+out:
+   trace_kvm_apic_accept_irq(vcpu-vcpu_id, delivery_mode,
+   trig_mode, vector, !result);
break;
 
case APIC_DM_REMRD:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 314b2ed..52b21da 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,7 +84,7 @@ module_param(vmm_exclusive, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
-static bool __read_mostly enable_apicv;
+static bool __read_mostly enable_apicv = 1;
 module_param(enable_apicv, bool, S_IRUGO);
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6147d24..628582f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2685,6 +2685,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
 {
+   kvm_x86_ops-sync_pir_to_irr(vcpu);
memcpy(s-regs, vcpu-arch.apic-regs, sizeof *s);
 
return 0;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: x86 emulator: Fix segment loading in VM86

2013-04-11 Thread Kevin Wolf
This fixes a regression introduced in commit 03ebebeb1 (KVM: x86
emulator: Leave segment limit and attributs alone in real mode).

The mentioned commit changed the segment descriptors for both real mode
and VM86 to only update the segment base instead of creating a
completely new descriptor with limit 0xffff so that unreal mode keeps
working across a segment register reload.

This leads to an invalid segment descriptor in the eyes of VMX, which
seems to be okay for real mode because KVM will fix it up before the
next VM entry or emulate the state, but it doesn't do this if the guest
is in VM86, so we end up with:

  KVM: entry failed, hardware error 0x80000021

Fix this by effectively reverting commit 03ebebeb1 for VM86 and leaving
it only in place for real mode, which is where it's really needed.

Signed-off-by: Kevin Wolf kw...@redhat.com
---
 arch/x86/kvm/emulate.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a335cc6..069d799 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1578,12 +1578,21 @@ static int load_segment_descriptor(struct 
x86_emulate_ctxt *ctxt,
 
memset(seg_desc, 0, sizeof seg_desc);
 
-   if ((seg = VCPU_SREG_GS  ctxt-mode == X86EMUL_MODE_VM86)
-   || ctxt-mode == X86EMUL_MODE_REAL) {
-   /* set real mode segment descriptor */
+   if (ctxt-mode == X86EMUL_MODE_REAL) {
+   /* set real mode segment descriptor (keep limit etc. for
+* unreal mode) */
ctxt-ops-get_segment(ctxt, dummy, seg_desc, NULL, seg);
set_desc_base(seg_desc, selector  4);
goto load;
+   } else if (seg = VCPU_SREG_GS  ctxt-mode == X86EMUL_MODE_VM86) {
+   /* VM86 needs a clean new segment descriptor */
+   set_desc_base(seg_desc, selector  4);
+   set_desc_limit(seg_desc, 0x);
+   seg_desc.type = 3;
+   seg_desc.p = 1;
+   seg_desc.s = 1;
+   seg_desc.dpl = 3;
+   goto load;
}
 
rpl = selector  3;
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86 emulator: Fix segment loading in VM86

2013-04-11 Thread Gleb Natapov
On Thu, Apr 11, 2013 at 02:06:03PM +0200, Kevin Wolf wrote:
 This fixes a regression introduced in commit 03ebebeb1 (KVM: x86
 emulator: Leave segment limit and attributs alone in real mode).
 
 The mentioned commit changed the segment descriptors for both real mode
 and VM86 to only update the segment base instead of creating a
 completely new descriptor with limit 0xffff so that unreal mode keeps
 working across a segment register reload.
 
 This leads to an invalid segment descriptor in the eyes of VMX, which
 seems to be okay for real mode because KVM will fix it up before the
 next VM entry or emulate the state, but it doesn't do this if the guest
 is in VM86, so we end up with:
 
    KVM: entry failed, hardware error 0x80000021
 
 Fix this by effectively reverting commit 03ebebeb1 for VM86 and leaving
 it only in place for real mode, which is where it's really needed.
 
 Signed-off-by: Kevin Wolf kw...@redhat.com
Applied, thanks.

 ---
  arch/x86/kvm/emulate.c | 15 ---
  1 file changed, 12 insertions(+), 3 deletions(-)
 
 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
 index a335cc6..069d799 100644
 --- a/arch/x86/kvm/emulate.c
 +++ b/arch/x86/kvm/emulate.c
 @@ -1578,12 +1578,21 @@ static int load_segment_descriptor(struct 
 x86_emulate_ctxt *ctxt,
  
   memset(seg_desc, 0, sizeof seg_desc);
  
 - if ((seg = VCPU_SREG_GS  ctxt-mode == X86EMUL_MODE_VM86)
 - || ctxt-mode == X86EMUL_MODE_REAL) {
 - /* set real mode segment descriptor */
 + if (ctxt-mode == X86EMUL_MODE_REAL) {
 + /* set real mode segment descriptor (keep limit etc. for
 +  * unreal mode) */
   ctxt-ops-get_segment(ctxt, dummy, seg_desc, NULL, seg);
   set_desc_base(seg_desc, selector  4);
   goto load;
 + } else if (seg = VCPU_SREG_GS  ctxt-mode == X86EMUL_MODE_VM86) {
 + /* VM86 needs a clean new segment descriptor */
 + set_desc_base(seg_desc, selector  4);
 + set_desc_limit(seg_desc, 0x);
 + seg_desc.type = 3;
 + seg_desc.p = 1;
 + seg_desc.s = 1;
 + seg_desc.dpl = 3;
 + goto load;
   }
  
   rpl = selector  3;
 -- 
 1.8.1.4

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: RFC: vfio API changes needed for powerpc (v3)

2013-04-11 Thread Joerg Roedel
On Tue, Apr 09, 2013 at 01:22:15AM +, Yoder Stuart-B08248 wrote:
  What happens if a normal unmap call is done on the MSI iova?  Do we
  need a separate unmap?
 
 I was thinking a normal unmap on an MSI window would be an error...but
 I'm not set on that.   I put the msi unmap there to make things symmetric,
 a normal unmap would work as well...and then we could drop the msi unmap.

Hmm, this API semantic isn't very clean. When you explicitly map the MSI
banks a clean API would also allow to unmap them. But that is not
possible in your design because the kernel is responsible for mapping
MSIs and you can't unmap a MSI bank that is in use by the kernel.

So since the kernel owns the MSI setup anyways it should also take care
of mapping the MSI banks. What is the reason to not let the kernel
allocate the MSI banks top-down from the end of the DMA window space?
Just let userspace know (or even set if needed) in advance how many of
the windows it configures the kernel will take for mapping MSI banks and
you are fine, no?


Joerg


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PULL 0/7] ppc patch queue 2013-03-22

2013-04-11 Thread Marcelo Tosatti
On Tue, Mar 26, 2013 at 12:59:04PM +1100, Paul Mackerras wrote:
 On Tue, Mar 26, 2013 at 03:33:12AM +0200, Gleb Natapov wrote:
  On Tue, Mar 26, 2013 at 12:35:09AM +0100, Alexander Graf wrote:
   I agree. So if it doesn't hurt to have the same commits in kvm/next and 
   kvm/master, I'd be more than happy to send another pull request with the 
   important fixes against kvm/master as well.
   
  If it will result in the same commit showing twice in the Linus tree in 
  3.10 we cannot do that.
 
 Why not?  In the circumstances it seems perfectly reasonable to me.
 Git should merge the branches without any problem, and even if it
 doesn't, Linus is good at fixing merge conflicts.
 
 Paul.

Yes, we should avoid duplicate commits but it's not fatal for them to exist.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PULL 0/7] ppc patch queue 2013-03-22

2013-04-11 Thread Alexander Graf

On 11.04.2013, at 15:45, Marcelo Tosatti wrote:

 On Tue, Mar 26, 2013 at 12:59:04PM +1100, Paul Mackerras wrote:
 On Tue, Mar 26, 2013 at 03:33:12AM +0200, Gleb Natapov wrote:
 On Tue, Mar 26, 2013 at 12:35:09AM +0100, Alexander Graf wrote:
 I agree. So if it doesn't hurt to have the same commits in kvm/next and 
 kvm/master, I'd be more than happy to send another pull request with the 
 important fixes against kvm/master as well.
 
 If it will result in the same commit showing twice in the Linus tree in 
 3.10 we cannot do that.
 
 Why not?  In the circumstances it seems perfectly reasonable to me.
 Git should merge the branches without any problem, and even if it
 doesn't, Linus is good at fixing merge conflicts.
 
 Paul.
 
 Yes, should avoid duplicate commits but its not fatal for them to exist.

So I may send a pull request against 3.9 with the 3 commits that already are in 
kvm/next?


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 4/5] KVM: nVMX: Fix conditions for interrupt injection

2013-04-11 Thread Jan Kiszka
On 2013-04-11 13:20, Gleb Natapov wrote:
 On Sun, Mar 24, 2013 at 07:44:47PM +0100, Jan Kiszka wrote:
 From: Jan Kiszka jan.kis...@siemens.com

 If we are in guest mode, L0 can only inject events into L2 if L1 has
 nothing pending. Otherwise, L0 would overwrite L1's events and they
 would get lost. But even if no injection of L1 is pending, we do not
 want L0 to interrupt unnecessarily an on going vmentry with all its side
 effects on the vmcs. Therefore, injection shall be disallowed during
 L1-L2 transitions. This check is conceptually independent of
 nested_exit_on_intr.

 If L1 traps external interrupts, then we also need to look at L1's
 idt_vectoring_info_field. If it is empty, we can kick the guest from L2
 to L1, just like the previous code worked.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  arch/x86/kvm/vmx.c |   28 
  1 files changed, 20 insertions(+), 8 deletions(-)

 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index d1bc834..30aa198 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -4325,16 +4325,28 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
  
  static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
  {
 -if (is_guest_mode(vcpu)  nested_exit_on_intr(vcpu)) {
 +if (is_guest_mode(vcpu)) {
  struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 -if (to_vmx(vcpu)-nested.nested_run_pending ||
 -(vmcs12-idt_vectoring_info_field 
 - VECTORING_INFO_VALID_MASK))
 +
 +if (to_vmx(vcpu)-nested.nested_run_pending)
  return 0;
 -nested_vmx_vmexit(vcpu);
 -vmcs12-vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
 -vmcs12-vm_exit_intr_info = 0;
 -/* fall through to normal code, but now in L1, not L2 */
 +if (nested_exit_on_intr(vcpu)) {
 +/*
 + * Check if the idt_vectoring_info_field is free. We
 + * cannot raise EXIT_REASON_EXTERNAL_INTERRUPT if it
 + * isn't.
 + */
 +if (vmcs12-idt_vectoring_info_field 
 +VECTORING_INFO_VALID_MASK)
 +return 0;
 After patch 2 I do not see how this can be true. Now this case is
 handled by the common code: since event queue is not empty the code will not
 get here.

The event queue is unconditionally cleared (after being migrated to
vmcs12) in patch 2.

Jan




signature.asc
Description: OpenPGP digital signature


Re: [PATCH -v2] kvm: Emulate MOVBE

2013-04-11 Thread Gleb Natapov
On Thu, Apr 11, 2013 at 02:18:15AM +0200, Borislav Petkov wrote:
 On Wed, Apr 10, 2013 at 03:16:39PM +0300, Gleb Natapov wrote:
  Right, the question is how kernel can tell QEMU that the cpuid bit is
  supported but should not be set unless explicitly asked by an user.
 
 Actually, this seems to work with the patch below based on whether you
 have +movbe in the -cpu option or not.
 
The problem is that -cpu host will have it unconditionally and this is
definitely not what we want.

 Anyway, here's the second version with hopefully all comments and
 suggestions addressed.
 
Thanks, will review it later.

 Thanks.
 
 --
 From 612fc75a732ad16332f270b7c52a68c89e3565ca Mon Sep 17 00:00:00 2001
 From: Borislav Petkov b...@suse.de
 Date: Thu, 11 Apr 2013 02:06:30 +0200
 Subject: [PATCH] kvm: Emulate MOVBE
 
 This basically came from the need to be able to boot 32-bit Atom SMP
 guests on an AMD host, i.e. host which doesn't support MOVBE. As a
 matter of fact, qemu has since recently received MOVBE support but we
 cannot share that with kvm emulation and thus we have to do this in the
 host.
 
 We piggyback on the #UD path and emulate the MOVBE functionality. With
 it, an 8-core SMP guest boots in under 6 seconds.
 
 Also, requesting MOVBE emulation needs to happen explicitly to work,
 i.e. qemu -cpu n270,+movbe...
 
 Signed-off-by: Andre Przywara an...@andrep.de
 Signed-off-by: Borislav Petkov b...@suse.de
 ---
  arch/x86/kvm/cpuid.c   |  2 +-
  arch/x86/kvm/emulate.c | 39 +--
  2 files changed, 38 insertions(+), 3 deletions(-)
 
 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
 index a20ecb5b6cbf..2d44fc4fd855 100644
 --- a/arch/x86/kvm/cpuid.c
 +++ b/arch/x86/kvm/cpuid.c
 @@ -273,7 +273,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, 
 u32 function,
   cpuid_mask(entry-ecx, 4);
   /* we support x2apic emulation even if host does not support
* it since we emulate x2apic in software */
 - entry-ecx |= F(X2APIC);
 + entry-ecx |= F(X2APIC) | F(MOVBE);
   break;
   /* function 2 entries are STATEFUL. That is, repeated cpuid commands
* may return different values. This forces us to get_cpu() before
 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
 index a335cc6cde72..9011c7a656ad 100644
 --- a/arch/x86/kvm/emulate.c
 +++ b/arch/x86/kvm/emulate.c
 @@ -152,6 +152,7 @@
  #define Avx ((u64)1  43)  /* Advanced Vector Extensions */
  #define Fastop  ((u64)1  44)  /* Use opcode::u.fastop */
  #define NoWrite ((u64)1  45)  /* No writeback */
 +#define EmulateOnUD ((u64)1  46)  /* emulate if unsupported by the host */
  
  #define X2(x...) x, x
  #define X3(x...) X2(x), x
 @@ -3107,6 +3108,30 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
   return X86EMUL_CONTINUE;
  }
  
 +static int em_movbe(struct x86_emulate_ctxt *ctxt)
 +{
 + switch (ctxt-op_bytes) {
 + case 2:
 + *(u16 *)ctxt-dst.valptr = swab16(*(u16 *)ctxt-src.valptr);
 + break;
 + case 4:
 + *(u32 *)ctxt-dst.valptr = swab32(*(u32 *)ctxt-src.valptr);
 +
 + /*
 +  * clear upper dword for 32-bit operand size in 64-bit mode.
 +  */
 + if (ctxt-mode == X86EMUL_MODE_PROT64)
 + *((u32 *)ctxt-dst.valptr + 1) = 0x0;
 + break;
 + case 8:
 + *(u64 *)ctxt-dst.valptr = swab64(*(u64 *)ctxt-src.valptr);
 + break;
 + default:
 + return X86EMUL_PROPAGATE_FAULT;
 + }
 + return X86EMUL_CONTINUE;
 +}
 +
  static int em_cr_write(struct x86_emulate_ctxt *ctxt)
  {
   if (ctxt-ops-set_cr(ctxt, ctxt-modrm_reg, ctxt-src.val))
 @@ -4033,6 +4058,11 @@ static const struct opcode twobyte_table[256] = {
   N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
  };
  
 +static const struct opcode threebyte_table[] = {
 + [0xf0] = I(DstReg | SrcMem | ModRM | Mov | EmulateOnUD, em_movbe),
 + [0xf1] = I(DstMem | SrcReg | ModRM | Mov | EmulateOnUD, em_movbe),
 +};
 +
  #undef D
  #undef N
  #undef G
 @@ -4320,6 +4350,9 @@ done_prefixes:
   ctxt-twobyte = 1;
   ctxt-b = insn_fetch(u8, ctxt);
   opcode = twobyte_table[ctxt-b];
 +
 + if (ctxt-b == 0x38)
 + opcode = threebyte_table[insn_fetch(u8, ctxt)];
   }
   ctxt-d = opcode.flags;
  
 @@ -4376,8 +4409,10 @@ done_prefixes:
   if (ctxt-d == 0 || (ctxt-d  Undefined))
   return EMULATION_FAILED;
  
 - if (!(ctxt-d  VendorSpecific)  ctxt-only_vendor_specific_insn)
 - return EMULATION_FAILED;
 + if (!(ctxt-d  VendorSpecific)  ctxt-only_vendor_specific_insn) {
 + if (!(ctxt-d  EmulateOnUD))
 + return EMULATION_FAILED;
 + }
  
   if (mode == X86EMUL_MODE_PROT64  (ctxt-d  Stack))
   ctxt-op_bytes = 8;
 -- 
 1.8.2.135.g7b592fa
 
 
 

Re: [PATCH v3 4/5] KVM: nVMX: Fix conditions for interrupt injection

2013-04-11 Thread Gleb Natapov
On Thu, Apr 11, 2013 at 04:27:23PM +0200, Jan Kiszka wrote:
 On 2013-04-11 13:20, Gleb Natapov wrote:
  On Sun, Mar 24, 2013 at 07:44:47PM +0100, Jan Kiszka wrote:
  From: Jan Kiszka jan.kis...@siemens.com
 
  If we are in guest mode, L0 can only inject events into L2 if L1 has
  nothing pending. Otherwise, L0 would overwrite L1's events and they
  would get lost. But even if no injection of L1 is pending, we do not
  want L0 to interrupt unnecessarily an on going vmentry with all its side
  effects on the vmcs. Therefore, injection shall be disallowed during
  L1-L2 transitions. This check is conceptually independent of
  nested_exit_on_intr.
 
  If L1 traps external interrupts, then we also need to look at L1's
  idt_vectoring_info_field. If it is empty, we can kick the guest from L2
  to L1, just like the previous code worked.
 
  Signed-off-by: Jan Kiszka jan.kis...@siemens.com
  ---
   arch/x86/kvm/vmx.c |   28 
   1 files changed, 20 insertions(+), 8 deletions(-)
 
  diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
  index d1bc834..30aa198 100644
  --- a/arch/x86/kvm/vmx.c
  +++ b/arch/x86/kvm/vmx.c
  @@ -4325,16 +4325,28 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
   
   static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
   {
  -  if (is_guest_mode(vcpu)  nested_exit_on_intr(vcpu)) {
  +  if (is_guest_mode(vcpu)) {
 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
  -  if (to_vmx(vcpu)-nested.nested_run_pending ||
  -  (vmcs12-idt_vectoring_info_field 
  -   VECTORING_INFO_VALID_MASK))
  +
  +  if (to_vmx(vcpu)-nested.nested_run_pending)
 return 0;
  -  nested_vmx_vmexit(vcpu);
  -  vmcs12-vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
  -  vmcs12-vm_exit_intr_info = 0;
  -  /* fall through to normal code, but now in L1, not L2 */
  +  if (nested_exit_on_intr(vcpu)) {
  +  /*
  +   * Check if the idt_vectoring_info_field is free. We
  +   * cannot raise EXIT_REASON_EXTERNAL_INTERRUPT if it
  +   * isn't.
  +   */
  +  if (vmcs12-idt_vectoring_info_field 
  +  VECTORING_INFO_VALID_MASK)
  +  return 0;
  After patch 2 I do not see how this can be true. Now this case is
  handled by the common code: since event queue is not empty the code will not
  get here.
 
 The event queue is unconditionally cleared (after being migrated to
 vmcs12) in patch 2.
 
During vmexit, yes. But here we are in if(is_guest_mode(vcpu)).

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] kvm/ppc/e500: g2h_tlb1_map: clear old bit before setting new bit

2013-04-11 Thread Alexander Graf
From: Scott Wood scottw...@freescale.com

It's possible that we're using the same host TLB1 slot to map (a
presumably different portion of) the same guest TLB1 entry.  Clear
the bit in the map before setting it, so that if the esels are the same
the bit will remain set.

Signed-off-by: Scott Wood scottw...@freescale.com
Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/e500_mmu_host.c |7 ---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 35fb80e..8e72b21 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -507,13 +507,14 @@ static int kvmppc_e500_tlb1_map_tlb1(struct 
kvmppc_vcpu_e500 *vcpu_e500,
if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
vcpu_e500->host_tlb1_nv = 0;
 
-   vcpu_e500->tlb_refs[1][sesel] = *ref;
-   vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel;
-   vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
if (vcpu_e500->h2g_tlb1_rmap[sesel]) {
unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel] - 1;
vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel);
}
+
+   vcpu_e500->tlb_refs[1][sesel] = *ref;
+   vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
+   vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel;
vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1;
 
return sesel;
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] kvm/ppc/e500: eliminate tlb_refs

2013-04-11 Thread Alexander Graf
From: Scott Wood scottw...@freescale.com

Commit 523f0e5421c12610527c620b983b443f329e3a32 (KVM: PPC: E500:
Explicitly mark shadow maps invalid) began using E500_TLB_VALID
for guest TLB1 entries, and skipping invalidations if it's not set.

However, when E500_TLB_VALID was set for such entries, it was on a
fake local ref, and so the invalidations never happen.  gtlb_privs
is documented as being only for guest TLB0, though we already violate
that with E500_TLB_BITMAP.

Now that we have MMU notifiers, and thus don't need to actually
retain a reference to the mapped pages, get rid of tlb_refs, and
use gtlb_privs for E500_TLB_VALID in TLB1.

Since we can have more than one host TLB entry for a given tlbe_ref,
be careful not to clear existing flags that are relevant to other
host TLB entries when preparing a new host TLB entry.

Signed-off-by: Scott Wood scottw...@freescale.com
Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/e500.h  |   24 
 arch/powerpc/kvm/e500_mmu_host.c |   75 +++---
 2 files changed, 30 insertions(+), 69 deletions(-)

diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 41cefd4..33db48a 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -26,17 +26,20 @@
 #define E500_PID_NUM   3
 #define E500_TLB_NUM   2
 
-#define E500_TLB_VALID 1
-#define E500_TLB_BITMAP 2
+/* entry is mapped somewhere in host TLB */
+#define E500_TLB_VALID (1  0)
+/* TLB1 entry is mapped by host TLB1, tracked by bitmaps */
+#define E500_TLB_BITMAP(1  1)
+/* TLB1 entry is mapped by host TLB0 */
 #define E500_TLB_TLB0  (1  2)
 
 struct tlbe_ref {
-   pfn_t pfn;
-   unsigned int flags; /* E500_TLB_* */
+   pfn_t pfn;  /* valid only for TLB0, except briefly */
+   unsigned int flags; /* E500_TLB_* */
 };
 
 struct tlbe_priv {
-   struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */
+   struct tlbe_ref ref;
 };
 
 #ifdef CONFIG_KVM_E500V2
@@ -63,17 +66,6 @@ struct kvmppc_vcpu_e500 {
 
unsigned int gtlb_nv[E500_TLB_NUM];
 
-   /*
-* information associated with each host TLB entry --
-* TLB1 only for now.  If/when guest TLB1 entries can be
-* mapped with host TLB0, this will be used for that too.
-*
-* We don't want to use this for guest TLB0 because then we'd
-* have the overhead of doing the translation again even if
-* the entry is still in the guest TLB (e.g. we swapped out
-* and back, and our host TLB entries got evicted).
-*/
-   struct tlbe_ref *tlb_refs[E500_TLB_NUM];
unsigned int host_tlb1_nv;
 
u32 svr;
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 8e72b21..1c6a9d7 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -193,8 +193,11 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 
*vcpu_e500, int tlbsel,
struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref;
 
/* Don't bother with unmapped entries */
-   if (!(ref->flags & E500_TLB_VALID))
-   return;
+   if (!(ref->flags & E500_TLB_VALID)) {
+   WARN(ref->flags & (E500_TLB_BITMAP | E500_TLB_TLB0),
+"%s: flags %x\n", __func__, ref->flags);
+   WARN_ON(tlbsel == 1 && vcpu_e500->g2h_tlb1_map[esel]);
+   }
 
if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) {
u64 tmp = vcpu_e500->g2h_tlb1_map[esel];
@@ -248,7 +251,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref 
*ref,
 pfn_t pfn)
 {
ref->pfn = pfn;
-   ref->flags = E500_TLB_VALID;
+   ref->flags |= E500_TLB_VALID;
 
if (tlbe_is_writable(gtlbe))
kvm_set_pfn_dirty(pfn);
@@ -257,6 +260,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref 
*ref,
 static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
 {
if (ref->flags & E500_TLB_VALID) {
+   /* FIXME: don't log bogus pfn for TLB1 */
trace_kvm_booke206_ref_release(ref->pfn, ref->flags);
ref->flags = 0;
}
@@ -274,36 +278,23 @@ static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 
*vcpu_e500)
 
 static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-   int tlbsel = 0;
-   int i;
-
-   for (i = 0; i  vcpu_e500-gtlb_params[tlbsel].entries; i++) {
-   struct tlbe_ref *ref =
-   vcpu_e500-gtlb_priv[tlbsel][i].ref;
-   kvmppc_e500_ref_release(ref);
-   }
-}
-
-static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
-   int stlbsel = 1;
+   int tlbsel;
int i;
 
-   kvmppc_e500_tlbil_all(vcpu_e500);
-
-   for (i = 0; i  host_tlb_params[stlbsel].entries; i++) {
-   struct tlbe_ref *ref =
-   vcpu_e500-tlb_refs[stlbsel][i];
-   

[PULL 3.9 0/4] ppc patch queue 2013-04-11 for 3.9

2013-04-11 Thread Alexander Graf
Hi Marcelo / Gleb,

This is my current patch queue for ppc against master for 3.9.  Please pull.

This patch set contains a number of patches fixing regressions in e500 KVM code.

Some of these patches (the top 3 ones) also went into kvm/next by accident, so
there will be duplicate git commits with these later. But IMHO I'd prefer to
have a working 3.9 release rather than a clean git history.

Alex


The following changes since commit 31880c37c11e28cb81c70757e38392b42e695dc6:
  Linus Torvalds (1):
Linux 3.9-rc6

are available in the git repository at:

  git://github.com/agraf/linux-2.6.git kvm-ppc-3.9

Scott Wood (4):
  kvm/powerpc/e500mc: fix tlb invalidation on cpu migration
  kvm/ppc/e500: h2g_tlb1_rmap: esel 0 is valid
  kvm/ppc/e500: g2h_tlb1_map: clear old bit before setting new bit
  kvm/ppc/e500: eliminate tlb_refs

 arch/powerpc/kvm/e500.h  |   24 ---
 arch/powerpc/kvm/e500_mmu_host.c |   84 -
 arch/powerpc/kvm/e500mc.c|7 +++-
 3 files changed, 41 insertions(+), 74 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] kvm/ppc/e500: h2g_tlb1_rmap: esel 0 is valid

2013-04-11 Thread Alexander Graf
From: Scott Wood scottw...@freescale.com

Add one to esel values in h2g_tlb1_rmap, so that no mapping can be
distinguished from esel 0.  Note that we're not saved by the fact
that host esel 0 is reserved for non-KVM use, because KVM host esel
numbering is not the raw host numbering (see to_htlb1_esel).

Signed-off-by: Scott Wood scottw...@freescale.com
Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/e500_mmu_host.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index a222edf..35fb80e 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -511,10 +511,10 @@ static int kvmppc_e500_tlb1_map_tlb1(struct 
kvmppc_vcpu_e500 *vcpu_e500,
vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel;
vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
if (vcpu_e500->h2g_tlb1_rmap[sesel]) {
-   unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel];
+   unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel] - 1;
vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel);
}
-   vcpu_e500->h2g_tlb1_rmap[sesel] = esel;
+   vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1;
 
return sesel;
 }
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] kvm/powerpc/e500mc: fix tlb invalidation on cpu migration

2013-04-11 Thread Alexander Graf
From: Scott Wood scottw...@freescale.com

The existing check handles the case where we've migrated to a different
core than we last ran on, but it doesn't handle the case where we're
still on the same cpu we last ran on, but some other vcpu has run on
this cpu in the meantime.

Without this, guest segfaults (and other misbehavior) have been seen in
smp guests.

Cc: sta...@vger.kernel.org # 3.8.x
Signed-off-by: Scott Wood scottw...@freescale.com
Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/e500mc.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 1f89d26..2f4baa0 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -108,6 +108,8 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 
old_msr)
 {
 }
 
+static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu);
+
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -136,8 +138,11 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
mtspr(SPRN_GDEAR, vcpu->arch.shared->dar);
mtspr(SPRN_GESR, vcpu->arch.shared->esr);
 
-   if (vcpu->arch.oldpir != mfspr(SPRN_PIR))
+   if (vcpu->arch.oldpir != mfspr(SPRN_PIR) ||
+   __get_cpu_var(last_vcpu_on_cpu) != vcpu) {
kvmppc_e500_tlbil_all(vcpu_e500);
+   __get_cpu_var(last_vcpu_on_cpu) = vcpu;
+   }
 
kvmppc_load_guest_fp(vcpu);
 }
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH -v2] kvm: Emulate MOVBE

2013-04-11 Thread Borislav Petkov
On Thu, Apr 11, 2013 at 05:28:18PM +0300, Gleb Natapov wrote:
 On Thu, Apr 11, 2013 at 02:18:15AM +0200, Borislav Petkov wrote:
  On Wed, Apr 10, 2013 at 03:16:39PM +0300, Gleb Natapov wrote:
   Right, the question is how kernel can tell QEMU that the cpuid bit is
   supported but should not be set unless explicitly asked by an user.
  
  Actually, this seems to work with the patch below based on whether you
  have +movbe in the -cpu option or not.
  
 The problem is that -cpu host will have it unconditionally and this is
 definitely not what we want.

Hmm, ok, I see what you mean. -cpu host boots the atom kernel just fine.

Well, with my limited qemu exposure, I'd guess
cpu_x86_parse_featurestr() would need to somehow say to
x86_cpu_realizefn that it has parsed a +movbe on the command line and
the latter has to keep that bit enabled when doing the checks against
kvm in kvm_check_features_against_host and filter_features_for_kvm().

Unless you have a better idea, that is.

Thanks.

-- 
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: drop alignment checks from KVM_MSR_SYSTEM_TIME address

2013-04-11 Thread Marcelo Tosatti
On Sat, Mar 23, 2013 at 04:12:11PM +0200, Gleb Natapov wrote:
 On Fri, Mar 22, 2013 at 05:17:38PM -0700, Andrew Honig wrote:
  kvm_write_guest would work, but it will hurt performance a bit because
  it'll be doing the address translation each time the time is updated,
  which happens on most guest enters.
  
 Time updates are rare, so this should no be an issue. Marcelo?

Yes, performance is not an issue at this level.

  Another possibility would be to change kvm_gfn_to_hva_cache_init to
  accept a size parameter.  If the requested range is all on one page
  then it operates the same as it currently does.  If the address range
  is on more than one page then it falls back to kvm_write_guest.  This
  preserves the good performance for all cases that currently work,
  while still supporting the unlikely case of page straddling requests.
  It also makes it harder to write a security bugs for other callers of
  kvm_gfn_to_hva_cache_init by explicitly requiring a size parameter.
  
  I can write a patch if you like the idea.
 Nice idea. Send a patch please.
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: RFC: vfio API changes needed for powerpc (v3)

2013-04-11 Thread Yoder Stuart-B08248

 -Original Message-
 From: Joerg Roedel [mailto:j...@8bytes.org]
 Sent: Thursday, April 11, 2013 7:57 AM
 To: Yoder Stuart-B08248
 Cc: Wood Scott-B07421; kvm@vger.kernel.org; qemu-de...@nongnu.org; 
 io...@lists.linux-foundation.org;
 ag...@suse.de; Bhushan Bharat-R65777
 Subject: Re: RFC: vfio API changes needed for powerpc (v3)
 
 On Tue, Apr 09, 2013 at 01:22:15AM +, Yoder Stuart-B08248 wrote:
   What happens if a normal unmap call is done on the MSI iova?  Do we
   need a separate unmap?
 
  I was thinking a normal unmap on an MSI windows would be an error...but
  I'm not set on that.   I put the msi unmap there to make things symmetric,
  a normal unmap would work as well...and then we could drop the msi unmap.
 
 Hmm, this API semantic isn't very clean. When you explicitly map the MSI
 banks a clean API would also allow to unmap them. But that is not
 possible in your design because the kernel is responsible for mapping
 MSIs and you can't unmap a MSI bank that is in use by the kernel.

The mapping that the vfio API creates is specific only to the
assigned device.   So it can be unmapped without affecting
any other device... there is nothing else in the kernel making
the mapping in use.  Another device in use by the kernel using the
same MSI bank would have its own independent mapping.   So, these
mappings are not global but are device specific...just like normal
DMA memory mappings.
  
 So since the kernel owns the MSI setup anyways it should also take care
 of mapping the MSI banks. What is the reason to not let the kernel
 allocate the MSI banks top-down from the end of the DMA window space?
 Just let userspace know (or even set if needed) in advance how many of
 the windows it configures the kernel will take for mapping MSI banks and
 you are fine, no?

As designed the API lets user space determine the number of windows
needed for MSIs, so they can be set.  The only difference between
what we've proposed and what you described, I think, is that the proposal
allows user space to set which windows are used for which MSI banks.

Stuart

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/5] Usual batch of random ARM fixes for kvmtool

2013-04-11 Thread Will Deacon
Hello folks,

Here's the latest round of ARM fixes and updates for kvmtool. Most of
this is confined to the arm/ subdirectory, with the exception of a fix
to the virtio-mmio vq definitions due to the multi-queue work from
Sasha. I'm not terribly happy about that code though, since it seriously
increases the memory footprint of the guest.

Without multi-queue, we can boot Debian Wheezy to a prompt in 38MB. With
the new changes, that increases to 170MB! Any chance we can try and tackle
this regression please? I keep getting bitten by the OOM killer :(

Will

Marc Zyngier (4):
  kvm tools: arm: don't crash when no compatible CPU is found
  kvm tools: arm: add CPU compatible string to target structure
  kvm tools: arm: consolidate CPU node generation
  kvm tools: arm64: add support for AEM and Foundation models

Will Deacon (1):
  kvm tools: bump number of virtio MMIO vqueues

 tools/kvm/arm/aarch32/cortex-a15.c  | 39 ++--
 tools/kvm/arm/aarch64/cortex-a57.c  | 59 +
 tools/kvm/arm/fdt.c | 35 +--
 tools/kvm/arm/include/arm-common/kvm-cpu-arch.h |  6 ++-
 tools/kvm/arm/kvm-cpu.c | 11 +++--
 tools/kvm/include/kvm/virtio-mmio.h |  2 +-
 6 files changed, 70 insertions(+), 82 deletions(-)

-- 
1.8.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/5] kvm tools: arm: consolidate CPU node generation

2013-04-11 Thread Will Deacon
From: Marc Zyngier marc.zyng...@arm.com

Now that generate_cpu_nodes uses the cpu_compatible field to
output the compatible property, we can unify the A15 and A57
implementations, as they are strictly identical.

Move the function to fdt.c, together with most of the device
tree generation.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Will Deacon will.dea...@arm.com
---
 tools/kvm/arm/aarch32/cortex-a15.c | 29 -
 tools/kvm/arm/aarch64/cortex-a57.c | 29 -
 tools/kvm/arm/fdt.c| 35 ---
 3 files changed, 32 insertions(+), 61 deletions(-)

diff --git a/tools/kvm/arm/aarch32/cortex-a15.c 
b/tools/kvm/arm/aarch32/cortex-a15.c
index 4030e53..ca65af7 100644
--- a/tools/kvm/arm/aarch32/cortex-a15.c
+++ b/tools/kvm/arm/aarch32/cortex-a15.c
@@ -8,34 +8,6 @@
 #include linux/byteorder.h
 #include linux/types.h
 
-#define CPU_NAME_MAX_LEN 8
-static void generate_cpu_nodes(void *fdt, struct kvm *kvm)
-{
-   int cpu;
-
-   _FDT(fdt_begin_node(fdt, cpus));
-   _FDT(fdt_property_cell(fdt, #address-cells, 0x1));
-   _FDT(fdt_property_cell(fdt, #size-cells, 0x0));
-
-   for (cpu = 0; cpu  kvm-nrcpus; ++cpu) {
-   char cpu_name[CPU_NAME_MAX_LEN];
-
-   snprintf(cpu_name, CPU_NAME_MAX_LEN, cpu@%d, cpu);
-
-   _FDT(fdt_begin_node(fdt, cpu_name));
-   _FDT(fdt_property_string(fdt, device_type, cpu));
-   _FDT(fdt_property_string(fdt, compatible, 
kvm-cpus[cpu]-cpu_compatible));
-
-   if (kvm-nrcpus  1)
-   _FDT(fdt_property_string(fdt, enable-method, psci));
-
-   _FDT(fdt_property_cell(fdt, reg, cpu));
-   _FDT(fdt_end_node(fdt));
-   }
-
-   _FDT(fdt_end_node(fdt));
-}
-
 static void generate_timer_nodes(void *fdt, struct kvm *kvm)
 {
u32 cpu_mask = (((1  kvm-nrcpus) - 1)  GIC_FDT_IRQ_PPI_CPU_SHIFT) \
@@ -66,7 +38,6 @@ static void generate_timer_nodes(void *fdt, struct kvm *kvm)
 
 static void generate_fdt_nodes(void *fdt, struct kvm *kvm, u32 gic_phandle)
 {
-   generate_cpu_nodes(fdt, kvm);
gic__generate_fdt_nodes(fdt, gic_phandle);
generate_timer_nodes(fdt, kvm);
 }
diff --git a/tools/kvm/arm/aarch64/cortex-a57.c 
b/tools/kvm/arm/aarch64/cortex-a57.c
index f636ef7..5b0dc4c 100644
--- a/tools/kvm/arm/aarch64/cortex-a57.c
+++ b/tools/kvm/arm/aarch64/cortex-a57.c
@@ -8,34 +8,6 @@
 #include linux/byteorder.h
 #include linux/types.h
 
-#define CPU_NAME_MAX_LEN 8
-static void generate_cpu_nodes(void *fdt, struct kvm *kvm)
-{
-   int cpu;
-
-   _FDT(fdt_begin_node(fdt, cpus));
-   _FDT(fdt_property_cell(fdt, #address-cells, 0x1));
-   _FDT(fdt_property_cell(fdt, #size-cells, 0x0));
-
-   for (cpu = 0; cpu  kvm-nrcpus; ++cpu) {
-   char cpu_name[CPU_NAME_MAX_LEN];
-
-   snprintf(cpu_name, CPU_NAME_MAX_LEN, cpu@%d, cpu);
-
-   _FDT(fdt_begin_node(fdt, cpu_name));
-   _FDT(fdt_property_string(fdt, device_type, cpu));
-   _FDT(fdt_property_string(fdt, compatible, 
kvm-cpus[cpu]-cpu_compatible));
-
-   if (kvm-nrcpus  1)
-   _FDT(fdt_property_string(fdt, enable-method, psci));
-
-   _FDT(fdt_property_cell(fdt, reg, cpu));
-   _FDT(fdt_end_node(fdt));
-   }
-
-   _FDT(fdt_end_node(fdt));
-}
-
 static void generate_timer_nodes(void *fdt, struct kvm *kvm)
 {
u32 cpu_mask = (((1  kvm-nrcpus) - 1)  GIC_FDT_IRQ_PPI_CPU_SHIFT) \
@@ -66,7 +38,6 @@ static void generate_timer_nodes(void *fdt, struct kvm *kvm)
 
 static void generate_fdt_nodes(void *fdt, struct kvm *kvm, u32 gic_phandle)
 {
-   generate_cpu_nodes(fdt, kvm);
gic__generate_fdt_nodes(fdt, gic_phandle);
generate_timer_nodes(fdt, kvm);
 }
diff --git a/tools/kvm/arm/fdt.c b/tools/kvm/arm/fdt.c
index 20e0308..c61bf58 100644
--- a/tools/kvm/arm/fdt.c
+++ b/tools/kvm/arm/fdt.c
@@ -41,6 +41,34 @@ static void dump_fdt(const char *dtb_file, void *fdt)
close(fd);
 }
 
+#define CPU_NAME_MAX_LEN 8
+static void generate_cpu_nodes(void *fdt, struct kvm *kvm)
+{
+   int cpu;
+
+   _FDT(fdt_begin_node(fdt, "cpus"));
+   _FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
+   _FDT(fdt_property_cell(fdt, "#size-cells", 0x0));
+
+   for (cpu = 0; cpu < kvm->nrcpus; ++cpu) {
+   char cpu_name[CPU_NAME_MAX_LEN];
+
+   snprintf(cpu_name, CPU_NAME_MAX_LEN, "cpu@%d", cpu);
+
+   _FDT(fdt_begin_node(fdt, cpu_name));
+   _FDT(fdt_property_string(fdt, "device_type", "cpu"));
+   _FDT(fdt_property_string(fdt, "compatible", kvm->cpus[cpu]->cpu_compatible));
+
+   if (kvm->nrcpus > 1)
+   _FDT(fdt_property_string(fdt, "enable-method", "psci"));
+
+   _FDT(fdt_property_cell(fdt, "reg", cpu));
+   _FDT(fdt_end_node(fdt));
+   }
+
+ 

[PATCH 5/5] kvm tools: bump number of virtio MMIO vqueues

2013-04-11 Thread Will Deacon
Commit 4d789d4a2050 (kvm tools: Increase amount of possible interrupts
per PCI device) increased the maximum amount of virtio queues for the
PCI transport, but neglected to do the same for MMIO.

This patch makes the same change for virtio-mmio.

Cc: Sasha Levin sasha.le...@oracle.com
Reported-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Will Deacon will.dea...@arm.com
---

Sasha -- although this fixes a SEGV when booting a guest which tries to
use virtio-net over MMIO, it *drastically* increases the memory footprint
of a guest kernel to unacceptable levels (38MB -> 170MB).

 tools/kvm/include/kvm/virtio-mmio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/kvm/include/kvm/virtio-mmio.h 
b/tools/kvm/include/kvm/virtio-mmio.h
index 983c8fc..4d6a671 100644
--- a/tools/kvm/include/kvm/virtio-mmio.h
+++ b/tools/kvm/include/kvm/virtio-mmio.h
@@ -4,7 +4,7 @@
 #include linux/types.h
 #include linux/virtio_mmio.h
 
-#define VIRTIO_MMIO_MAX_VQ 3
+#define VIRTIO_MMIO_MAX_VQ 32
 #define VIRTIO_MMIO_MAX_CONFIG 1
 #define VIRTIO_MMIO_IO_SIZE0x200
 
-- 
1.8.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/5] kvm tools: arm: add CPU compatible string to target structure

2013-04-11 Thread Will Deacon
From: Marc Zyngier marc.zyng...@arm.com

Instead of hardcoding the CPU compatible string, store it in
the target descriptor. This allows similar CPUs to be managed
by the same backend, and yet have different compatible
properties.

Also remove a check for a condition that can never occur in both
A15 and A57 backends.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Will Deacon will.dea...@arm.com
---
 tools/kvm/arm/aarch32/cortex-a15.c  | 12 
 tools/kvm/arm/aarch64/cortex-a57.c  | 12 
 tools/kvm/arm/include/arm-common/kvm-cpu-arch.h |  6 --
 tools/kvm/arm/kvm-cpu.c |  9 ++---
 4 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/tools/kvm/arm/aarch32/cortex-a15.c 
b/tools/kvm/arm/aarch32/cortex-a15.c
index 8031747..4030e53 100644
--- a/tools/kvm/arm/aarch32/cortex-a15.c
+++ b/tools/kvm/arm/aarch32/cortex-a15.c
@@ -20,16 +20,11 @@ static void generate_cpu_nodes(void *fdt, struct kvm *kvm)
for (cpu = 0; cpu  kvm-nrcpus; ++cpu) {
char cpu_name[CPU_NAME_MAX_LEN];
 
-   if (kvm-cpus[cpu]-cpu_type != KVM_ARM_TARGET_CORTEX_A15) {
-   pr_warning(Ignoring unknown type for CPU %d\n, cpu);
-   continue;
-   }
-
snprintf(cpu_name, CPU_NAME_MAX_LEN, cpu@%d, cpu);
 
_FDT(fdt_begin_node(fdt, cpu_name));
_FDT(fdt_property_string(fdt, device_type, cpu));
-   _FDT(fdt_property_string(fdt, compatible, arm,cortex-a15));
+   _FDT(fdt_property_string(fdt, compatible, 
kvm-cpus[cpu]-cpu_compatible));
 
if (kvm-nrcpus  1)
_FDT(fdt_property_string(fdt, enable-method, psci));
@@ -83,8 +78,9 @@ static int cortex_a15__vcpu_init(struct kvm_cpu *vcpu)
 }
 
 static struct kvm_arm_target target_cortex_a15 = {
-   .id = KVM_ARM_TARGET_CORTEX_A15,
-   .init   = cortex_a15__vcpu_init,
+   .id = KVM_ARM_TARGET_CORTEX_A15,
+   .compatible = arm,cortex-a15,
+   .init   = cortex_a15__vcpu_init,
 };
 
 static int cortex_a15__core_init(struct kvm *kvm)
diff --git a/tools/kvm/arm/aarch64/cortex-a57.c 
b/tools/kvm/arm/aarch64/cortex-a57.c
index 4fd11ba..f636ef7 100644
--- a/tools/kvm/arm/aarch64/cortex-a57.c
+++ b/tools/kvm/arm/aarch64/cortex-a57.c
@@ -20,16 +20,11 @@ static void generate_cpu_nodes(void *fdt, struct kvm *kvm)
for (cpu = 0; cpu  kvm-nrcpus; ++cpu) {
char cpu_name[CPU_NAME_MAX_LEN];
 
-   if (kvm-cpus[cpu]-cpu_type != KVM_ARM_TARGET_CORTEX_A57) {
-   pr_warning(Ignoring unknown type for CPU %d\n, cpu);
-   continue;
-   }
-
snprintf(cpu_name, CPU_NAME_MAX_LEN, cpu@%d, cpu);
 
_FDT(fdt_begin_node(fdt, cpu_name));
_FDT(fdt_property_string(fdt, device_type, cpu));
-   _FDT(fdt_property_string(fdt, compatible, arm,cortex-a57));
+   _FDT(fdt_property_string(fdt, compatible, 
kvm-cpus[cpu]-cpu_compatible));
 
if (kvm-nrcpus  1)
_FDT(fdt_property_string(fdt, enable-method, psci));
@@ -84,8 +79,9 @@ static int cortex_a57__vcpu_init(struct kvm_cpu *vcpu)
 }
 
 static struct kvm_arm_target target_cortex_a57 = {
-   .id = KVM_ARM_TARGET_CORTEX_A57,
-   .init   = cortex_a57__vcpu_init,
+   .id = KVM_ARM_TARGET_CORTEX_A57,
+   .compatible = arm,cortex-a57,
+   .init   = cortex_a57__vcpu_init,
 };
 
 static int cortex_a57__core_init(struct kvm *kvm)
diff --git a/tools/kvm/arm/include/arm-common/kvm-cpu-arch.h 
b/tools/kvm/arm/include/arm-common/kvm-cpu-arch.h
index 351fbe6..b514dd5 100644
--- a/tools/kvm/arm/include/arm-common/kvm-cpu-arch.h
+++ b/tools/kvm/arm/include/arm-common/kvm-cpu-arch.h
@@ -12,6 +12,7 @@ struct kvm_cpu {
 
unsigned long   cpu_id;
unsigned long   cpu_type;
+   const char  *cpu_compatible;
 
struct kvm  *kvm;
int vcpu_fd;
@@ -28,8 +29,9 @@ struct kvm_cpu {
 };
 
 struct kvm_arm_target {
-   u32 id;
-   int (*init)(struct kvm_cpu *vcpu);
+   u32 id;
+   const char  *compatible;
+   int (*init)(struct kvm_cpu *vcpu);
 };
 
 int kvm_cpu__register_kvm_arm_target(struct kvm_arm_target *target);
diff --git a/tools/kvm/arm/kvm-cpu.c b/tools/kvm/arm/kvm-cpu.c
index 2716690..6d4f306 100644
--- a/tools/kvm/arm/kvm-cpu.c
+++ b/tools/kvm/arm/kvm-cpu.c
@@ -30,6 +30,7 @@ int kvm_cpu__register_kvm_arm_target(struct kvm_arm_target 
*target)
 
 struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned long cpu_id)
 {
+   struct kvm_arm_target *target;
struct kvm_cpu *vcpu;
int coalesced_offset, mmap_size, err = -1;
unsigned int i;
@@ -58,13 +59,14 @@ struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, 

[PATCH 4/5] kvm tools: arm64: add support for AEM and Foundation models

2013-04-11 Thread Will Deacon
From: Marc Zyngier marc.zyng...@arm.com

The ARMv8 architecture is supported by two publicly available
software models: the Architecture Enveloppe Model, and the
Foundation model.

Both are fairly similar to the Cortex-A57 from a kvm tools point of
view, so we can hijack the A57 implementation to register these
new targets.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Will Deacon will.dea...@arm.com
---
 tools/kvm/arm/aarch64/cortex-a57.c | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/kvm/arm/aarch64/cortex-a57.c 
b/tools/kvm/arm/aarch64/cortex-a57.c
index 5b0dc4c..0c340fb 100644
--- a/tools/kvm/arm/aarch64/cortex-a57.c
+++ b/tools/kvm/arm/aarch64/cortex-a57.c
@@ -49,6 +49,22 @@ static int cortex_a57__vcpu_init(struct kvm_cpu *vcpu)
return 0;
 }
 
+/*
+ * As far as userspace is concerned, both of these implementations are
+ * extremely similar.
+ */
+static struct kvm_arm_target target_aem_v8 = {
+   .id = KVM_ARM_TARGET_AEM_V8,
+   .compatible = arm,arm-v8,
+   .init   = cortex_a57__vcpu_init,
+};
+
+static struct kvm_arm_target target_foundation_v8 = {
+   .id = KVM_ARM_TARGET_FOUNDATION_V8,
+   .compatible = arm,arm-v8,
+   .init   = cortex_a57__vcpu_init,
+};
+
 static struct kvm_arm_target target_cortex_a57 = {
.id = KVM_ARM_TARGET_CORTEX_A57,
.compatible = arm,cortex-a57,
@@ -57,6 +73,8 @@ static struct kvm_arm_target target_cortex_a57 = {
 
 static int cortex_a57__core_init(struct kvm *kvm)
 {
-   return kvm_cpu__register_kvm_arm_target(target_cortex_a57);
+   return (kvm_cpu__register_kvm_arm_target(target_aem_v8) ||
+   kvm_cpu__register_kvm_arm_target(target_foundation_v8) ||
+   kvm_cpu__register_kvm_arm_target(target_cortex_a57));
 }
 core_init(cortex_a57__core_init);
-- 
1.8.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/5] kvm tools: arm: don't crash when no compatible CPU is found

2013-04-11 Thread Will Deacon
From: Marc Zyngier marc.zyng...@arm.com

If the kernel against which kvm tools was compiled supports more
CPU types than kvm tools does, then we can hit a situation where
we dereference an empty target slot.

Just stepping over empty slots fixes the issue.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Will Deacon will.dea...@arm.com
---
 tools/kvm/arm/kvm-cpu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/kvm/arm/kvm-cpu.c b/tools/kvm/arm/kvm-cpu.c
index 7a0eff45..2716690 100644
--- a/tools/kvm/arm/kvm-cpu.c
+++ b/tools/kvm/arm/kvm-cpu.c
@@ -56,6 +56,8 @@ struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned 
long cpu_id)
 
/* Find an appropriate target CPU type. */
for (i = 0; i  ARRAY_SIZE(kvm_arm_targets); ++i) {
+   if (!kvm_arm_targets[i])
+   continue;
vcpu_init.target = kvm_arm_targets[i]-id;
err = ioctl(vcpu-vcpu_fd, KVM_ARM_VCPU_INIT, vcpu_init);
if (!err)
-- 
1.8.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/5] Usual batch of random ARM fixes for kvmtool

2013-04-11 Thread Sasha Levin
On 04/11/2013 12:36 PM, Will Deacon wrote:
 Without multi-queue, we can boot Debian Wheezy to a prompt in 38MB. With
 the new changes, that increases to 170MB! Any chance we can try and tackle
 this regression please? I keep getting bitten by the OOM killer :(

That's definitely unwanted.

I'll look into it and try sending something out today/tomorrow.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvmtool : [PATCH] PowerPc : Fix compilation for ppc64

2013-04-11 Thread Prerna Saxena
On 04/10/2013 09:05 PM, Sasha Levin wrote:
 Hm, what would LD create before this patch? I thought that the default
 would be to create a binary that corresponds to the platform you're
 building in, so if you build on ppc64 you'd get ppc64 binaries, no?
 

Hi Sasha,
Thanks for the prompt response.
Powerpc had historically supported 32 bit userspace on a 64 bit kernel,
before everything moved 64 bit.

I'd hit this issue since the default output of 'ld' was turning out to
be 'elf32-powerpc' on my ppc64 build machine. This was running ld-2.22.
I found that adding '--oformat=elf64-powerpc' to the Makefile helped me
tide over it, so I sent a patch to that end.
Today, I verified on another ppc64 machine that ld is automatically
choosing 'elf64-powerpc'. This machine is running 'ld-2.23'

So, this patch can be ignored, since it appears to be a toolchain
dependency. Or, we could put it in place, to ensure kvmtool builds don't
break even if the toolchain is not perfectly configured.
As you suggest :)

Regards,
Prerna

 
 
 On Wed, Apr 10, 2013 at 2:44 AM, Pekka Enberg penb...@kernel.org
 mailto:penb...@kernel.org wrote:
 
 On Tue, Apr 9, 2013 at 8:29 PM, Prerna Saxena
 pre...@linux.vnet.ibm.com mailto:pre...@linux.vnet.ibm.com wrote:
  'lkvm' compilation on ppc64 fails with the following error :
  ...[snip]..
LINK guest/init
LINK lkvm
  /usr/bin/ld: powerpc:common architecture of input file
 `guest/guest_init.o'
  is incompatible with powerpc:common64 output
  collect2: ld returned 1 exit status
  make: *** [lkvm] Error 1
 
  This patch corrects the error above, and enables 'lkvm' to compile
 on ppc64
  architecture.
 
  Signed-off-by: Prerna Saxena pre...@linux.vnet.ibm.com
 mailto:pre...@linux.vnet.ibm.com
  ---
   tools/kvm/Makefile |4 
   1 file changed, 4 insertions(+)
 
  diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
  index 0c59faa..269e29e 100644
  --- a/tools/kvm/Makefile
  +++ b/tools/kvm/Makefile
  @@ -335,7 +335,11 @@ $(PROGRAM_ALIAS): $(PROGRAM)
   $(GUEST_INIT): guest/init.c
  $(E)   LINK $@
  $(Q) $(CC) -static guest/init.c -o $@
  +ifeq ($(ARCH), powerpc)
  +   $(Q) $(LD) -r -b binary --oformat elf64-powerpc -o
 guest/guest_init.o $(GUEST_INIT)
  +else
  $(Q) $(LD) -r -b binary -o guest/guest_init.o $(GUEST_INIT)
  +endif
 
   $(DEPS):
 
  --
  1.7.10.4
 
 Michael, Sasha, comments?
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 mailto:majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 
 


-- 
Prerna Saxena

Linux Technology Centre,
IBM Systems and Technology Lab,
Bangalore, India

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] bookehv: Handle debug exception on guest exit

2013-04-11 Thread Kumar Gala

On Apr 5, 2013, at 2:53 AM, Bhushan Bharat-R65777 wrote:

 Hi Kumar/Benh,
 
 After further looking into the code I think that if we correct the vector 
 range below in DebugDebug handler then we do not need the change I provided 
 in this patch.
 
 Here is the snapshot for 32 bit (head_booke.h, same will be true for 64 bit):
 
 #define DEBUG_DEBUG_EXCEPTION 
 \
START_EXCEPTION(DebugDebug);  \
DEBUG_EXCEPTION_PROLOG;   \
  \
/*\
 * If there is a single step or branch-taken exception in an  \
 * exception entry sequence, it was probably meant to apply to\
 * the code where the exception occurred (since exception entry   \
 * doesn't turn off DE automatically).  We simulate the effect\
 * of turning off DE on entry to an exception handler by turning  \
 * off DE in the DSRR1 value and clearing the debug status.   \
 */   \
mfspr   r10,SPRN_DBSR;  /* check single-step/branch taken */  \
andis.  r10,r10,(DBSR_IC|DBSR_BT)@h;  \
beq+2f;   \
  \
lis r10,KERNELBASE@h;   /* check if exception in vectors */   \
ori r10,r10,KERNELBASE@l; \
cmplw   r12,r10;  \
blt+2f; /* addr below exception vectors */\
  \
lis r10,DebugDebug@h;\
ori r10,r10,DebugDebug@l;  
   \
 
 
   Here we assume all exception vector ends at DebugDebug, which is not 
 correct.
  We probably should get the proper end by using some start_vector and 
 end_vector labels
   or at least use end at Ehvpriv (which is last defined in 
 head_fsl_booke.S for PowerPC. Is that correct?
 
   
cmplw   r12,r10;  \
bgt+2f; /* addr above exception vectors */\
 
 Thanks
 -Bharat

I talked to Stuart and this general approach is good.  Just make sure to update 
both head_44x.S and head_fsl_booke.S.  Plus do this for both 
DEBUG_CRIT_EXCEPTION and DEBUG_DEBUG_EXCEPTION.

- k--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] bookehv: Handle debug exception on guest exit

2013-04-11 Thread Stuart Yoder
On Thu, Apr 11, 2013 at 1:33 PM, Kumar Gala ga...@kernel.crashing.org wrote:

 On Apr 5, 2013, at 2:53 AM, Bhushan Bharat-R65777 wrote:

 Hi Kumar/Benh,

 After further looking into the code I think that if we correct the vector 
 range below in DebugDebug handler then we do not need the change I provided 
 in this patch.

 Here is the snapshot for 32 bit (head_booke.h, same will be true for 64 bit):

 #define DEBUG_DEBUG_EXCEPTION
  \
START_EXCEPTION(DebugDebug);  
 \
DEBUG_EXCEPTION_PROLOG;   
 \
  
 \
/*
 \
 * If there is a single step or branch-taken exception in an  
 \
 * exception entry sequence, it was probably meant to apply to
 \
 * the code where the exception occurred (since exception entry   
 \
 * doesn't turn off DE automatically).  We simulate the effect
 \
 * of turning off DE on entry to an exception handler by turning  
 \
 * off DE in the DSRR1 value and clearing the debug status.   
 \
 */   
 \
mfspr   r10,SPRN_DBSR;  /* check single-step/branch taken */  
 \
andis.  r10,r10,(DBSR_IC|DBSR_BT)@h;  
 \
beq+2f;   
 \
  
 \
lis r10,KERNELBASE@h;   /* check if exception in vectors */   
 \
ori r10,r10,KERNELBASE@l; 
 \
cmplw   r12,r10;  
 \
blt+2f; /* addr below exception vectors */
 \
  
 \
lis r10,DebugDebug@h;\
ori r10,r10,DebugDebug@l; 
\

 
   Here we assume all exception vector ends at DebugDebug, which is not 
 correct.
  We probably should get the proper end by using some start_vector and 
 end_vector labels
   or at least use end at Ehvpriv (which is last defined in 
 head_fsl_booke.S for PowerPC. Is that correct?


cmplw   r12,r10;  
 \
bgt+2f; /* addr above exception vectors */
 \

 Thanks
 -Bharat

 I talked to Stuart and this general approach is good.  Just make sure to 
 update both head_44x.S and head_fsl_booke.S.  Plus do this for both 
 DEBUG_CRIT_EXCEPTION and DEBUG_DEBUG_EXCEPTION.

Also, it looks like 64-bit already handles this properly with symbols
identifying the
start/end of the vectors (exceptions-64e.S).

Stuart
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] bookehv: Handle debug exception on guest exit

2013-04-11 Thread Stuart Yoder
So the patch should look something like this (on a 3.8 kernel):

diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 5f051ee..92b675a 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -286,13 +286,13 @@ label:
andis.  r10,r10,(DBSR_IC|DBSR_BT)@h;  \
beq+2f;   \
  \
-   lis r10,KERNELBASE@h;   /* check if exception in vectors */   \
-   ori r10,r10,KERNELBASE@l; \
+   lis r10,interrupt_base@h;   /* check if exception in vectors */   \
+   ori r10,r10,interrupt_base@l;
cmplw   r12,r10;  \
blt+2f; /* addr below exception vectors */\
  \
-   lis r10,DebugDebug@h; \
-   ori r10,r10,DebugDebug@l; \
+   lis r10,interrupt_end@h;  \
+   ori r10,r10,interrupt_end@l;
cmplw   r12,r10;  \
bgt+2f; /* addr above exception vectors */\
  \
@@ -339,13 +339,13 @@ label:
andis.  r10,r10,(DBSR_IC|DBSR_BT)@h;  \
beq+2f;   \
  \
-   lis r10,KERNELBASE@h;   /* check if exception in vectors */   \
-   ori r10,r10,KERNELBASE@l; \
+   lis r10,interrupt_base@h;   /* check if exception in vectors */   \
+   ori r10,r10,interrupt_base@l;
cmplw   r12,r10;  \
blt+2f; /* addr below exception vectors */\
  \
-   lis r10,DebugCrit@h;  \
-   ori r10,r10,DebugCrit@l;  \
+   lis r10,interrupt_end@h;  \
+   ori r10,r10,interrupt_end@l;
cmplw   r12,r10;  \
bgt+2f; /* addr above exception vectors */\
  \


diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 7a2e5e4..97e2671 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -769,6 +769,8 @@ finish_tlb_load_47x:
 */
DEBUG_CRIT_EXCEPTION

+interrupt_end:
+
 /*
  * Global functions
  */


diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl
index 58925b6..2c3e31d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -605,6 +605,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
/* Embedded Hypervisor Privilege */
EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE)

+interrupt_end:
+
 /*
  * Local functions
  */
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvmtool : [PATCH] PowerPc : Fix compilation for ppc64

2013-04-11 Thread Sasha Levin
On 04/11/2013 12:53 PM, Prerna Saxena wrote:
 On 04/10/2013 09:05 PM, Sasha Levin wrote:
 Hm, what would LD create before this patch? I thought that the default
 would be to create a binary that corresponds to the platform you're
 building in, so if you build on ppc64 you'd get ppc64 binaries, no?

 
 Hi Sasha,
 Thanks for the prompt response.
 Powerpc had historically supported 32 bit userspace on a 64 bit kernel,
 before everything moved 64 bit.
 
 I'd hit this issue since the default output of 'ld' was turning out to
 be 'elf32-powerpc' on my ppc64 build machine. This was running ld-2.22.
 I found that adding '--oformat=elf64-powerpc' to the Makefile helped me
 tide over it, so I sent a patch to that end.
 Today, I verified on another ppc64 machine that ld is automatically
 choosing 'elf64-powerpc'. This machine is running 'ld-2.23'
 
 So, this patch can be ignored, since it appears to be a toolchain
 dependency. Or, we could put it in place, to ensure kvmtool builds don't
 break even if the toolchain is not perfectly configured.
 As you suggest :)

What worries me about this patch is that it will break the build on 32-bit
machines.

I don't know if those are even supported these days or not, but if they are -
we need something different to handle that.


Thanks,
Sasha

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: RFC: vfio API changes needed for powerpc (v3)

2013-04-11 Thread Scott Wood

On 04/11/2013 07:56:59 AM, Joerg Roedel wrote:

On Tue, Apr 09, 2013 at 01:22:15AM +, Yoder Stuart-B08248 wrote:
  What happens if a normal unmap call is done on the MSI iova?  Do  
we

  need a separate unmap?

 I was thinking a normal unmap on an MSI windows would be an  
error...but
 I'm not set on that.   I put the msi unmap there to make things  
symmetric,
 a normal unmap would work as well...and then we could drop the msi  
unmap.


Hmm, this API semantic isn't very clean. When you explicitly map the  
MSI

banks a clean API would also allow to unmap them. But that is not
possible in your design because the kernel is responsible for mapping
MSIs and you can't unmap a MSI bank that is in use by the kernel.


Why is it not possible to unmap them?  Once they've been mapped,  
they're just like any other IOMMU mapping.  If the user breaks MSI for  
their own devices by unmapping the MSI page, that's their problem.


So since the kernel owns the MSI setup anyways it should also take  
care

of mapping the MSI banks. What is the reason to not let the kernel
allocate the MSI banks top-down from the end of the DMA window space?


It's less flexible, and possibly more complicated.

-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


virtio-net mq vq initialization (was: [PATCH 0/5] Usual batch of random ARM fixes for kvmtool)

2013-04-11 Thread Sasha Levin
On 04/11/2013 12:36 PM, Will Deacon wrote:
 Hello folks,
 
 Here's the latest round of ARM fixes and updates for kvmtool. Most of
 this is confined to the arm/ subdirectory, with the exception of a fix
 to the virtio-mmio vq definitions due to the multi-queue work from
 Sasha. I'm not terribly happy about that code though, since it seriously
 increases the memory footprint of the guest.
 
 Without multi-queue, we can boot Debian Wheezy to a prompt in 38MB. With
 the new changes, that increases to 170MB! Any chance we can try and tackle
 this regression please? I keep getting bitten by the OOM killer :(

(cc Rusty, MST)

The spec defines the operation of a virtio-net device with regards to multiple
queues as follows:


Device Initialization

1. The initialization routine should identify the receive and 
transmission
virtqueues, up to N+1 of each kind. If VIRTIO_NET_F_MQ feature
bit is negotiated, N=max_virtqueue_pairs-1, otherwise identify N=0.

[...]

5. Only receiveq0, transmitq0 and controlq are used by default. To use 
more
queues driver must negotiate the VIRTIO_NET_F_MQ feature; initialize
up to max_virtqueue_pairs of each of transmit and receive queues; execute_
VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command specifying
the number of the transmit and receive queues that is going to be
used and wait until the device consumes the controlq buffer and acks this
command.


And kvmtool follows that to the letter: It will initialize the maximum amount of
queues it can support during initialization and will start using them only when
the device tells it it should use them.

As Will has stated, this causes a memory issue since all the data structures 
that hold
all possible queues get initialized regardless of whether we actually need them 
or not,
which is quite troublesome for systems with small RAM.


Rusty, MST, would you be open to a spec and code change that would initialize 
the
RX/TX vqs on demand instead of on device initialization? Or is there an easier 
way
to work around this issue?


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] vhost_net: remove tx polling state

2013-04-11 Thread David Miller
From: Michael S. Tsirkin m...@redhat.com
Date: Thu, 11 Apr 2013 10:24:30 +0300

 On Thu, Apr 11, 2013 at 02:50:48PM +0800, Jason Wang wrote:
 After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle 
 polling
 errors when setting backend), we in fact track the polling state through
 poll-wqh, so there's no need to duplicate the work with an extra
 vhost_net_polling_state. So this patch removes this and make the code 
 simpler.
 
 This patch also removes the all tx starting/stopping code in tx path 
 according
 to Michael's suggestion.
 
 Netperf test shows almost the same result in stream test, but gets 
 improvements
 on TCP_RR tests (both zerocopy or copy) especially on low load cases.
 
 Tested between multiqueue kvm guest and external host with two direct
 connected 82599s.
 ...
 Signed-off-by: Jason Wang jasow...@redhat.com
 
 Less code and better speed, what's not to like.
 Davem, could you pick this up for 3.10 please?
 
 Acked-by: Michael S. Tsirkin m...@redhat.com

Applied to net-next, thanks everyone.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/9] powerpc,kvm: fix imbalance srcu_read_[un]lock()

2013-04-11 Thread Paul E. McKenney
On Mon, Mar 18, 2013 at 08:26:48AM +1100, Paul Mackerras wrote:
 On Sat, Mar 16, 2013 at 12:50:49AM +0800, Lai Jiangshan wrote:
  At the point of up_out label in kvmppc_hv_setup_htab_rma(),
  srcu read lock is still held.
  
  We have to release it before return.
  
  Signed-off-by: Lai Jiangshan la...@cn.fujitsu.com
  Cc: Marcelo Tosatti mtosa...@redhat.com
  Cc: Gleb Natapov g...@redhat.com
  Cc: Alexander Graf ag...@suse.de
  Cc: Benjamin Herrenschmidt b...@kernel.crashing.org
  Cc: Paul Mackerras pau...@samba.org
  Cc: kvm@vger.kernel.org
  Cc: kvm-...@vger.kernel.org
  ---
   arch/powerpc/kvm/book3s_hv.c |2 +-
   1 files changed, 1 insertions(+), 1 deletions(-)
  
  diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
  index 80dcc53..c26740e 100644
  --- a/arch/powerpc/kvm/book3s_hv.c
  +++ b/arch/powerpc/kvm/book3s_hv.c
  @@ -1799,7 +1799,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu 
  *vcpu)
   
up_out:
  up_read(current-mm-mmap_sem);
  -   goto out;
  +   goto out_srcu;
 
 Acked-by: Paul Mackerras pau...@samba.org

Thank you both, queued for 3.11 (assuming no one has beat me to it).

Thanx, Paul

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 0/8] In-kernel XICS interrupt controller emulation

2013-04-11 Thread Paul Mackerras
I wrote:
 The series is based on Alex Graf's kvm-ppc-next branch with Scott
 Wood's recent patch series applied on top, together with the patch
 below to allow it to compile with CONFIG_KVM_MPIC=n.

And of course I forgot to include the patch.  Here it is.

Paul.

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 290a905..5306ca5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -466,9 +466,13 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvmppc_remove_vcpu_debugfs(vcpu);
 
switch (vcpu-arch.irq_type) {
+#ifdef CONFIG_KVM_MPIC
case KVMPPC_IRQ_MPIC:
kvmppc_mpic_put(vcpu-arch.mpic);
break;
+#endif
+   default:
+   break;
}
 
kvmppc_core_vcpu_free(vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e325f5d..ca3adf9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2161,13 +2161,11 @@ out:
 static int kvm_ioctl_create_device(struct kvm *kvm,
   struct kvm_create_device *cd)
 {
-   bool test = cd-flags  KVM_CREATE_DEVICE_TEST;
-
switch (cd-type) {
 #ifdef CONFIG_KVM_MPIC
case KVM_DEV_TYPE_FSL_MPIC_20:
case KVM_DEV_TYPE_FSL_MPIC_42: {
-   if (test)
+   if (cd-flags  KVM_CREATE_DEVICE_TEST)
return 0;
 
return kvm_create_mpic(kvm, cd-type);
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/8] KVM: PPC: Book3S: Add support for ibm,int-on/off RTAS calls

2013-04-11 Thread Paul Mackerras
This adds support for the ibm,int-on and ibm,int-off RTAS calls to the
in-kernel XICS emulation and corrects the handling of the saved
priority by the ibm,set-xive RTAS call.  With this, ibm,int-off sets
the specified interrupt's priority in its saved_priority field and
sets the priority to 0xff (the least favoured value).  ibm,int-on
restores the saved_priority to the priority field, and ibm,set-xive
sets both the priority and the saved_priority to the specified
priority value.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_ppc.h |2 +
 arch/powerpc/kvm/book3s_rtas.c |   40 +
 arch/powerpc/kvm/book3s_xics.c |   86 +---
 arch/powerpc/kvm/book3s_xics.h |2 +-
 4 files changed, 114 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 1f7f5f6..e5a0614 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -173,6 +173,8 @@ extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
 extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
 extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 
priority);
 extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 
*priority);
+extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
 
 /*
  * Cuts out inst bits with ordering according to spec.
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 6a6c1fe..fc1a749 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -64,6 +64,44 @@ out:
args-rets[0] = rc;
 }
 
+static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+   u32 irq;
+   int rc;
+
+   if (args-nargs != 1 || args-nret != 1) {
+   rc = -3;
+   goto out;
+   }
+
+   irq = args-args[0];
+
+   rc = kvmppc_xics_int_off(vcpu-kvm, irq);
+   if (rc)
+   rc = -3;
+out:
+   args-rets[0] = rc;
+}
+
+static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+   u32 irq;
+   int rc;
+
+   if (args-nargs != 1 || args-nret != 1) {
+   rc = -3;
+   goto out;
+   }
+
+   irq = args-args[0];
+
+   rc = kvmppc_xics_int_on(vcpu-kvm, irq);
+   if (rc)
+   rc = -3;
+out:
+   args-rets[0] = rc;
+}
+
 struct rtas_handler {
void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
char *name;
@@ -72,6 +110,8 @@ struct rtas_handler {
 static struct rtas_handler rtas_handlers[] = {
{ .name = ibm,set-xive, .handler = kvm_rtas_set_xive },
{ .name = ibm,get-xive, .handler = kvm_rtas_get_xive },
+   { .name = ibm,int-off,  .handler = kvm_rtas_int_off },
+   { .name = ibm,int-on,   .handler = kvm_rtas_int_on },
 };
 
 struct rtas_token_definition {
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 278eecc..d1ec4b0 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -120,6 +120,28 @@ static void ics_check_resend(struct kvmppc_xics *xics, 
struct kvmppc_ics *ics,
mutex_unlock(ics-lock);
 }
 
+static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+  struct ics_irq_state *state,
+  u32 server, u32 priority, u32 saved_priority)
+{
+   bool deliver;
+
+   mutex_lock(ics-lock);
+
+   state-server = server;
+   state-priority = priority;
+   state-saved_priority = saved_priority;
+   deliver = false;
+   if ((state-masked_pending || state-resend)  priority != MASKED) {
+   state-masked_pending = 0;
+   deliver = true;
+   }
+
+   mutex_unlock(ics-lock);
+
+   return deliver;
+}
+
 int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
 {
struct kvmppc_xics *xics = kvm-arch.xics;
@@ -127,7 +149,6 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 
server, u32 priority)
struct kvmppc_ics *ics;
struct ics_irq_state *state;
u16 src;
-   bool deliver;
 
if (!xics)
return -ENODEV;
@@ -141,23 +162,11 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 
server, u32 priority)
if (!icp)
return -EINVAL;
 
-   mutex_lock(ics-lock);
-
XICS_DBG(set_xive %#x server %#x prio %#x MP:%d RS:%d\n,
 irq, server, priority,
 state-masked_pending, state-resend);
 
-   state-server = server;
-   state-priority = priority;
-   deliver = false;
-   if ((state-masked_pending || state-resend)  priority != MASKED) {
-   state-masked_pending = 0;
-   deliver = true;
-   }
-
-   mutex_unlock(ics-lock);
-
-   if (deliver)
+   if 

[PATCH v4 0/8] In-kernel XICS interrupt controller emulation

2013-04-11 Thread Paul Mackerras
This is a repost of my patch series implementing in-kernel emulation
of the XICS interrupt controller architecture defined in PAPR (Power
Architecture Platform Requirements, the document that defines IBM's
pSeries platform architecture).  This version of the patch series uses
the device API as posted by Scott Wood.  I have structured the series
so that the API is added by the last two patches, so as to be able to
accommodate any future revisions to the device API with minimal
changes.

The series is based on Alex Graf's kvm-ppc-next branch with Scott
Wood's recent patch series applied on top, together with the patch
below to allow it to compile with CONFIG_KVM_MPIC=n.

The API defined here uses KVM_CREATE_DEVICE to create the XICS,
KVM_DEVICE_SET_ATTR/KVM_DEVICE_GET_ATTR to manipulate the interrupt
sources (for initialization and migration), a new KVM_CAP_IRQ_XICS
capability to connect vcpus to the XICS, a new identifier
KVM_REG_PPC_ICP_STATE for the one-reg interface to get and set
per-vcpu state, and the existing KVM_IRQ_LINE ioctl to assert and
deassert interrupt sources.

Paul.
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] KVM: PPC: Book3S HV: Improve real-mode handling of external interrupts

2013-04-11 Thread Paul Mackerras
This streamlines our handling of external interrupts that come in
while we're in the guest.  First, when waking up a hardware thread
that was napping, we split off the napping due to H_CEDE case
earlier, and use the code that handles an external interrupt (0x500)
in the guest to handle that too.  Secondly, the code that handles
those external interrupts now checks if any other thread is exiting
to the host before bouncing an external interrupt to the guest, and
also checks that there is actually an external interrupt pending for
the guest before setting the LPCR MER bit (mediated external request).

This also makes sure that we clear the ceded flag when we handle a
wakeup from cede in real mode, and fixes a potential infinite loop
in kvmppc_run_vcpu() which can occur if we ever end up with the ceded
flag set but MSR[EE] off.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/reg.h  |1 +
 arch/powerpc/kvm/book3s_hv.c|5 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  140 +--
 3 files changed, 81 insertions(+), 65 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c9c67fc..7993224 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -290,6 +290,7 @@
 #define LPCR_PECE1 0x2000  /* decrementer can cause exit */
 #define LPCR_PECE2 0x1000  /* machine check etc can cause exit */
 #define   LPCR_MER 0x0800  /* Mediated External Exception */
+#define   LPCR_MER_SH  11
 #define   LPCR_LPES0x000c
 #define   LPCR_LPES0   0x0008  /* LPAR Env selector 0 */
 #define   LPCR_LPES1   0x0004  /* LPAR Env selector 1 */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ceb3d81..c066b77 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1376,9 +1376,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, 
struct kvm_vcpu *vcpu)
break;
vc-runner = vcpu;
n_ceded = 0;
-   list_for_each_entry(v, vc-runnable_threads, arch.run_list)
+   list_for_each_entry(v, vc-runnable_threads, arch.run_list) {
if (!v-arch.pending_exceptions)
n_ceded += v-arch.ceded;
+   else
+   v-arch.ceded = 0;
+   }
if (n_ceded == vc-n_runnable)
kvmppc_vcore_blocked(vc);
else
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 4fa187f..3835963 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -97,50 +97,51 @@ kvm_start_guest:
li  r0,1
stb r0,PACA_NAPSTATELOST(r13)
 
-   /* get vcpu pointer, NULL if we have no vcpu to run */
-   ld  r4,HSTATE_KVM_VCPU(r13)
-   cmpdi   cr1,r4,0
+   /* were we napping due to cede? */
+   lbz r0,HSTATE_NAPPING(r13)
+   cmpwi   r0,0
+   bne kvm_end_cede
+
+   /*
+* We weren't napping due to cede, so this must be a secondary
+* thread being woken up to run a guest, or being woken up due
+* to a stray IPI.  (Or due to some machine check or hypervisor
+* maintenance interrupt while the core is in KVM.)
+*/
 
/* Check the wake reason in SRR1 to see why we got here */
mfspr   r3,SPRN_SRR1
rlwinm  r3,r3,44-31,0x7 /* extract wake reason field */
cmpwi   r3,4/* was it an external interrupt? */
-   bne 27f
-
-   /*
-* External interrupt - for now assume it is an IPI, since we
-* should never get any other interrupts sent to offline threads.
-* Only do this for secondary threads.
-*/
-   beq cr1,25f
-   lwz r3,VCPU_PTID(r4)
-   cmpwi   r3,0
-   beq 27f
-25:ld  r5,HSTATE_XICS_PHYS(r13)
-   li  r0,0xff
-   li  r6,XICS_MFRR
-   li  r7,XICS_XIRR
+   bne 27f /* if not */
+   ld  r5,HSTATE_XICS_PHYS(r13)
+   li  r7,XICS_XIRR/* if it was an external interrupt, */
lwzcix  r8,r5,r7/* get and ack the interrupt */
sync
clrldi. r9,r8,40/* get interrupt source ID. */
-   beq 27f /* none there? */
-   cmpwi   r9,XICS_IPI
-   bne 26f
+   beq 28f /* none there? */
+   cmpwi   r9,XICS_IPI /* was it an IPI? */
+   bne 29f
+   li  r0,0xff
+   li  r6,XICS_MFRR
stbcix  r0,r5,r6/* clear IPI */
-26:stwcix  r8,r5,r7/* EOI the interrupt */
-
-27:/* XXX should handle hypervisor maintenance interrupts etc. here */
+  

[PATCH 8/8] KVM: PPC: Book 3S: Add API for in-kernel XICS emulation

2013-04-11 Thread Paul Mackerras
This adds the API for userspace to instantiate an XICS device in a VM
and connect VCPUs to it.  The API consists of a new device type for
the KVM_CREATE_DEVICE ioctl, a new capability KVM_CAP_IRQ_XICS, which
functions similarly to KVM_CAP_IRQ_MPIC, and the KVM_IRQ_LINE ioctl,
which is used to assert and deassert interrupt inputs of the XICS.

The XICS device has one attribute group, KVM_DEV_XICS_GRP_SOURCES.
Each attribute within this group corresponds to the state of one
interrupt source.  The attribute number is the same as the interrupt
source number.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 Documentation/virtual/kvm/api.txt  |8 ++
 Documentation/virtual/kvm/devices/xics.txt |   66 +
 arch/powerpc/kvm/book3s_xics.c |  206 +++-
 arch/powerpc/kvm/powerpc.c |   31 +
 include/linux/kvm_host.h   |1 +
 include/uapi/linux/kvm.h   |   14 ++
 virt/kvm/kvm_main.c|   14 ++
 7 files changed, 335 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/virtual/kvm/devices/xics.txt

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 54bb6ad..db230f8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2756,3 +2756,11 @@ Parameters: args[0] is the MPIC device fd
 args[1] is the MPIC CPU number for this vcpu
 
 This capability connects the vcpu to an in-kernel MPIC device.
+
+6.7 KVM_CAP_IRQ_XICS
+
+Architectures: ppc
+Parameters: args[0] is the XICS device fd
+args[1] is the XICS CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XICS device.
diff --git a/Documentation/virtual/kvm/devices/xics.txt 
b/Documentation/virtual/kvm/devices/xics.txt
new file mode 100644
index 000..4286493
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/xics.txt
@@ -0,0 +1,66 @@
+XICS interrupt controller
+
+Device type supported: KVM_DEV_TYPE_XICS
+
+Groups:
+  KVM_DEV_XICS_SOURCES
+  Attributes: One per interrupt source, indexed by the source number.
+
+This device emulates the XICS (eXternal Interrupt Controller
+Specification) defined in PAPR.  The XICS has a set of interrupt
+sources, each identified by a 20-bit source number, and a set of
+Interrupt Control Presentation (ICP) entities, also called servers,
+each associated with a virtual CPU.
+
+The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH
+capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and
+the interrupt server number (i.e. the vcpu number from the XICS's
+point of view) in args[1] of the kvm_enable_cap struct.  Each ICP has
+64 bits of state which can be read and written using the
+KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu.  The 64 bit
+state word has the following bitfields, starting at the
+least-significant end of the word:
+
+* Unused, 16 bits
+
+* Pending interrupt priority, 8 bits
+  Zero is the highest priority, 255 means no interrupt is pending.
+
+* Pending IPI (inter-processor interrupt) priority, 8 bits
+  Zero is the highest priority, 255 means no IPI is pending.
+
+* Pending interrupt source number, 24 bits
+  Zero means no interrupt pending, 2 means an IPI is pending
+
+* Current processor priority, 8 bits
+  Zero is the highest priority, meaning no interrupts can be
+  delivered, and 255 is the lowest priority.
+
+Each source has 64 bits of state that can be read and written using
+the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
+KVM_DEV_XICS_SOURCES attribute group, with the attribute number being
+the interrupt source number.  The 64 bit state word has the following
+bitfields, starting from the least-significant end of the word:
+
+* Destination (server number), 32 bits
+  This specifies where the interrupt should be sent, and is the
+  interrupt server number specified for the destination vcpu.
+
+* Priority, 8 bits
+  This is the priority specified for this interrupt source, where 0 is
+  the highest priority and 255 is the lowest.  An interrupt with a
+  priority of 255 will never be delivered.
+
+* Level sensitive flag, 1 bit
+  This bit is 1 for a level-sensitive interrupt source, or 0 for
+  edge-sensitive (or MSI).
+
+* Masked flag, 1 bit
+  This bit is set to 1 if the interrupt is masked (cannot be delivered
+  regardless of its priority), for example by the ibm,int-off RTAS
+  call, or 0 if it is not masked.
+
+* Pending flag, 1 bit
+  This bit is 1 if the source has a pending interrupt, otherwise 0.
+
+Only one XICS instance may be created per VM.
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 4eb4f4b..eb58abf 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -11,6 +11,7 @@
 #include <linux/kvm_host.h>
 #include <linux/err.h>
 #include <linux/gfp.h>
+#include <linux/anon_inodes.h>
 
 #include <asm/uaccess.h>
 #include 

[PATCH 4/8] KVM: PPC: Book3S HV: Add support for real mode ICP in XICS emulation

2013-04-11 Thread Paul Mackerras
From: Benjamin Herrenschmidt b...@kernel.crashing.org

This adds an implementation of the XICS hypercalls in real mode for HV
KVM, which allows us to avoid exiting the guest MMU context on all
threads for a variety of operations such as fetching a pending
interrupt, EOI of messages, IPIs, etc.

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/Makefile   |1 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c|  402 +++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |   10 +-
 arch/powerpc/kvm/book3s_xics.c  |   64 -
 arch/powerpc/kvm/book3s_xics.h  |   16 ++
 5 files changed, 475 insertions(+), 18 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv_rm_xics.c

diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index cccd85f..24a2896 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -77,6 +77,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv_rm_mmu.o \
book3s_64_vio_hv.o \
book3s_hv_ras.o \
+   book3s_hv_rm_xics.o \
book3s_hv_builtin.o
 
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c 
b/arch/powerpc/kvm/book3s_hv_rm_xics.c
new file mode 100644
index 000..4cb7df8
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+#include "book3s_xics.h"
+
+#define DEBUG_PASSUP
+
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+   __asm__ __volatile__("sync; stbcix %0,0,%1"
+   : : "r" (val), "r" (paddr) : "memory");
+}
+
+static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
+   struct kvm_vcpu *this_vcpu)
+{
+   struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
+   unsigned long xics_phys;
+   int cpu;
+
+   /* Mark the target VCPU as having an interrupt pending */
+   vcpu->stat.queue_intr++;
+   set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+
+   /* Kick self ? Just set MER and return */
+   if (vcpu == this_vcpu) {
+   mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
+   return;
+   }
+
+   /* Check if the core is loaded, if not, too hard */
+   cpu = vcpu->cpu;
+   if (cpu < 0 || cpu >= nr_cpu_ids) {
+   this_icp->rm_action |= XICS_RM_KICK_VCPU;
+   this_icp->rm_kick_target = vcpu;
+   return;
+   }
+   /* In SMT cpu will always point to thread 0, we adjust it */
+   cpu += vcpu->arch.ptid;
+
+   /* Not too hard, then poke the target */
+   xics_phys = paca[cpu].kvm_hstate.xics_phys;
+   rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+}
+
+static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
+{
+   /* Note: Only called on self ! */
+   clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
+   &vcpu->arch.pending_exceptions);
+   mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
+}
+
+static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
+   union kvmppc_icp_state old,
+   union kvmppc_icp_state new)
+{
+   struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
+   bool success;
+
+   /* Calculate new output value */
+   new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+   /* Attempt atomic update */
+   success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+   if (!success)
+   goto bail;
+
+   /*
+* Check for output state update
+*
+* Note that this is racy since another processor could be updating
+* the state already. This is why we never clear the interrupt output
+* here, we only ever set it. The clear only happens prior to doing
+* an update and only by the processor itself. Currently we do it
+* in Accept (H_XIRR) and Up_Cppr (H_XPPR).
+*
+* We also do not try to figure out whether the EE state has changed,
+* we unconditionally set it if the new state calls for it. The reason
+* for that is that we opportunistically remove the pending interrupt
+* flag when raising CPPR, so we need to set it back here if an
+* interrupt is still pending.
+*/
+   if (new.out_ee)
+   icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
+
+   /* Expose the 

[PATCH 1/8] KVM: PPC: Book3S: Add infrastructure to implement kernel-side RTAS calls

2013-04-11 Thread Paul Mackerras
From: Michael Ellerman mich...@ellerman.id.au

For pseries machine emulation, in order to move the interrupt
controller code to the kernel, we need to intercept some RTAS
calls in the kernel itself.  This adds an infrastructure to allow
in-kernel handlers to be registered for RTAS services by name.
A new ioctl, KVM_PPC_RTAS_DEFINE_TOKEN, then allows userspace to
associate token values with those service names.  Then, when the
guest requests an RTAS service with one of those token values, it
will be handled by the relevant in-kernel handler rather than being
passed up to userspace as at present.

Signed-off-by: Michael Ellerman mich...@ellerman.id.au
Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
Signed-off-by: Paul Mackerras pau...@samba.org
---
 Documentation/virtual/kvm/api.txt   |   19 
 arch/powerpc/include/asm/hvcall.h   |3 +
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/include/asm/kvm_ppc.h  |4 +
 arch/powerpc/include/uapi/asm/kvm.h |6 ++
 arch/powerpc/kvm/Makefile   |1 +
 arch/powerpc/kvm/book3s_hv.c|   18 +++-
 arch/powerpc/kvm/book3s_pr.c|1 +
 arch/powerpc/kvm/book3s_pr_papr.c   |7 ++
 arch/powerpc/kvm/book3s_rtas.c  |  182 +++
 arch/powerpc/kvm/powerpc.c  |8 ++
 include/uapi/linux/kvm.h|3 +
 12 files changed, 252 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/kvm/book3s_rtas.c

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 4c326ae..4247d65 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2325,6 +2325,25 @@ and distributor interface, the ioctl must be called 
after calling
 KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs.  Calling
 this ioctl twice for any of the base addresses will return -EEXIST.
 
+4.82 KVM_PPC_RTAS_DEFINE_TOKEN
+
+Capability: KVM_CAP_PPC_RTAS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_rtas_token_args
+Returns: 0 on success, -1 on error
+
+Defines a token value for a RTAS (Run Time Abstraction Services)
+service in order to allow it to be handled in the kernel.  The
+argument struct gives the name of the service, which must be the name
+of a service that has a kernel-side implementation.  If the token
+value is non-zero, it will be associated with that service, and
+subsequent RTAS calls by the guest specifying that token will be
+handled by the kernel.  If the token value is 0, then any token
+associated with the service will be forgotten, and subsequent RTAS
+calls by the guest for that service will be passed to userspace to be
+handled.
+
 
 5. The kvm_run structure
 
diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 4bc2c3d..cf4df8e 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -270,6 +270,9 @@
 #define H_SET_MODE 0x31C
 #define MAX_HCALL_OPCODE   H_SET_MODE
 
+/* Platform specific hcalls, used by KVM */
+#define H_RTAS 0xf000
+
 #ifndef __ASSEMBLY__
 
 /**
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 2a2e235..8fe8ef5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -255,6 +255,7 @@ struct kvm_arch {
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
+   struct list_head rtas_tokens;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index f54707f..f4e66c4 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -166,6 +166,10 @@ extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, 
struct kvm_get_htab_fd *);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
 
+extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
+extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
+extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index ef072b1..a599ea5 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -299,6 +299,12 @@ struct kvm_allocate_rma {
__u64 rma_size;
 };
 
+/* for KVM_CAP_PPC_RTAS */
+struct kvm_rtas_token_args {
+   char name[120];
+   __u64 token;/* Use a token of 0 to undefine a mapping */
+};
+
 struct kvm_book3e_206_tlb_entry {
__u32 mas8;
__u32 mas1;
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 4a2277a..d2c8a88 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -86,6 +86,7 @@ kvm-book3s_64-module-objs := \
   

[PATCH 4/8 v3] KVM: PPC: e500: Add support for TLBnPS registers

2013-04-11 Thread Mihai Caraman
Add support for TLBnPS registers available in MMU Architecture Version
(MAV) 2.0.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - Add vcpu_ftr enum
 
v2:
 - Add vcpu generic function has_feature()

 Documentation/virtual/kvm/api.txt   |4 
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/include/uapi/asm/kvm.h |4 
 arch/powerpc/kvm/e500.h |   18 ++
 arch/powerpc/kvm/e500_emulate.c |   10 ++
 arch/powerpc/kvm/e500_mmu.c |   22 ++
 6 files changed, 59 insertions(+), 0 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 1a76663..f045377 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1803,6 +1803,10 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TLB1CFG  | 32
   PPC   | KVM_REG_PPC_TLB2CFG  | 32
   PPC   | KVM_REG_PPC_TLB3CFG  | 32
+  PPC   | KVM_REG_PPC_TLB0PS   | 32
+  PPC   | KVM_REG_PPC_TLB1PS   | 32
+  PPC   | KVM_REG_PPC_TLB2PS   | 32
+  PPC   | KVM_REG_PPC_TLB3PS   | 32
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index e34f8fe..3b6cee3 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -502,6 +502,7 @@ struct kvm_vcpu_arch {
spinlock_t wdt_lock;
struct timer_list wdt_timer;
u32 tlbcfg[4];
+   u32 tlbps[4];
u32 mmucfg;
u32 epr;
u32 crit_save;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 93d063f..91341d9 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -442,5 +442,9 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TLB1CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
 #define KVM_REG_PPC_TLB2CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
 #define KVM_REG_PPC_TLB3CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
+#define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97)
+#define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
+#define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
+#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
 
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index b73ca7a..c2e5e98 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -23,6 +23,10 @@
 #include asm/mmu-book3e.h
 #include asm/tlb.h
 
+enum vcpu_ftr {
+   VCPU_FTR_MMU_V2
+};
+
 #define E500_PID_NUM   3
 #define E500_TLB_NUM   2
 
@@ -299,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu 
*vcpu)
 #define get_tlb_sts(gtlbe)  (MAS1_TS)
 #endif /* !BOOKE_HV */
 
+static inline bool has_feature(const struct kvm_vcpu *vcpu,
+  enum vcpu_ftr ftr)
+{
+   bool has_ftr;
+   switch (ftr) {
+   case VCPU_FTR_MMU_V2:
+   has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
+   break;
+   default:
+   return false;
+   }
+   return has_ftr;
+}
+
 #endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e78f353..12b8de2 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int 
sprn, ulong *spr_val)
case SPRN_TLB1CFG:
*spr_val = vcpu->arch.tlbcfg[1];
break;
+   case SPRN_TLB0PS:
+   if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+   return EMULATE_FAIL;
+   *spr_val = vcpu->arch.tlbps[0];
+   break;
+   case SPRN_TLB1PS:
+   if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+   return EMULATE_FAIL;
+   *spr_val = vcpu->arch.tlbps[1];
+   break;
case SPRN_L1CSR0:
*spr_val = vcpu_e500->l1csr0;
break;
break;
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 08a5b0d..a863dc1 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -631,6 +631,13 @@ int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 
id,
i = id - KVM_REG_PPC_TLB0CFG;
*val = get_reg_val(id, vcpu->arch.tlbcfg[i]);
break;
+   case KVM_REG_PPC_TLB0PS:
+   case KVM_REG_PPC_TLB1PS:
+   case KVM_REG_PPC_TLB2PS:
+   case KVM_REG_PPC_TLB3PS:
+   i = id - KVM_REG_PPC_TLB0PS;
+   *val = get_reg_val(id, vcpu->arch.tlbps[i]);
+   break;
+   break;
default:
r = -EINVAL;
break;
@@ -682,6 +689,16 @@ int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 

[PATCH 2/8 v3] KVM: PPC: e500: Expose MMU registers via ONE_REG

2013-04-11 Thread Mihai Caraman
MMU registers were exposed to user-space using sregs interface. Add them
to ONE_REG interface using kvmppc_get_one_reg/kvmppc_set_one_reg delegation
mechanism.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - Fix case breaks
 
v2:
 - Restrict set_one_reg operation for MMU registers to HW values

 Documentation/virtual/kvm/api.txt   |   11 
 arch/powerpc/include/uapi/asm/kvm.h |   17 ++
 arch/powerpc/kvm/e500.c |6 ++-
 arch/powerpc/kvm/e500.h |4 ++
 arch/powerpc/kvm/e500_mmu.c |   94 +++
 arch/powerpc/kvm/e500mc.c   |6 ++-
 6 files changed, 134 insertions(+), 4 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 976eb65..1a76663 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1792,6 +1792,17 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TSR  | 32
   PPC   | KVM_REG_PPC_OR_TSR   | 32
   PPC   | KVM_REG_PPC_CLEAR_TSR| 32
+  PPC   | KVM_REG_PPC_MAS0 | 32
+  PPC   | KVM_REG_PPC_MAS1 | 32
+  PPC   | KVM_REG_PPC_MAS2 | 64
+  PPC   | KVM_REG_PPC_MAS7_3   | 64
+  PPC   | KVM_REG_PPC_MAS4 | 32
+  PPC   | KVM_REG_PPC_MAS6 | 32
+  PPC   | KVM_REG_PPC_MMUCFG   | 32
+  PPC   | KVM_REG_PPC_TLB0CFG  | 32
+  PPC   | KVM_REG_PPC_TLB1CFG  | 32
+  PPC   | KVM_REG_PPC_TLB2CFG  | 32
+  PPC   | KVM_REG_PPC_TLB3CFG  | 32
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index c2ff99c..93d063f 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -426,4 +426,21 @@ struct kvm_get_htab_header {
 /* Debugging: Special instruction for software breakpoint */
 #define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
 
+/* MMU registers */
+#define KVM_REG_PPC_MAS0   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c)
+#define KVM_REG_PPC_MAS1   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d)
+#define KVM_REG_PPC_MAS2   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e)
+#define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f)
+#define KVM_REG_PPC_MAS4   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90)
+#define KVM_REG_PPC_MAS6   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91)
+#define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92)
+/*
+ * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using
+ * KVM_CAP_SW_TLB ioctl
+ */
+#define KVM_REG_PPC_TLB0CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93)
+#define KVM_REG_PPC_TLB1CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
+#define KVM_REG_PPC_TLB2CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
+#define KVM_REG_PPC_TLB3CFG(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 576010f..ce6b73c 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -428,13 +428,15 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct 
kvm_sregs *sregs)
 int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
 {
-   return -EINVAL;
+   int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+   return r;
 }
 
 int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
   union kvmppc_one_reg *val)
 {
-   return -EINVAL;
+   int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+   return r;
 }
 
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 33db48a..b73ca7a 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -131,6 +131,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 
*vcpu_e500);
 void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+   union kvmppc_one_reg *val);
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+  union kvmppc_one_reg *val);
 
 #ifdef CONFIG_KVM_E500V2
 unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 5c44759..44f7762 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -596,6 +596,100 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, 
struct kvm_sregs *sregs)
return 0;
 }
 
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+   union kvmppc_one_reg *val)
+{
+   int r = 0;
+   long int i;
+
+   switch (id) {
+   case KVM_REG_PPC_MAS0:
+   *val = 

[PATCH 6/8 v3] KVM: PPC: e500: Remove E.PT and E.HV.LRAT categories from VCPUs

2013-04-11 Thread Mihai Caraman
Embedded.Page Table (E.PT) category is not supported yet in e6500 kernel.
Configure TLBnCFG to remove E.PT and E.HV.LRAT categories from VCPUs.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - No change

 arch/powerpc/kvm/e500_mmu.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 1c1c5cb..c41a5a9 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -885,8 +885,12 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS);
vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS);
 
+   vcpu->arch.mmucfg &= ~MMUCFG_LRAT;
+
/* Guest mmu emulation currently doesn't handle E.PT */
vcpu->arch.eptcfg = 0;
+   vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT;
+   vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND;
}
 
return 0;
-- 
1.7.4.1


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/8 v3] KVM: PPC: e500: Move vcpu's MMU configuration to dedicated functions

2013-04-11 Thread Mihai Caraman
Vcpu's MMU default configuration and geometry update logic was buried in
a chunk of code. Move them to dedicated functions to add more clarity.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - No change

v2:
 - Add better patch description

 arch/powerpc/kvm/e500_mmu.c |   60 +++---
 1 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 44f7762..08a5b0d 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -690,6 +690,20 @@ int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 
id,
return r;
 }
 
+static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
+   struct kvm_book3e_206_tlb_params *params)
+{
+   vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+   if (params->tlb_sizes[0] <= 2048)
+   vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0];
+   vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
+
+   vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+   vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1];
+   vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+   return 0;
+}
+
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
  struct kvm_config_tlb *cfg)
 {
@@ -786,16 +800,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
vcpu_e500->gtlb_offset[0] = 0;
vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
 
-   vcpu-arch.mmucfg = mfspr(SPRN_MMUCFG)  ~MMUCFG_LPIDSIZE;
-
-   vcpu-arch.tlbcfg[0] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-   if (params.tlb_sizes[0] = 2048)
-   vcpu-arch.tlbcfg[0] |= params.tlb_sizes[0];
-   vcpu-arch.tlbcfg[0] |= params.tlb_ways[0]  TLBnCFG_ASSOC_SHIFT;
-
-   vcpu-arch.tlbcfg[1] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-   vcpu-arch.tlbcfg[1] |= params.tlb_sizes[1];
-   vcpu-arch.tlbcfg[1] |= params.tlb_ways[1]  TLBnCFG_ASSOC_SHIFT;
+   /* Update vcpu's MMU geometry based on SW_TLB input */
+   vcpu_mmu_geometry_update(vcpu, params);
 
vcpu_e500->shared_tlb_pages = pages;
vcpu_e500->num_shared_tlb_pages = num_pages;
@@ -831,6 +837,27 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
return 0;
 }
 
+/* Vcpu's MMU default configuration */
+static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
+  struct kvmppc_e500_tlb_params *params)
+{
+   /* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/
+   vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
+
+   /* Initialize TLBnCFG fields with host values and SW_TLB geometry*/
+   vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
+~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+   vcpu->arch.tlbcfg[0] |= params[0].entries;
+   vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT;
+
+   vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
+~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+   vcpu->arch.tlbcfg[1] |= params[1].entries;
+   vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT;
+
+   return 0;
+}
+
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
struct kvm_vcpu *vcpu = vcpu_e500->vcpu;
@@ -875,18 +902,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 
*vcpu_e500)
if (!vcpu_e500->g2h_tlb1_map)
goto err;
 
-   /* Init TLB configuration register */
-   vcpu-arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) 
-~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-   vcpu-arch.tlbcfg[0] |= vcpu_e500-gtlb_params[0].entries;
-   vcpu-arch.tlbcfg[0] |=
-   vcpu_e500-gtlb_params[0].ways  TLBnCFG_ASSOC_SHIFT;
-
-   vcpu-arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) 
-~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-   vcpu-arch.tlbcfg[1] |= vcpu_e500-gtlb_params[1].entries;
-   vcpu-arch.tlbcfg[1] |=
-   vcpu_e500-gtlb_params[1].ways  TLBnCFG_ASSOC_SHIFT;
+   vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
 
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
-- 
1.7.4.1


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] KVM: PPC: e500: Add support for EPTCFG register

2013-04-11 Thread Mihai Caraman
EPTCFG register defined by E.PT is accessed unconditionally by Linux guests
in the presence of MAV 2.0. Emulate it now.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - Initialize EPTCFG to 0 since E.PT is not supported now

 Documentation/virtual/kvm/api.txt   |1 +
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/include/uapi/asm/kvm.h |1 +
 arch/powerpc/kvm/e500_emulate.c |9 +
 arch/powerpc/kvm/e500_mmu.c |   12 
 5 files changed, 24 insertions(+), 0 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index f045377..a1f2200 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1807,6 +1807,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TLB1PS   | 32
   PPC   | KVM_REG_PPC_TLB2PS   | 32
   PPC   | KVM_REG_PPC_TLB3PS   | 32
+  PPC   | KVM_REG_PPC_EPTCFG   | 32
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 3b6cee3..8a48e68 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -504,6 +504,7 @@ struct kvm_vcpu_arch {
u32 tlbcfg[4];
u32 tlbps[4];
u32 mmucfg;
+   u32 eptcfg;
u32 epr;
u32 crit_save;
struct kvmppc_booke_debug_reg dbg_reg;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 91341d9..7f4d191 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -446,5 +446,6 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
 #define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
 #define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
+#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
 
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 12b8de2..b10a012 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -317,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int 
sprn, ulong *spr_val)
case SPRN_MMUCFG:
*spr_val = vcpu->arch.mmucfg;
break;
+   case SPRN_EPTCFG:
+   if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+   return EMULATE_FAIL;
+   /*
+* Legacy Linux guests access EPTCFG register even if the E.PT
+* category is disabled in the VM. Give them a chance to live.
+*/
+   *spr_val = vcpu->arch.eptcfg;
+   break;
 
/* extra exceptions */
case SPRN_IVOR32:
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index a863dc1..1c1c5cb 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -624,6 +624,9 @@ int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 
id,
case KVM_REG_PPC_MMUCFG:
*val = get_reg_val(id, vcpu->arch.mmucfg);
break;
+   case KVM_REG_PPC_EPTCFG:
+   *val = get_reg_val(id, vcpu->arch.eptcfg);
+   break;
case KVM_REG_PPC_TLB0CFG:
case KVM_REG_PPC_TLB1CFG:
case KVM_REG_PPC_TLB2CFG:
@@ -678,6 +681,12 @@ int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 
id,
r = -EINVAL;
break;
}
+   case KVM_REG_PPC_EPTCFG: {
+   u32 reg = set_reg_val(id, *val);
+   if (reg != vcpu->arch.eptcfg)
+   r = -EINVAL;
+   break;
+   }
case KVM_REG_PPC_TLB0CFG:
case KVM_REG_PPC_TLB1CFG:
case KVM_REG_PPC_TLB2CFG:
@@ -875,6 +884,9 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
vcpu-arch.tlbps[0] = mfspr(SPRN_TLB0PS);
vcpu-arch.tlbps[1] = mfspr(SPRN_TLB1PS);
+
+   /* Guest mmu emulation currently doesn't handle E.PT */
+   vcpu-arch.eptcfg = 0;
}
 
return 0;
-- 
1.7.4.1


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/8 v3] KVM: PPC: e500: Enable FSL e6500 core

2013-04-11 Thread Mihai Caraman
Enable basic support for Freescale e6500 core, adding MAV 2.0 support.
Validated on T4240QDS platform. Altivec, Multithreading and HW Tablewalk
are not addressed by this patchset.

Mihai Caraman (8):
  KVM: PPC: Book3E: Refactor ONE_REG ioctl implementation
  KVM: PPC: e500: Expose MMU registers via ONE_REG
  KVM: PPC: e500: Move vcpu's MMU configuration to dedicated functions
  KVM: PPC: e500: Add support for TLBnPS registers
  KVM: PPC: e500: Add support for EPTCFG register
  KVM: PPC: e500: Remove E.PT and E.HV.LRAT categories from VCPUs
  KVM: PPC: e500mc: Enable e6500 cores
  KVM: PPC: e500: Add e6500 core to Kconfig description

 Documentation/virtual/kvm/api.txt   |   16 +++
 arch/powerpc/include/asm/kvm_host.h |2 +
 arch/powerpc/include/uapi/asm/kvm.h |   22 
 arch/powerpc/kvm/44x.c  |   12 ++
 arch/powerpc/kvm/Kconfig|6 +-
 arch/powerpc/kvm/booke.c|  102 ++-
 arch/powerpc/kvm/e500.c |   14 +++
 arch/powerpc/kvm/e500.h |   22 
 arch/powerpc/kvm/e500_emulate.c |   19 
 arch/powerpc/kvm/e500_mmu.c |  192 +++
 arch/powerpc/kvm/e500mc.c   |   16 +++
 11 files changed, 351 insertions(+), 72 deletions(-)

-- 
1.7.4.1


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/8 v3] KVM: PPC: Book3E: Refactor ONE_REG ioctl implementation

2013-04-11 Thread Mihai Caraman
Refactor Book3E ONE_REG ioctl implementation to use kvmppc_get_one_reg/
kvmppc_set_one_reg delegation interface introduced by Book3S. This is
necessary for MMU SPRs which are platform specific.

Get rid of useless case braces in the process.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - Split ONE_REG ioctl refactoring in its own patch

 arch/powerpc/kvm/44x.c|   12 +
 arch/powerpc/kvm/booke.c  |  102 -
 arch/powerpc/kvm/e500.c   |   12 +
 arch/powerpc/kvm/e500mc.c |   12 +
 4 files changed, 91 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 3d7fd21..2f5c6b6 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct 
kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+   union kvmppc_one_reg *val)
+{
+   return -EINVAL;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+  union kvmppc_one_reg *val)
+{
+   return -EINVAL;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
struct kvmppc_vcpu_44x *vcpu_44x;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index a49a68a..08f6540 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1412,117 +1412,125 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu 
*vcpu,
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-   int r = -EINVAL;
+   int r = 0;
+   union kvmppc_one_reg val;
+   int size;
+   long int i;
+
+   size = one_reg_size(reg-id);
+   if (size  sizeof(val))
+   return -EINVAL;
 
switch (reg-id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
-   case KVM_REG_PPC_IAC4: {
-   int iac = reg-id - KVM_REG_PPC_IAC1;
-   r = copy_to_user((u64 __user *)(long)reg-addr,
-vcpu-arch.dbg_reg.iac[iac], sizeof(u64));
+   case KVM_REG_PPC_IAC4:
+   i = reg-id - KVM_REG_PPC_IAC1;
+   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.iac[i]);
break;
-   }
case KVM_REG_PPC_DAC1:
-   case KVM_REG_PPC_DAC2: {
-   int dac = reg-id - KVM_REG_PPC_DAC1;
-   r = copy_to_user((u64 __user *)(long)reg-addr,
-vcpu-arch.dbg_reg.dac[dac], sizeof(u64));
+   case KVM_REG_PPC_DAC2:
+   i = reg-id - KVM_REG_PPC_DAC1;
+   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.dac[i]);
break;
-   }
case KVM_REG_PPC_EPR: {
u32 epr = get_guest_epr(vcpu);
-   r = put_user(epr, (u32 __user *)(long)reg-addr);
+   val = get_reg_val(reg-id, epr);
break;
}
 #if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR:
-   r = put_user(vcpu-arch.epcr, (u32 __user *)(long)reg-addr);
+   val = get_reg_val(reg-id, vcpu-arch.epcr);
break;
 #endif
case KVM_REG_PPC_TCR:
-   r = put_user(vcpu-arch.tcr, (u32 __user *)(long)reg-addr);
+   val = get_reg_val(reg-id, vcpu-arch.tcr);
break;
case KVM_REG_PPC_TSR:
-   r = put_user(vcpu-arch.tsr, (u32 __user *)(long)reg-addr);
+   val = get_reg_val(reg-id, vcpu-arch.tsr);
break;
-   case KVM_REG_PPC_DEBUG_INST: {
-   u32 opcode = KVMPPC_INST_EHPRIV;
-   r = copy_to_user((u32 __user *)(long)reg-addr,
-opcode, sizeof(u32));
+   case KVM_REG_PPC_DEBUG_INST:
+   val = get_reg_val(reg-id, KVMPPC_INST_EHPRIV);
break;
-   }
default:
+   r = kvmppc_get_one_reg(vcpu, reg-id, val);
break;
}
+
+   if (r)
+   return r;
+
+   if (copy_to_user((char __user *)(unsigned long)reg-addr, val, size))
+   r = -EFAULT;
+
return r;
 }
 
 int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-   int r = -EINVAL;
+   int r = 0;
+   union kvmppc_one_reg val;
+   int size;
+   long int i;
+
+   size = one_reg_size(reg-id);
+   if (size  sizeof(val))
+   return -EINVAL;
+
+   if (copy_from_user(val, (char __user *)(unsigned long)reg-addr, size))
+   return -EFAULT;
 
switch (reg-id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
-   case KVM_REG_PPC_IAC4: {
-   int iac = reg-id - KVM_REG_PPC_IAC1;
-   r = copy_from_user(vcpu-arch.dbg_reg.iac[iac],
-(u64 __user 

Re: [PULL 0/7] ppc patch queue 2013-03-22

2013-04-11 Thread Marcelo Tosatti
On Tue, Mar 26, 2013 at 12:59:04PM +1100, Paul Mackerras wrote:
 On Tue, Mar 26, 2013 at 03:33:12AM +0200, Gleb Natapov wrote:
  On Tue, Mar 26, 2013 at 12:35:09AM +0100, Alexander Graf wrote:
   I agree. So if it doesn't hurt to have the same commits in kvm/next and 
   kvm/master, I'd be more than happy to send another pull request with the 
   important fixes against kvm/master as well.
   
  If it will result in the same commit showing twice in the Linus tree in 
  3.10 we cannot do that.
 
 Why not?  In the circumstances it seems perfectly reasonable to me.
 Git should merge the branches without any problem, and even if it
 doesn't, Linus is good at fixing merge conflicts.
 
 Paul.

Yes, we should avoid duplicate commits, but it's not fatal for them to exist.

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >