Re: [PATCH net] vhost/net: fix heads usage of ubuf_info

2013-03-21 Thread Michael S. Tsirkin
On Sun, Mar 17, 2013 at 02:29:55PM -0400, David Miller wrote:
 From: Michael S. Tsirkin m...@redhat.com
 Date: Sun, 17 Mar 2013 14:46:09 +0200
 
  ubuf info allocator uses guest controlled head as an index,
  so a malicious guest could put the same head entry in the ring twice,
  and we will get two callbacks on the same value.
  To fix use upend_idx which is guaranteed to be unique.
  
  Reported-by: Rusty Russell ru...@rustcorp.com.au
  Signed-off-by: Michael S. Tsirkin m...@redhat.com
 
 Applied and queued up for -stable, thanks.
 
 And thankfully you got the stable URL wrong,

Yes I wrote sta...@kernel.org that's what an old copy
says here:
https://www.kernel.org/doc/Documentation/stable_kernel_rules.txt

I should have known better than to look at it on the 'net.  The heading at
the top, 'Everything you ever wanted to know about Linux 2.6 -stable releases.',
is a big hint that it's stale.
Any idea who maintains this? It would be better to update it, remove it, or redirect it to git.

 please do not CC:
 networking patches to stable, just make sure I apply them and in
 your post-commit text explicitly ask me to queue it up to my
 -stable queue.
 
 Thanks.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4 v2] KVM :PPC: Userspace Debug support

2013-03-21 Thread Bharat Bhushan
From: Bharat Bhushan bharat.bhus...@freescale.com

This patchset adds userspace debug support for booke/bookehv.
It has been tested on powerpc e500v2/e500mc devices.

v1->v2
 - Debug registers are now saved/restored in vcpu_put/vcpu_get.
   Earlier, the debug registers were saved/restored on guest entry/exit.

Bharat Bhushan (4):
  Added ONE_REG interface for debug instruction
  KVM: PPC: debug stub interface parameter defined
  Rename EMULATE_DO_PAPR to EMULATE_EXIT_USER
  KVM: PPC: Add userspace debug stub support

 Documentation/virtual/kvm/api.txt |1 +
 arch/powerpc/include/asm/kvm_book3s.h |2 +
 arch/powerpc/include/asm/kvm_booke.h  |2 +
 arch/powerpc/include/asm/kvm_host.h   |   10 ++
 arch/powerpc/include/asm/kvm_ppc.h|2 +-
 arch/powerpc/include/uapi/asm/kvm.h   |   41 ++
 arch/powerpc/kvm/book3s.c |   12 ++
 arch/powerpc/kvm/book3s_emulate.c |4 +-
 arch/powerpc/kvm/book3s_pr.c  |4 +-
 arch/powerpc/kvm/booke.c  |  252 +++--
 arch/powerpc/kvm/e500_emulate.c   |   10 ++
 arch/powerpc/kvm/powerpc.c|6 -
 12 files changed, 323 insertions(+), 23 deletions(-)


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4 v2] Added ONE_REG interface for debug instruction

2013-03-21 Thread Bharat Bhushan
This patch adds the one_reg interface to get the special instruction
to be used for setting a software breakpoint from userspace.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
v2:
 - Corrected the opcode for the unconditional trap (tw always).

 Documentation/virtual/kvm/api.txt |1 +
 arch/powerpc/include/asm/kvm_book3s.h |2 ++
 arch/powerpc/include/asm/kvm_booke.h  |2 ++
 arch/powerpc/include/uapi/asm/kvm.h   |4 
 arch/powerpc/kvm/book3s.c |6 ++
 arch/powerpc/kvm/booke.c  |6 ++
 6 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index cce500a..dbfcc04 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1766,6 +1766,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TSR  | 32
   PPC   | KVM_REG_PPC_OR_TSR   | 32
   PPC   | KVM_REG_PPC_CLEAR_TSR| 32
+  PPC   | KVM_REG_PPC_DEBUG_INST| 32
 
 4.69 KVM_GET_ONE_REG
 
diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 5a56e1c..bc81842 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -458,6 +458,8 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu 
*vcpu)
 #define OSI_SC_MAGIC_R40x77810F9B
 
 #define INS_DCBZ   0x7c0007ec
+/* TO = 31 for unconditional trap */
+#define INS_TW 0x7fe8
 
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS(LPID_RSVD + 1)
diff --git a/arch/powerpc/include/asm/kvm_booke.h 
b/arch/powerpc/include/asm/kvm_booke.h
index b7cd335..d3c1eb3 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -26,6 +26,8 @@
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS64
 
+#define KVMPPC_INST_EHPRIV 0x7c00021c
+
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
vcpu-arch.gpr[num] = val;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index ef072b1..c2ff99c 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -422,4 +422,8 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_CLEAR_TSR  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88)
 #define KVM_REG_PPC_TCR(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89)
 #define KVM_REG_PPC_TSR(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a)
+
+/* Debugging: Special instruction for software breakpoint */
+#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index a4b6452..975a401 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -530,6 +530,12 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
val = get_reg_val(reg-id, vcpu-arch.vscr.u[3]);
break;
 #endif /* CONFIG_ALTIVEC */
+   case KVM_REG_PPC_DEBUG_INST: {
+   u32 opcode = INS_TW;
+   r = copy_to_user((u32 __user *)(long)reg-addr,
+opcode, sizeof(u32));
+   break;
+   }
default:
r = -EINVAL;
break;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 8b553c0..a41cd6d 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1448,6 +1448,12 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
case KVM_REG_PPC_TSR:
r = put_user(vcpu-arch.tsr, (u32 __user *)(long)reg-addr);
break;
+   case KVM_REG_PPC_DEBUG_INST: {
+   u32 opcode = KVMPPC_INST_EHPRIV;
+   r = copy_to_user((u32 __user *)(long)reg-addr,
+opcode, sizeof(u32));
+   break;
+   }
default:
break;
}
-- 
1.7.0.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4 v2] KVM: PPC: debug stub interface parameter defined

2013-03-21 Thread Bharat Bhushan
From: Bharat Bhushan bharat.bhus...@freescale.com

This patch defines the interface parameter for KVM_SET_GUEST_DEBUG
ioctl support. Follow up patches will use this for setting up
hardware breakpoints, watchpoints and software breakpoints.

Also kvm_arch_vcpu_ioctl_set_guest_debug() is moved one level down, from the
common powerpc.c into book3s.c and booke.c. This is because I am not sure what
is required for book3s, so the behaviour of this ioctl does not change for book3s.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
v2:
 - No Change

 arch/powerpc/include/uapi/asm/kvm.h |   23 +++
 arch/powerpc/kvm/book3s.c   |6 ++
 arch/powerpc/kvm/booke.c|6 ++
 arch/powerpc/kvm/powerpc.c  |6 --
 4 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index c2ff99c..15f9a00 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -272,8 +272,31 @@ struct kvm_debug_exit_arch {
 
 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
+   struct {
+   /* H/W breakpoint/watchpoint address */
+   __u64 addr;
+   /*
+* Type denotes h/w breakpoint, read watchpoint, write
+* watchpoint or watchpoint (both read and write).
+*/
+#define KVMPPC_DEBUG_NOTYPE0x0
+#define KVMPPC_DEBUG_BREAKPOINT(1UL  1)
+#define KVMPPC_DEBUG_WATCH_WRITE   (1UL  2)
+#define KVMPPC_DEBUG_WATCH_READ(1UL  3)
+   __u32 type;
+   __u32 reserved;
+   } bp[16];
 };
 
+/* Debug related defines */
+/*
+ * kvm_guest_debug-control is a 32 bit field. The lower 16 bits are generic
+ * and upper 16 bits are architecture specific. Architecture specific defines
+ * that ioctl is for setting hardware breakpoint or software breakpoint.
+ */
+#define KVM_GUESTDBG_USE_SW_BP 0x0001
+#define KVM_GUESTDBG_USE_HW_BP 0x0002
+
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 };
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 975a401..cb85d73 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -613,6 +613,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return 0;
 }
 
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+   struct kvm_guest_debug *dbg)
+{
+   return -EINVAL;
+}
+
 void kvmppc_decrementer_func(unsigned long data)
 {
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index a41cd6d..1de93a8 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1527,6 +1527,12 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
return r;
 }
 
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+struct kvm_guest_debug *dbg)
+{
+   return -EINVAL;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
return -ENOTSUPP;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 934413c..4c94ca9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -532,12 +532,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 #endif
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-struct kvm_guest_debug *dbg)
-{
-   return -EINVAL;
-}
-
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
  struct kvm_run *run)
 {
-- 
1.7.0.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4 v2] Rename EMULATE_DO_PAPR to EMULATE_EXIT_USER

2013-03-21 Thread Bharat Bhushan
From: Bharat Bhushan bharat.bhus...@freescale.com

Instruction emulation returns EMULATE_DO_PAPR when it requires an
exit to userspace on book3s. A similar return value is required
for booke. The name EMULATE_DO_PAPR is confusing there, so it is
renamed to EMULATE_EXIT_USER.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
v2:
 - moved setting of run->exit_reason and vcpu->arch.hcall_needed into the emulator.

 arch/powerpc/include/asm/kvm_ppc.h |2 +-
 arch/powerpc/kvm/book3s_emulate.c  |4 +++-
 arch/powerpc/kvm/book3s_pr.c   |4 +---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 44a657a..8b81468 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -44,7 +44,7 @@ enum emulation_result {
EMULATE_DO_DCR,   /* kvm_run filled with DCR request */
EMULATE_FAIL, /* can't emulate this instruction */
EMULATE_AGAIN,/* something went wrong. go again */
-   EMULATE_DO_PAPR,  /* kvm_run filled with PAPR request */
+   EMULATE_EXIT_USER,/* emulation requires exit to user-space */
 };
 
 extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/book3s_emulate.c 
b/arch/powerpc/kvm/book3s_emulate.c
index 836c569..1f6344c 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
run-papr_hcall.args[i] = gpr;
}
 
-   emulated = EMULATE_DO_PAPR;
+   run-exit_reason = KVM_EXIT_PAPR_HCALL;
+   vcpu-arch.hcall_needed = 1;
+   emulated = EMULATE_EXIT_USER;
break;
}
 #endif
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 73ed11c..2e8bd53 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -760,9 +760,7 @@ program_interrupt:
run-exit_reason = KVM_EXIT_MMIO;
r = RESUME_HOST_NV;
break;
-   case EMULATE_DO_PAPR:
-   run-exit_reason = KVM_EXIT_PAPR_HCALL;
-   vcpu-arch.hcall_needed = 1;
+   case EMULATE_EXIT_USER:
r = RESUME_HOST_NV;
break;
default:
-- 
1.7.0.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support

2013-03-21 Thread Bharat Bhushan
From: Bharat Bhushan bharat.bhus...@freescale.com

This patch adds the debug stub support on booke/bookehv.
Now the QEMU debug stub can use hardware breakpoints, watchpoints and
software breakpoints to debug the guest.

Debug registers are saved/restored on vcpu_put()/vcpu_get().
Also, the debug registers are saved/restored only if the guest is using
debug resources.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
v2:
 - save/restore in vcpu_get()/vcpu_put()
 - some more minor cleanup based on review comments.

 arch/powerpc/include/asm/kvm_host.h |   10 ++
 arch/powerpc/include/uapi/asm/kvm.h |   22 +++-
 arch/powerpc/kvm/booke.c|  252 ---
 arch/powerpc/kvm/e500_emulate.c |   10 ++
 4 files changed, 272 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index f4ba881..8571952 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -504,7 +504,17 @@ struct kvm_vcpu_arch {
u32 mmucfg;
u32 epr;
u32 crit_save;
+   /* guest debug registers*/
struct kvmppc_booke_debug_reg dbg_reg;
+   /* shadow debug registers */
+   struct kvmppc_booke_debug_reg shadow_dbg_reg;
+   /* host debug registers*/
+   struct kvmppc_booke_debug_reg host_dbg_reg;
+   /*
+* Flag indicating that debug registers are used by guest
+* and requires save restore.
+   */
+   bool debug_save_restore;
 #endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 15f9a00..d7ce449 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -25,6 +25,7 @@
 /* Select powerpc specific features in linux/kvm.h */
 #define __KVM_HAVE_SPAPR_TCE
 #define __KVM_HAVE_PPC_SMT
+#define __KVM_HAVE_GUEST_DEBUG
 
 struct kvm_regs {
__u64 pc;
@@ -267,7 +268,24 @@ struct kvm_fpu {
__u64 fpr[32];
 };
 
+/*
+ * Defines for h/w breakpoint, watchpoint (read, write or both) and
+ * software breakpoint.
+ * These are used as type in KVM_SET_GUEST_DEBUG ioctl and status
+ * for KVM_DEBUG_EXIT.
+ */
+#define KVMPPC_DEBUG_NONE  0x0
+#define KVMPPC_DEBUG_BREAKPOINT(1UL  1)
+#define KVMPPC_DEBUG_WATCH_WRITE   (1UL  2)
+#define KVMPPC_DEBUG_WATCH_READ(1UL  3)
 struct kvm_debug_exit_arch {
+   __u64 address;
+   /*
+* exiting to userspace because of h/w breakpoint, watchpoint
+* (read, write or both) and software breakpoint.
+*/
+   __u32 status;
+   __u32 reserved;
 };
 
 /* for KVM_SET_GUEST_DEBUG */
@@ -279,10 +297,6 @@ struct kvm_guest_debug_arch {
 * Type denotes h/w breakpoint, read watchpoint, write
 * watchpoint or watchpoint (both read and write).
 */
-#define KVMPPC_DEBUG_NOTYPE0x0
-#define KVMPPC_DEBUG_BREAKPOINT(1UL  1)
-#define KVMPPC_DEBUG_WATCH_WRITE   (1UL  2)
-#define KVMPPC_DEBUG_WATCH_READ(1UL  3)
__u32 type;
__u32 reserved;
} bp[16];
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1de93a8..bf20056 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -133,6 +133,30 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+   /* Synchronize guest's desire to get debug interrupts into shadow MSR */
+#ifndef CONFIG_KVM_BOOKE_HV
+   vcpu-arch.shadow_msr = ~MSR_DE;
+   vcpu-arch.shadow_msr |= vcpu-arch.shared-msr  MSR_DE;
+#endif
+
+   /* Force enable debug interrupts when user space wants to debug */
+   if (vcpu-guest_debug) {
+#ifdef CONFIG_KVM_BOOKE_HV
+   /*
+* Since there is no shadow MSR, sync MSR_DE into the guest
+* visible MSR. Do not allow guest to change MSR[DE].
+*/
+   vcpu-arch.shared-msr |= MSR_DE;
+   mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP);
+#else
+   vcpu-arch.shadow_msr |= MSR_DE;
+   vcpu-arch.shared-msr = ~MSR_DE;
+#endif
+   }
+}
+
 /*
  * Helper function for full MSR writes.  No need to call this if only
  * EE/CE/ME/DE/RI are changing.
@@ -150,6 +174,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
kvmppc_mmu_msr_notify(vcpu, old_msr);
kvmppc_vcpu_sync_spe(vcpu);
kvmppc_vcpu_sync_fpu(vcpu);
+   kvmppc_vcpu_sync_debug(vcpu);
 }
 
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
@@ -736,6 +761,9 @@ static int emulation_exit(struct kvm_run *run, struct 
kvm_vcpu *vcpu)
run-exit_reason = KVM_EXIT_DCR;
return RESUME_HOST;
 
+   case EMULATE_EXIT_USER:
+   return RESUME_HOST;
+
case 

Re: [PATCH v4 5/7] KVM: Recalculate destination vcpu map

2013-03-21 Thread Gleb Natapov
On Thu, Mar 21, 2013 at 05:39:46AM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-03-21:
  On Thu, Mar 21, 2013 at 05:30:32AM +, Zhang, Yang Z wrote:
  Gleb Natapov wrote on 2013-03-21:
  On Thu, Mar 21, 2013 at 03:42:46AM +, Zhang, Yang Z wrote:
  Gleb Natapov wrote on 2013-03-20:
  On Wed, Mar 20, 2013 at 07:36:17PM +0800, Yang Zhang wrote:
  From: Yang Zhang yang.z.zh...@intel.com
  
  Update RTC interrupt's destination vcpu map when ioapic entry of RTC
  or apic register (id, ldr, dfr) is changed.
  
  Signed-off-by: Yang Zhang yang.z.zh...@intel.com
  ---
   virt/kvm/ioapic.c |9 +++--
   1 files changed, 7 insertions(+), 2 deletions(-)
  diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
  index ddf9414..91b4c08 100644
  --- a/virt/kvm/ioapic.c
  +++ b/virt/kvm/ioapic.c
  @@ -121,6 +121,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu
  *vcpu,
   { struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;union
   kvm_ioapic_redirect_entry *e; +   unsigned long *rtc_map =
   ioapic-rtc_status.vcpu_map;  struct kvm_lapic_irq irqe;  int 
  index;
  @@ -130,15 +131,19 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu
  *vcpu,
 if (!e-fields.mask 
 (e-fields.trig_mode == IOAPIC_LEVEL_TRIG ||
  kvm_irq_has_notifier(ioapic-kvm, 
  KVM_IRQCHIP_IOAPIC,
  -   index))) {
  +   index) || index == 8)) {
 irqe.dest_id = e-fields.dest_id;
 irqe.vector = e-fields.vector;
 irqe.dest_mode = e-fields.dest_mode;
 irqe.shorthand = 0;
   
 if (kvm_apic_match_dest(vcpu, NULL, 
  irqe.shorthand,
  -  irqe.dest_id, 
  irqe.dest_mode))
  +  irqe.dest_id, 
  irqe.dest_mode)) {
 __set_bit(irqe.vector, eoi_exit_bitmap);
  +  if (index == 8)
  +  __set_bit(vcpu-vcpu_id, 
  rtc_map);
  +  } else if (index == 8)
  +  __clear_bit(vcpu-vcpu_id, rtc_map);
  rtc_map bitmap is accessed from different vcpus simultaneously so access
  has to be atomic. We also have a race:
  
  vcpu0   iothread
  ioapic config changes
  request scan ioapic
   inject rtc interrupt
   use old vcpu mask
  scan_ioapic()
  recalculate vcpu mask
  
  So this approach (suggested by me :() will not work.
  
  Need to think about it some more. Maybe your idea of building a bitmap
  while injecting the interrupt is the way to go indeed: pass a pointer to
  a bitmap to kvm_irq_delivery_to_apic() and build it there. Pass a NULL
  pointer if the caller does not need to track vcpus.
  Or, we can block inject rtc interrupt during recalculate vcpu map.
  
  if(need_eoi > 0 && in_recalculating)
  return coalesced
  
  This should be ||. Then you need to maintain in_recalculating and
  recalculations requests may overlap. Too complex and fragile.
  It should not be too complex. How about the following logic?
  
  when make scan ioapic request:
  kvm_vcpu_scan_ioapic()
  {
  kvm_for_each_vcpu()
 in_recalculating++;
  }
  
  Then on each vcpu's request handler:
  vcpu_scan_ioapic()
  {
  in_recalculating--;
  }
  
  kvm_vcpu_scan_ioapic() can be called more often than vcpu_scan_ioapic()
 Ok. I see your point. Maybe we need to roll back to the old idea.
 
 Can you pick the first two patches? If we roll back to the old way, it will
 not touch that code.
 
First patch is great, but drop no longer needed irqe there. I do not see
the point of the second patch if the map will be built during injection.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v4 5/7] KVM: Recalculate destination vcpu map

2013-03-21 Thread Zhang, Yang Z
Gleb Natapov wrote on 2013-03-21:
 On Thu, Mar 21, 2013 at 05:39:46AM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-03-21:
 On Thu, Mar 21, 2013 at 05:30:32AM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-03-21:
 On Thu, Mar 21, 2013 at 03:42:46AM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-03-20:
 On Wed, Mar 20, 2013 at 07:36:17PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Update RTC interrupt's destination vcpu map when ioapic entry of RTC
 or apic register (id, ldr, dfr) is changed.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  virt/kvm/ioapic.c |9 +++--
  1 files changed, 7 insertions(+), 2 deletions(-)
 diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
 index ddf9414..91b4c08 100644
 --- a/virt/kvm/ioapic.c
 +++ b/virt/kvm/ioapic.c
 @@ -121,6 +121,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu
 *vcpu,
  { struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;union
  kvm_ioapic_redirect_entry *e; +   unsigned long *rtc_map =
  ioapic-rtc_status.vcpu_map;  struct kvm_lapic_irq irqe;  int
 index;
 @@ -130,15 +131,19 @@ void kvm_ioapic_scan_entry(struct
 kvm_vcpu
 *vcpu,
if (!e-fields.mask 
(e-fields.trig_mode == IOAPIC_LEVEL_TRIG ||
 kvm_irq_has_notifier(ioapic-kvm,
 KVM_IRQCHIP_IOAPIC,
 -   index))) {
 +   index) || index == 8)) {
irqe.dest_id = e-fields.dest_id;
irqe.vector = e-fields.vector;
irqe.dest_mode = e-fields.dest_mode;
irqe.shorthand = 0;
  
if (kvm_apic_match_dest(vcpu, NULL, 
 irqe.shorthand,
 -  irqe.dest_id, 
 irqe.dest_mode))
 +  irqe.dest_id, 
 irqe.dest_mode)) {
__set_bit(irqe.vector, eoi_exit_bitmap);
 +  if (index == 8)
 +  __set_bit(vcpu-vcpu_id, 
 rtc_map);
 +  } else if (index == 8)
 +  __clear_bit(vcpu-vcpu_id, rtc_map);
 rtc_map bitmap is accessed from different vcpus simultaneously so
 access has to be atomic. We also have a race:
 
 vcpu0   iothread
 ioapic config changes
 request scan ioapic
  inject rtc interrupt
  use old vcpu mask
 scan_ioapic()
 recalculate vcpu mask
 
 So this approach (suggested by me :() will not work.
 
 Need to think about it some more. Maybe your idea of building a
 bitmap while injecting the interrupt is the way to go indeed: pass
 a pointer to a bitmap to kvm_irq_delivery_to_apic() and build it
 there. Pass a NULL pointer if the caller does not need to track vcpus.
 Or, we can block inject rtc interrupt during recalculate vcpu map.
 
 if(need_eoi > 0 && in_recalculating)
 return coalesced
 
 This should be ||. Then you need to maintain in_recalculating and
 recalculations requests may overlap. Too complex and fragile.
 It should not be too complex. How about the following logic?
 
 when make scan ioapic request:
 kvm_vcpu_scan_ioapic()
 {
 kvm_for_each_vcpu()
in_recalculating++;
 }
 
 Then on each vcpu's request handler:
 vcpu_scan_ioapic()
 {
 in_recalculating--;
 }
 
 kvm_vcpu_scan_ioapic() can be called more often than vcpu_scan_ioapic()
 Ok. I see your point. Maybe we need to rollback to old idea.
 
 Can you pick the first two patches? If rollback to old way, it will not
 touch those code.
 
 First patch is great, but drop no longer needed irqe there. I do not see
 the point of the second patch if the map will be built during injection.
Sure. I will resend the first patch.
And we need to rebuild the TMR when an ioapic entry changes, so the second patch
will be used at that time. But it's OK to send it with the APICv patch.

Best regards,
Yang


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v4 5/7] KVM: Recalculate destination vcpu map

2013-03-21 Thread Zhang, Yang Z
Gleb Natapov wrote on 2013-03-20:
 On Wed, Mar 20, 2013 at 07:36:17PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Update RTC interrupt's destination vcpu map when ioapic entry of RTC
 or apic register (id, ldr, dfr) is changed.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  virt/kvm/ioapic.c |9 +++--
  1 files changed, 7 insertions(+), 2 deletions(-)
 diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
 index ddf9414..91b4c08 100644
 --- a/virt/kvm/ioapic.c
 +++ b/virt/kvm/ioapic.c
 @@ -121,6 +121,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
  {   struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;union
  kvm_ioapic_redirect_entry *e; + unsigned long *rtc_map =
  ioapic-rtc_status.vcpu_map;struct kvm_lapic_irq irqe;  int 
 index;
 @@ -130,15 +131,19 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
  if (!e-fields.mask 
  (e-fields.trig_mode == IOAPIC_LEVEL_TRIG ||
   kvm_irq_has_notifier(ioapic-kvm, KVM_IRQCHIP_IOAPIC,
 - index))) {
 + index) || index == 8)) {
  irqe.dest_id = e-fields.dest_id;
  irqe.vector = e-fields.vector;
  irqe.dest_mode = e-fields.dest_mode;
  irqe.shorthand = 0;
  
  if (kvm_apic_match_dest(vcpu, NULL, irqe.shorthand,
 -irqe.dest_id, irqe.dest_mode))
 +irqe.dest_id, irqe.dest_mode)) {
  __set_bit(irqe.vector, eoi_exit_bitmap);
 +if (index == 8)
 +__set_bit(vcpu-vcpu_id, rtc_map);
 +} else if (index == 8)
 +__clear_bit(vcpu-vcpu_id, rtc_map);
 rtc_map bitmap is accessed from different vcpus simultaneously so access
 has to be atomic. We also have a race:
 
 vcpu0   iothread
 ioapic config changes
 request scan ioapic
  inject rtc interrupt
  use old vcpu mask
 scan_ioapic()
 recalculate vcpu mask
 
 So this approach (suggested by me :() will not work.
 
 Need to think about it some more. Maybe your idea of building a bitmap
 while injecting the interrupt is the way to go indeed: pass a pointer to
 a bitmap to kvm_irq_delivery_to_apic() and build it there. Pass a NULL
 pointer if the caller does not need to track vcpus.
How about building it in kvm_apic_set_irq()? That should be more straightforward.

Best regards,
Yang

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 5/7] KVM: Recalculate destination vcpu map

2013-03-21 Thread Gleb Natapov
On Thu, Mar 21, 2013 at 07:02:39AM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-03-20:
  On Wed, Mar 20, 2013 at 07:36:17PM +0800, Yang Zhang wrote:
  From: Yang Zhang yang.z.zh...@intel.com
  
  Update RTC interrupt's destination vcpu map when ioapic entry of RTC
  or apic register (id, ldr, dfr) is changed.
  
  Signed-off-by: Yang Zhang yang.z.zh...@intel.com
  ---
   virt/kvm/ioapic.c |9 +++--
   1 files changed, 7 insertions(+), 2 deletions(-)
  diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
  index ddf9414..91b4c08 100644
  --- a/virt/kvm/ioapic.c
  +++ b/virt/kvm/ioapic.c
  @@ -121,6 +121,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
   { struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;union
   kvm_ioapic_redirect_entry *e; +   unsigned long *rtc_map =
   ioapic-rtc_status.vcpu_map;  struct kvm_lapic_irq irqe;  int 
  index;
  @@ -130,15 +131,19 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
 if (!e-fields.mask 
 (e-fields.trig_mode == IOAPIC_LEVEL_TRIG ||
  kvm_irq_has_notifier(ioapic-kvm, KVM_IRQCHIP_IOAPIC,
  -   index))) {
  +   index) || index == 8)) {
 irqe.dest_id = e-fields.dest_id;
 irqe.vector = e-fields.vector;
 irqe.dest_mode = e-fields.dest_mode;
 irqe.shorthand = 0;
   
 if (kvm_apic_match_dest(vcpu, NULL, irqe.shorthand,
  -  irqe.dest_id, irqe.dest_mode))
  +  irqe.dest_id, irqe.dest_mode)) {
 __set_bit(irqe.vector, eoi_exit_bitmap);
  +  if (index == 8)
  +  __set_bit(vcpu-vcpu_id, rtc_map);
  +  } else if (index == 8)
  +  __clear_bit(vcpu-vcpu_id, rtc_map);
  rtc_map bitmap is accessed from different vcpus simultaneously so access
  has to be atomic. We also have a race:
  
  vcpu0   iothread
  ioapic config changes
  request scan ioapic
   inject rtc interrupt
   use old vcpu mask
  scan_ioapic()
  recalculate vcpu mask
  
  So this approach (suggested by me :() will not work.
  
  Need to think about it some more. Maybe your idea of building a bitmap
  while injecting the interrupt is the way to go indeed: pass a pointer to
  a bitmap to kvm_irq_delivery_to_apic() and build it there. Pass a NULL
  pointer if the caller does not need to track vcpus.
 How about building it in kvm_apic_set_irq()? That should be more straightforward.
 
Sure, pass a pointer there. Just do not access ioapic directly.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: allow host header to be included even for !CONFIG_KVM

2013-03-21 Thread Gleb Natapov
On Wed, Mar 20, 2013 at 06:58:41PM -0500, Scott Wood wrote:
 On 03/14/2013 07:13:46 PM, Kevin Hilman wrote:
 The new context tracking subsystem unconditionally includes kvm_host.h
 headers for the guest enter/exit macros.  This causes a compile
 failure when KVM is not enabled.
 
 Fix by adding an IS_ENABLED(CONFIG_KVM) check to kvm_host so it can
 be included/compiled even when KVM is not enabled.
 
 Cc: Frederic Weisbecker fweis...@gmail.com
 Signed-off-by: Kevin Hilman khil...@linaro.org
 ---
 Applies on v3.9-rc2
 
  include/linux/kvm_host.h | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)
 
 This broke the PPC non-KVM build, which was relying on stub
 functions in kvm_ppc.h, which relies on struct vcpu in kvm_host.h.
 
 Why can't the entirety of kvm_host.h be included regardless of
 CONFIG_KVM, just like most other feature-specific headers?  Why
 can't the if/else just go around the functions that you want to stub
 out for non-KVM builds?
 
Kevin,

 What compilation failure does this patch fix? I presume it is something
ARM-related.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] vfio: make local function vfio_pci_intx_unmask_handler() static

2013-03-21 Thread Wei Yongjun
From: Wei Yongjun yongjun_...@trendmicro.com.cn

vfio_pci_intx_unmask_handler() was not declared. It should be static.

Signed-off-by: Wei Yongjun yongjun_...@trendmicro.com.cn
---
 drivers/vfio/pci/vfio_pci_intrs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index a965091..865175e 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -287,7 +287,8 @@ void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
  * a signal is necessary, which can then be handled via a work queue
  * or directly depending on the caller.
  */
-int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev, void *unused)
+static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev,
+   void *unused)
 {
struct pci_dev *pdev = vdev-pdev;
unsigned long flags;

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


KVM next tree merge onto current Linus master

2013-03-21 Thread Alexander Graf
Hi guys,

Just recently this really important patch got pulled into Linus' tree for 3.9:

commit 1674400aaee5b466c595a8fc310488263ce888c7
Author: Anton Blanchard an...@samba.org
Date:   Tue Mar 12 01:51:51 2013 +

powerpc: Fix -mcmodel=medium breakage in prom_init.c

Commit 5ac47f7a6efb (powerpc: Relocate prom_init.c on 64bit) made
prom_init.c position independent by manually relocating its entries
in the TOC.

We get the address of the TOC entries with the __prom_init_toc_start
linker symbol. If __prom_init_toc_start ends up as an entry in the
TOC then we need to add an offset to get the current address. This is
the case for older toolchains.

On the other hand, if we have a newer toolchain that supports
-mcmodel=medium then __prom_init_toc_start will be created by a
relative offset from r2 (the TOC pointer). Since r2 has already been
relocated, nothing more needs to be done.  Adding an offset in this
case is wrong and Aaro Koskinen and Alexander Graf have noticed
G5 and OpenBIOS breakage.

Alan Modra suggested we just use r2 to get at the TOC which is simpler
and works with both old and new toolchains.

Reported-by: Alexander Graf ag...@suse.de
Signed-off-by: Anton Blanchard an...@samba.org
Tested-by: Aaro Koskinen aaro.koski...@iki.fi
Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org


Without that commit, I can not boot my G5, thus I can't run automated tests on 
it against my queue.

Could you please merge kvm/next against linus/master, so that I can base my 
trees against that?


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 6/6] kvm/ppc/mpic: in-kernel MPIC emulation

2013-03-21 Thread Alexander Graf

On 14.02.2013, at 06:49, Scott Wood wrote:

 Hook the MPIC code up to the KVM interfaces, add locking, etc.
 
 TODO: irqfd support
 
 Signed-off-by: Scott Wood scottw...@freescale.com

Could you please split this patch up on your next respin? Also please make sure 
you don't have #if 0'ed code in here. Just return to user space with an error 
when you encounter something you don't know how to handle.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/9] KVM: PPC: Book3S: Add infrastructure to implement kernel-side RTAS calls

2013-03-21 Thread Alexander Graf

On 15.02.2013, at 00:59, Paul Mackerras wrote:

 From: Michael Ellerman mich...@ellerman.id.au
 
 For pseries machine emulation, in order to move the interrupt
 controller code to the kernel, we need to intercept some RTAS
 calls in the kernel itself.  This adds an infrastructure to allow
 in-kernel handlers to be registered for RTAS services by name.
 A new ioctl, KVM_PPC_RTAS_DEFINE_TOKEN, then allows userspace to
 associate token values with those service names.  Then, when the
 guest requests an RTAS service with one of those token values, it
 will be handled by the relevant in-kernel handler rather than being
 passed up to userspace as at present.
 
 Signed-off-by: Michael Ellerman mich...@ellerman.id.au
 Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
 Signed-off-by: Paul Mackerras pau...@samba.org
 ---
 Documentation/virtual/kvm/api.txt   |   19 
 arch/powerpc/include/asm/hvcall.h   |3 +
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/include/asm/kvm_ppc.h  |4 +
 arch/powerpc/include/uapi/asm/kvm.h |6 ++
 arch/powerpc/kvm/Makefile   |1 +
 arch/powerpc/kvm/book3s_hv.c|   18 +++-
 arch/powerpc/kvm/book3s_pr_papr.c   |7 ++
 arch/powerpc/kvm/book3s_rtas.c  |  182 +++
 arch/powerpc/kvm/powerpc.c  |9 +-
 include/uapi/linux/kvm.h|3 +
 11 files changed, 251 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_rtas.c
 
 diff --git a/Documentation/virtual/kvm/api.txt 
 b/Documentation/virtual/kvm/api.txt
 index c2534c3..d3e2d60 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -2122,6 +2122,25 @@ header; first `n_valid' valid entries with contents 
 from the data
 written, then `n_invalid' invalid entries, invalidating any previously
 valid entries found.
 
 +4.79 KVM_PPC_RTAS_DEFINE_TOKEN
 +
 +Capability: KVM_CAP_PPC_RTAS
 +Architectures: ppc
 +Type: vm ioctl
 +Parameters: struct kvm_rtas_token_args
 +Returns: 0 on success, -1 on error
 +
 +Defines a token value for a RTAS (Run Time Abstraction Services)
 +service in order to allow it to be handled in the kernel.  The
 +argument struct gives the name of the service, which must be the name
 +of a service that has a kernel-side implementation.  If the token
 +value is non-zero, it will be associated with that service, and
 +subsequent RTAS calls by the guest specifying that token will be
 +handled by the kernel.  If the token value is 0, then any token
 +associated with the service will be forgotten, and subsequent RTAS
 +calls by the guest for that service will be passed to userspace to be
 +handled.
 +
 
 5. The kvm_run structure
 
 diff --git a/arch/powerpc/include/asm/hvcall.h 
 b/arch/powerpc/include/asm/hvcall.h
 index 7a86706..9ea22b2 100644
 --- a/arch/powerpc/include/asm/hvcall.h
 +++ b/arch/powerpc/include/asm/hvcall.h
 @@ -269,6 +269,9 @@
 #define H_GET_MPP_X   0x314
 #define MAX_HCALL_OPCODE  H_GET_MPP_X
 
 +/* Platform specific hcalls, used by KVM */
 +#define H_RTAS   0xf000

How about you define a different hcall ID for this? Then QEMU would create its 
rtas entry blob such that KVM-routed RTAS handling goes to KVM directly.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/9] KVM: PPC: Remove unused argument to kvmppc_core_dequeue_external

2013-03-21 Thread Alexander Graf

On 15.02.2013, at 01:00, Paul Mackerras wrote:

 Currently kvmppc_core_dequeue_external() takes a struct kvm_interrupt *
 argument and does nothing with it, in any of its implementations.
 This removes it in order to make things easier for forthcoming
 in-kernel interrupt controller emulation code.
 
 Signed-off-by: Paul Mackerras pau...@samba.org

Thanks, applied to kvm-ppc-queue.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/9] KVM: PPC: Book3S: Add kernel emulation for the XICS interrupt controller

2013-03-21 Thread Alexander Graf

On 25.02.2013, at 01:59, Paul Mackerras wrote:

 On Wed, Feb 20, 2013 at 04:58:54PM -0300, Marcelo Tosatti wrote:
 
 This is probably a stupid question, but why the
 KVM_SET_IRQCHIP/KVM_SET_GSI_ROUTING interface is not appropriate for
 your purposes?
 
 x86 sets up a default GSI-IRQCHIP PIN mapping on creation (during
 KVM_SET_IRQCHIP), but it can be modified with KVM_SET_GSI_ROUTING.
 
 So, I see Scott already answered from the point of view of his MPIC
 emulation stuff, but I'll answer too from the point of view of my XICS
 emulation code.
 
 My understanding, possibly imperfect, is that in a real system the
 routing of GSIs to IOAPICs would either be hardwired or set up by the
 BIOS, described in ACPI tables, and not modified by the operating
 system.  Is that correct?  So my belief is that the GSI routing is
 fundamentally distinct from and handled differently from the routing
 of interrupts to CPUs, which is fully under the control of the OS.

It's a different layer. I guess there's really some confusion on names here :). 
I'm always confused when I read sources and you apparently get confused when 
you read about GSIs.

GSIs are an ACPI concept. It's not x86 specific, it's also not APIC specific. 
It's just a global name space for IRQs.

Imagine you have 2 MPICs in your system. But you only want to use a single 
token / number to access any IRQ on any chip. That's where GSIs come into play. 
They map different irqchip IRQs onto a flat number space. To speak with x86 
names:

Virtualization perspective:

  QEMU -> GSI -> IOAPIC -> LAPIC -> CPU

Device perspective:

  Device irq line -> IOAPIC -> LAPIC -> CPU


The IOAPIC is the piece of hardware that interrupt lines get attached to. You 
connect a pin on it to an irq pin of your device. That talks to the LAPIC to 
actually schedule interrupts on target CPUs. The LAPIC then fetches interrupts 
and pulls the CPU's interrupt line.

Of course, things are slightly more complicated in the x86 world, as everything 
behind the IOAPIC also carries a payload defining which pin actually got 
triggered, but you get the idea.

So really just consider GSIs as a global flat number space for irqchip pins.


Alex

 In the XICS model we have a set of interrupt sources, each identified
 by a 24-bit number.  Control operations on an interrupt source just
 identify the source by its number.  Thus the interrupt source number
 is like a GSI, but we don't need to map that to a different space
 (e.g. IOAPIC identifier and input number) in order to operate on it,
 we can just operate on it directly.
 
 Paul.
 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] virtio-blk: Set default serial id

2013-03-21 Thread Paolo Bonzini
Il 20/03/2013 09:34, Asias He ha scritto:
 The solution is to do persistent naming either by really passing -device
 virtio-blk-pci,serial= or with udev inside the guest using the bus
 address (PCI devfn) like the new persistent network interface naming for
 Linux.
 
 '-virtio-blk-pci,serial=' specified by user would be persistent, but pci
 id might be changed as well.

Yes, but '-device virtio-blk-pci,addr=10.0' would be persistent too. :)
 Also, management may try to keep addresses persistent (libvirt does).

Paolo

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] ARM: EXYNOS5440: DTS: Add virtual GIC DT bindings

2013-03-21 Thread Giridhar Maruthy
Exynos5440 has a GIC which has virtualization support
in it. The virtualization interfaces are used by KVM.

Signed-off-by: Giridhar Maruthy giridha...@samsung.com
---
 arch/arm/boot/dts/exynos5440.dtsi |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/exynos5440.dtsi 
b/arch/arm/boot/dts/exynos5440.dtsi
index c374a31..25c6134 100644
--- a/arch/arm/boot/dts/exynos5440.dtsi
+++ b/arch/arm/boot/dts/exynos5440.dtsi
@@ -26,7 +26,11 @@
compatible = arm,cortex-a15-gic;
#interrupt-cells = 3;
interrupt-controller;
-   reg = 0x2E1000 0x1000, 0x2E2000 0x1000;
+   reg =   0x2E1000 0x1000,
+   0x2E2000 0x1000,
+   0x2E4000 0x2000,
+   0x2E6000 0x2000;
+   interrupts = 1 9 0xf04;
};
 
cpus {
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Fwd: [PATCH] ARM: EXYNOS5440: DTS: Add virtual GIC DT bindings

2013-03-21 Thread Giridhar Maruthy
This patch is similar to the ones already submitted by Alexander Graf
and also in Christoffer's tree in linux-kvm-arm for Arndale board,
while this is required for exynos5440.




Exynos5440 has a GIC which has virtualization support
in it. The virtualization interfaces are used by KVM.

Signed-off-by: Giridhar Maruthy giridha...@samsung.com
---
 arch/arm/boot/dts/exynos5440.dtsi |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/exynos5440.dtsi
b/arch/arm/boot/dts/exynos5440.dtsi
index c374a31..25c6134 100644
--- a/arch/arm/boot/dts/exynos5440.dtsi
+++ b/arch/arm/boot/dts/exynos5440.dtsi
@@ -26,7 +26,11 @@
compatible = arm,cortex-a15-gic;
#interrupt-cells = 3;
interrupt-controller;
-   reg = 0x2E1000 0x1000, 0x2E2000 0x1000;
+   reg =   0x2E1000 0x1000,
+   0x2E2000 0x1000,
+   0x2E4000 0x2000,
+   0x2E6000 0x2000;
+   interrupts = 1 9 0xf04;
};

cpus {
--
1.7.9.5
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PPC: e500: Expose MMU registers via ONE_REG

2013-03-21 Thread Alexander Graf

On 19.03.2013, at 18:26, Scott Wood wrote:

 On 03/19/2013 12:17:11 PM, Mihai Caraman wrote:
 diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
 index 66b6e31..b77b855 100644
 --- a/arch/powerpc/kvm/e500_mmu.c
 +++ b/arch/powerpc/kvm/e500_mmu.c
 @@ -596,6 +596,95 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, 
 struct kvm_sregs *sregs)
  return 0;
 }
 +int kvmppc_get_one_reg_500_tlb(struct kvm_vcpu *vcpu, u64 id,
 +union kvmppc_one_reg *val)
 
 s/500/e500/
 
 +int kvmppc_set_one_reg_500_tlb(struct kvm_vcpu *vcpu, u64 id,
 +   union kvmppc_one_reg *val)
 +{
 +int r = 0;
 +long int i;
 +
 +switch (id) {
 +case KVM_REG_PPC_MAS0:
 +vcpu-arch.shared-mas0 = set_reg_val(id, *val);
 +break;
 +case KVM_REG_PPC_MAS1:
 +vcpu-arch.shared-mas1 = set_reg_val(id, *val);
 +break;
 +case KVM_REG_PPC_MAS2:
 +vcpu-arch.shared-mas2 = set_reg_val(id, *val);
 +break;
 +case KVM_REG_PPC_MAS7_3:
 +vcpu-arch.shared-mas7_3 = set_reg_val(id, *val);
 +break;
 +case KVM_REG_PPC_MAS4:
 +vcpu-arch.shared-mas4 = set_reg_val(id, *val);
 +break;
 +case KVM_REG_PPC_MAS6:
 +vcpu-arch.shared-mas6 = set_reg_val(id, *val);
 +break;
 +case KVM_REG_PPC_MMUCFG: {
 +u32 mmucfg = set_reg_val(id, *val);
 +vcpu-arch.mmucfg = mmucfg  ~MMUCFG_LPIDSIZE;
 +break;
 +}
 
 Do we really want to allow arbitrary MMUCFG changes?  It won't magically make 
 us able to support larger RAs, PIDs, different MAVN, etc.

Only if we update the actual shadow mmu configuration as well.

 
 +case KVM_REG_PPC_TLB0CFG:
 +case KVM_REG_PPC_TLB1CFG:
 +case KVM_REG_PPC_TLB2CFG:
 +case KVM_REG_PPC_TLB3CFG: {
 +u32 tlbncfg = set_reg_val(id, *val);
 +u32 geometry_mask = TLBnCFG_N_ENTRY | TLBnCFG_ASSOC;
 +i = id - KVM_REG_PPC_TLB0CFG;
 +
 +/* MMU geometry (way/size) can be set only using SW_TLB */
 +if ((vcpu-arch.tlbcfg[i]  geometry_mask) !=
 +(tlbncfg  geometry_mask))
 +r = -EINVAL;
 +
 +vcpu-arch.tlbcfg[i] = set_reg_val(id, *val);
 +break;
 +}
 
 Likewise -- just because QEMU sets a bit here doesn't mean KVM can support it.
 
 I thought the initial plan for setting these config registers was to accept 
 it if it exactly matches what KVM already has, and give an error otherwise -- 
 thus allowing for the possibility of accepting certain specific updates in 
 the future.

Yes, that was the idea :).


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PPC: e500: Add separate functions for vcpu's MMU configuration

2013-03-21 Thread Alexander Graf

On 19.03.2013, at 18:16, Mihai Caraman wrote:

 Move vcpu's MMU default configuration and geometry update into their own
 functions.

Mind to explain why?


Alex

 
 Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
 ---
 arch/powerpc/kvm/e500_mmu.c |   59 +++
 1 files changed, 37 insertions(+), 22 deletions(-)
 
 diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
 index 5c44759..66b6e31 100644
 --- a/arch/powerpc/kvm/e500_mmu.c
 +++ b/arch/powerpc/kvm/e500_mmu.c
 @@ -596,6 +596,20 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, 
 struct kvm_sregs *sregs)
   return 0;
 }
 
 +static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
 + struct kvm_book3e_206_tlb_params *params)
 +{
 + vcpu-arch.tlbcfg[0] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 + if (params-tlb_sizes[0] = 2048)
 + vcpu-arch.tlbcfg[0] |= params-tlb_sizes[0];
 + vcpu-arch.tlbcfg[0] |= params-tlb_ways[0]  TLBnCFG_ASSOC_SHIFT;
 +
 + vcpu-arch.tlbcfg[1] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 + vcpu-arch.tlbcfg[1] |= params-tlb_sizes[1];
 + vcpu-arch.tlbcfg[1] |= params-tlb_ways[1]  TLBnCFG_ASSOC_SHIFT;
 + return 0;   
 +}
 +
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 struct kvm_config_tlb *cfg)
 {
 @@ -692,16 +706,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
   vcpu_e500-gtlb_offset[0] = 0;
   vcpu_e500-gtlb_offset[1] = params.tlb_sizes[0];
 
 - vcpu-arch.mmucfg = mfspr(SPRN_MMUCFG)  ~MMUCFG_LPIDSIZE;
 -
 - vcpu-arch.tlbcfg[0] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 - if (params.tlb_sizes[0] = 2048)
 - vcpu-arch.tlbcfg[0] |= params.tlb_sizes[0];
 - vcpu-arch.tlbcfg[0] |= params.tlb_ways[0]  TLBnCFG_ASSOC_SHIFT;
 -
 - vcpu-arch.tlbcfg[1] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 - vcpu-arch.tlbcfg[1] |= params.tlb_sizes[1];
 - vcpu-arch.tlbcfg[1] |= params.tlb_ways[1]  TLBnCFG_ASSOC_SHIFT;
 + /* Update vcpu's MMU geometry based on SW_TLB input */
 + vcpu_mmu_geometry_update(vcpu, params);
 
   vcpu_e500-shared_tlb_pages = pages;
   vcpu_e500-num_shared_tlb_pages = num_pages;
 @@ -737,6 +743,26 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
   return 0;
 }
 
 +/* vcpu's MMU default configuration */
 +static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
 +struct kvmppc_e500_tlb_params *params)
 +{
 + /* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/
 + vcpu-arch.mmucfg = mfspr(SPRN_MMUCFG)  ~MMUCFG_LPIDSIZE;
 +
 + /* Initialize IPROT field with host value*/
 + vcpu-arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) 
 +  ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 + vcpu-arch.tlbcfg[0] |= params[0].entries;
 + vcpu-arch.tlbcfg[0] |= params[0].ways  TLBnCFG_ASSOC_SHIFT;
 +
 + vcpu-arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) 
 +  ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 + vcpu-arch.tlbcfg[1] |= params[1].entries;
 + vcpu-arch.tlbcfg[1] |= params[1].ways  TLBnCFG_ASSOC_SHIFT;
 + return 0;
 +}
 +
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
   struct kvm_vcpu *vcpu = vcpu_e500-vcpu;
 @@ -781,18 +807,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 
 *vcpu_e500)
   if (!vcpu_e500-g2h_tlb1_map)
   goto err;
 
 - /* Init TLB configuration register */
 - vcpu-arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) 
 -  ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 - vcpu-arch.tlbcfg[0] |= vcpu_e500-gtlb_params[0].entries;
 - vcpu-arch.tlbcfg[0] |=
 - vcpu_e500-gtlb_params[0].ways  TLBnCFG_ASSOC_SHIFT;
 -
 - vcpu-arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) 
 -  ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 - vcpu-arch.tlbcfg[1] |= vcpu_e500-gtlb_params[1].entries;
 - vcpu-arch.tlbcfg[1] |=
 - vcpu_e500-gtlb_params[1].ways  TLBnCFG_ASSOC_SHIFT;
 + vcpu_mmu_init(vcpu, vcpu_e500-gtlb_params);
 
   kvmppc_recalc_tlb1map_range(vcpu_e500);
   return 0;
 -- 
 1.7.4.1
 
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] KVM: MMU: Make the meaning of kvm_mmu_free_some_pages() clearer

2013-03-21 Thread Takuya Yoshikawa
Takuya Yoshikawa (2):
  KVM: MMU: Move kvm_mmu_free_some_pages() into kvm_mmu_alloc_page()
  KVM: MMU: Rename kvm_mmu_free_some_pages() to make_mmu_pages_available()

 arch/x86/kvm/mmu.c |   16 +---
 arch/x86/kvm/mmu.h |6 --
 arch/x86/kvm/paging_tmpl.h |1 -
 3 files changed, 9 insertions(+), 14 deletions(-)

-- 
1.7.5.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] KVM: MMU: Move kvm_mmu_free_some_pages() into kvm_mmu_alloc_page()

2013-03-21 Thread Takuya Yoshikawa
What this function is doing is to ensure that the number of shadow pages
does not exceed the maximum limit stored in n_max_mmu_pages: so this is
placed at every code path that can reach kvm_mmu_alloc_page().

Although it might have some sense to spread this function in each such
code path when it could be called before taking mmu_lock, the rule was
changed not to do so.

Taking this background into account, this patch moves it into
kvm_mmu_alloc_page() and simplifies the code.

Note: the unlikely hint in kvm_mmu_free_some_pages() guarantees that the
overhead of this function is almost zero except when we actually need to
allocate some shadow pages, so we do not need to care about calling it
multiple times in one path by doing kvm_mmu_get_page() a few times.

Signed-off-by: Takuya Yoshikawa yoshikawa_takuya...@lab.ntt.co.jp
---
 arch/x86/kvm/mmu.c |9 +++--
 arch/x86/kvm/paging_tmpl.h |1 -
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c1a9b7b..38f34c5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1505,6 +1505,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct 
kvm_vcpu *vcpu,
   u64 *parent_pte, int direct)
 {
struct kvm_mmu_page *sp;
+
+   kvm_mmu_free_some_pages(vcpu);
+
sp = mmu_memory_cache_alloc(vcpu-arch.mmu_page_header_cache);
sp-spt = mmu_memory_cache_alloc(vcpu-arch.mmu_page_cache);
if (!direct)
@@ -2842,7 +2845,6 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, 
u32 error_code,
spin_lock(vcpu-kvm-mmu_lock);
if (mmu_notifier_retry(vcpu-kvm, mmu_seq))
goto out_unlock;
-   kvm_mmu_free_some_pages(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, gfn, pfn, level);
r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
@@ -2920,7 +2922,6 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 
if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
spin_lock(vcpu-kvm-mmu_lock);
-   kvm_mmu_free_some_pages(vcpu);
sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
  1, ACC_ALL, NULL);
++sp-root_count;
@@ -2932,7 +2933,6 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 
ASSERT(!VALID_PAGE(root));
spin_lock(vcpu-kvm-mmu_lock);
-   kvm_mmu_free_some_pages(vcpu);
sp = kvm_mmu_get_page(vcpu, i  (30 - PAGE_SHIFT),
  i  30,
  PT32_ROOT_LEVEL, 1, ACC_ALL,
@@ -2971,7 +2971,6 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
 
spin_lock(vcpu-kvm-mmu_lock);
-   kvm_mmu_free_some_pages(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
  0, ACC_ALL, NULL);
root = __pa(sp-spt);
@@ -3005,7 +3004,6 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
return 1;
}
spin_lock(vcpu-kvm-mmu_lock);
-   kvm_mmu_free_some_pages(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, i  30,
  PT32_ROOT_LEVEL, 0,
  ACC_ALL, NULL);
@@ -3311,7 +3309,6 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t 
gpa, u32 error_code,
spin_lock(vcpu-kvm-mmu_lock);
if (mmu_notifier_retry(vcpu-kvm, mmu_seq))
goto out_unlock;
-   kvm_mmu_free_some_pages(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, gfn, pfn, level);
r = __direct_map(vcpu, gpa, write, map_writable,
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 105dd5b..af143f0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -627,7 +627,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t 
addr, u32 error_code,
goto out_unlock;
 
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
-   kvm_mmu_free_some_pages(vcpu);
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, walker.gfn, pfn, level);
r = FNAME(fetch)(vcpu, addr, walker, write_fault,
-- 
1.7.5.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] KVM: MMU: Rename kvm_mmu_free_some_pages() to make_mmu_pages_available()

2013-03-21 Thread Takuya Yoshikawa
The current name kvm_mmu_free_some_pages should be used for something
that actually frees some shadow pages, as we expect from the name, but
what the function is doing is to make some, KVM_MIN_FREE_MMU_PAGES,
shadow pages available: it does nothing when there are enough.

This patch changes the name to reflect this meaning better; while doing
this renaming, the code in the wrapper function is inlined into the main
body since the whole function will be inlined into the only caller now.

Signed-off-by: Takuya Yoshikawa yoshikawa_takuya...@lab.ntt.co.jp
---
 arch/x86/kvm/mmu.c |9 +++--
 arch/x86/kvm/mmu.h |6 --
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 38f34c5..633e30c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1501,12 +1501,14 @@ static void drop_parent_pte(struct kvm_mmu_page *sp,
mmu_spte_clear_no_track(parent_pte);
 }
 
+static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
+
 static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
   u64 *parent_pte, int direct)
 {
struct kvm_mmu_page *sp;
 
-   kvm_mmu_free_some_pages(vcpu);
+   make_mmu_pages_available(vcpu);
 
sp = mmu_memory_cache_alloc(vcpu-arch.mmu_page_header_cache);
sp-spt = mmu_memory_cache_alloc(vcpu-arch.mmu_page_cache);
@@ -4010,10 +4012,13 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, 
gva_t gva)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
 
-void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
 {
LIST_HEAD(invalid_list);
 
+   if (likely(kvm_mmu_available_pages(vcpu-kvm) = 
KVM_MIN_FREE_MMU_PAGES))
+   return;
+
while (kvm_mmu_available_pages(vcpu-kvm)  KVM_REFILL_PAGES) {
if (!prepare_zap_oldest_mmu_page(vcpu-kvm, invalid_list))
break;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 3b1ad00..2adcbc2 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -64,12 +64,6 @@ static inline unsigned int kvm_mmu_available_pages(struct 
kvm *kvm)
return 0;
 }
 
-static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
-{
-   if (unlikely(kvm_mmu_available_pages(vcpu-kvm) 
KVM_MIN_FREE_MMU_PAGES))
-   __kvm_mmu_free_some_pages(vcpu);
-}
-
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
if (likely(vcpu-arch.mmu.root_hpa != INVALID_PAGE))
-- 
1.7.5.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 0/6] Use eoi to track RTC interrupt delivery status

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

The current interrupt coalescing logic, which is only used by RTC, conflicts
with Posted Interrupt.

This patch introduces a new mechanism that uses eoi to track interrupts:
When delivering an interrupt to vcpus, pending_eoi is set to the number of
vcpus that received the interrupt, and it is decreased each time a vcpu
writes eoi. No subsequent RTC interrupt can be delivered to a vcpu until all
vcpus have written eoi.

Changes from v4 to v5
* Calculate destination vcpu on interrupt injection instead of hooking into
  ioapic modification.
* Rebase on top of KVM.

Changes from v3 to v4
* Call kvm_apic_match_dest() to check destination vcpu.
* Update RTC interrupt's destination vcpu map when ioapic entry of RTC
  or apic register (id, ldr, dfr) is changed.

Changes from v2 to v3:
* Remove unused variable irq_ack_notifier.
* Acquire ioapic->lock before calculating the destination vcpu map.
* Copy vcpu_map to expected_eoi_timap on each RTC irq and clear it on eoi.

Yang Zhang (6):
  KVM: Add vcpu info to ioapic_update_eoi()
  KVM: Introduce struct rtc_status
  KVM : Return destination vcpu on interrupt injection
  KVM: Add reset/restore rtc_status support
  KVM : Force vmexit with virtual interrupt delivery
  KVM: Use eoi to track RTC interrupt delivery status

 arch/x86/kvm/lapic.c |   31 ---
 arch/x86/kvm/lapic.h |7 ++-
 virt/kvm/ioapic.c|  103 ++
 virt/kvm/ioapic.h|   13 +-
 virt/kvm/irq_comm.c  |   12 +++---
 5 files changed, 142 insertions(+), 24 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 1/6] KVM: Add vcpu info to ioapic_update_eoi()

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Add vcpu info to ioapic_update_eoi, so we can know which vcpu
issued this EOI.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |2 +-
 virt/kvm/ioapic.c|   12 ++--
 virt/kvm/ioapic.h|3 ++-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a8e9369..d3e322a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -786,7 +786,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int 
vector)
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
-   kvm_ioapic_update_eoi(apic-vcpu-kvm, vector, trigger_mode);
+   kvm_ioapic_update_eoi(apic-vcpu, vector, trigger_mode);
}
 }
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ce82b94..ed6f111 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -264,8 +264,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int 
irq_source_id)
spin_unlock(ioapic-lock);
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
-int trigger_mode)
+static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
+   struct kvm_ioapic *ioapic, int vector, int trigger_mode)
 {
int i;
 
@@ -304,12 +304,12 @@ bool kvm_ioapic_handles_vector(struct kvm *kvm, int 
vector)
return test_bit(vector, ioapic-handled_vectors);
 }
 
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
 {
-   struct kvm_ioapic *ioapic = kvm-arch.vioapic;
+   struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;
 
spin_lock(ioapic-lock);
-   __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
+   __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
spin_unlock(ioapic-lock);
 }
 
@@ -407,7 +407,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, 
gpa_t addr, int len,
break;
 #ifdef CONFIG_IA64
case IOAPIC_REG_EOI:
-   __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
+   __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
break;
 #endif
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 0400a46..2fc61a5 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -70,7 +70,8 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
*kvm)
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
+   int trigger_mode);
 bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_destroy(struct kvm *kvm);
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 3/6] KVM : Calculate destination vcpu on interrupt injection

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Add a new parameter to know vcpus who received the interrupt.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |   21 -
 arch/x86/kvm/lapic.h |5 +++--
 virt/kvm/ioapic.c|2 +-
 virt/kvm/ioapic.h|2 +-
 virt/kvm/irq_comm.c  |   12 ++--
 5 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d3e322a..5f6b1d0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -433,10 +433,21 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 int vector, int level, int trig_mode);
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
+static void kvm_set_irq_dest_map(struct kvm_vcpu *vcpu, unsigned long 
*dest_map)
+{
+   if (!kvm_lapic_enabled(vcpu))
+   return;
+   __set_bit(vcpu-vcpu_id, dest_map);
+}
+
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+   unsigned long *dest_map)
 {
struct kvm_lapic *apic = vcpu-arch.apic;
 
+   if (dest_map)
+   kvm_set_irq_dest_map(vcpu, dest_map);
+
return __apic_accept_irq(apic, irq-delivery_mode, irq-vector,
irq-level, irq-trig_mode);
 }
@@ -611,7 +622,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct 
kvm_lapic *source,
 }
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-   struct kvm_lapic_irq *irq, int *r)
+   struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
 {
struct kvm_apic_map *map;
unsigned long bitmap = 1;
@@ -622,7 +633,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct 
kvm_lapic *src,
*r = -1;
 
if (irq-shorthand == APIC_DEST_SELF) {
-   *r = kvm_apic_set_irq(src-vcpu, irq);
+   *r = kvm_apic_set_irq(src-vcpu, irq, dest_map);
return true;
}
 
@@ -667,7 +678,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct 
kvm_lapic *src,
continue;
if (*r  0)
*r = 0;
-   *r += kvm_apic_set_irq(dst[i]-vcpu, irq);
+   *r += kvm_apic_set_irq(dst[i]-vcpu, irq, dest_map);
}
 
ret = true;
@@ -852,7 +863,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
   irq.vector);
 
-   kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq);
+   kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq, NULL);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 2c721b9..967519c 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -55,11 +55,12 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+   unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-   struct kvm_lapic_irq *irq, int *r);
+   struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ed6f111..4767fa6 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -217,7 +217,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
irq)
irqe.level = 1;
irqe.shorthand = 0;
 
-   return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe);
+   return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 6e5c88f..14e5289 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -88,7 +88,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, 
int irq_source_id,
 void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
-   struct kvm_lapic_irq *irq);
+   struct kvm_lapic_irq *irq, unsigned long *dest_map);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c

[PATCH v5 2/6] KVM: Introduce struct rtc_status

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 virt/kvm/ioapic.h |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 2fc61a5..6e5c88f 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -34,6 +34,11 @@ struct kvm_vcpu;
 #defineIOAPIC_INIT 0x5
 #defineIOAPIC_EXTINT   0x7
 
+struct rtc_status {
+   int pending_eoi;
+   DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
+};
+
 struct kvm_ioapic {
u64 base_address;
u32 ioregsel;
@@ -47,6 +52,9 @@ struct kvm_ioapic {
void (*ack_notifier)(void *opaque, int irq);
spinlock_t lock;
DECLARE_BITMAP(handled_vectors, 256);
+#ifdef CONFIG_X86
+   struct rtc_status rtc_status;
+#endif
 };
 
 #ifdef DEBUG
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 5/6] KVM: Force vmexit with virtual interrupt delivery

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

We need the EOI to track interrupt delivery status, so force a vmexit
on EOI for the RTC interrupt when virtual interrupt delivery is enabled.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 virt/kvm/ioapic.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 4c77832..a5ee8bf 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -165,7 +165,7 @@ void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
if (!e-fields.mask 
(e-fields.trig_mode == IOAPIC_LEVEL_TRIG ||
 kvm_irq_has_notifier(ioapic-kvm, KVM_IRQCHIP_IOAPIC,
-index))) {
+index) || index == 8)) {
irqe.dest_id = e-fields.dest_id;
irqe.vector = e-fields.vector;
irqe.dest_mode = e-fields.dest_mode;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 6/6] KVM: Use eoi to track RTC interrupt delivery status

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

The current interrupt coalescing logic, which is only used by the RTC,
conflicts with Posted Interrupts.
This patch introduces a new mechanism that uses EOI to track interrupt
delivery: when an interrupt is delivered, pending_eoi is set to the number
of vcpus that received it, and it is decremented each time one of those
vcpus writes EOI. No subsequent RTC interrupt can be delivered until all
of those vcpus have written EOI.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 virt/kvm/ioapic.c |   53 -
 1 files changed, 52 insertions(+), 1 deletions(-)

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index a5ee8bf..3aec0a2 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -109,6 +109,29 @@ static void rtc_irq_restore(struct kvm_ioapic *ioapic)
}
ioapic-rtc_status.pending_eoi = pending_eoi;
 }
+
+static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
+   struct rtc_status *rtc_status, int irq)
+{
+   if (irq != 8)
+   return;
+
+   if (test_and_clear_bit(vcpu-vcpu_id, rtc_status-dest_map))
+   --rtc_status-pending_eoi;
+
+   WARN_ON(rtc_status-pending_eoi  0);
+}
+
+static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
+{
+   if (irq != 8)
+   return false;
+
+   if (ioapic-rtc_status.pending_eoi  0)
+   return true; /* coalesced */
+
+   return false;
+}
 #else
 static void rtc_irq_reset(struct kvm_ioapic *ioapic)
 {
@@ -119,6 +142,17 @@ static void rtc_irq_restore(struct kvm_ioapic *ioapic)
 {
return;
 }
+
+static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
+   struct rtc_status *rtc_status, int irq)
+{
+   return;
+}
+
+static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
+{
+   return false;
+}
 #endif
 
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
@@ -236,6 +270,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
irq)
 {
union kvm_ioapic_redirect_entry *entry = ioapic-redirtbl[irq];
struct kvm_lapic_irq irqe;
+   int ret;
 
ioapic_debug(dest=%x dest_mode=%x delivery_mode=%x 
 vector=%x trig_mode=%x\n,
@@ -251,7 +286,16 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
irq)
irqe.level = 1;
irqe.shorthand = 0;
 
-   return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
+#ifdef CONFIG_X86
+   if (irq == 8) {
+   ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe,
+   ioapic-rtc_status.dest_map);
+   ioapic-rtc_status.pending_eoi = ret;
+   } else
+#endif
+   ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
+
+   return ret;
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
@@ -275,6 +319,11 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, 
int irq_source_id,
ret = 1;
} else {
int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+
+   if (rtc_irq_check(ioapic, irq)) {
+   ret = 0; /* coalesced */
+   goto out;
+   }
ioapic-irr |= mask;
if ((edge  old_irr != ioapic-irr) ||
(!edge  !entry.fields.remote_irr))
@@ -282,6 +331,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, 
int irq_source_id,
else
ret = 0; /* report coalesced interrupt */
}
+out:
trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
spin_unlock(ioapic-lock);
 
@@ -309,6 +359,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
if (ent-fields.vector != vector)
continue;
 
+   rtc_irq_ack_eoi(vcpu, ioapic-rtc_status, i);
/*
 * We are dropping lock while calling ack notifiers because ack
 * notifier callbacks for assigned devices call into IOAPIC
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 4/6] KVM: Add reset/restore rtc_status support

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |8 
 arch/x86/kvm/lapic.h |2 ++
 virt/kvm/ioapic.c|   35 +++
 3 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5f6b1d0..158e0a3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
+{
+   struct kvm_lapic *apic = vcpu-arch.apic;
+
+   return apic_test_vector(vector, apic-regs + APIC_ISR) ||
+   apic_test_vector(vector, apic-regs + APIC_IRR);
+}
+
 static inline void apic_set_vector(int vec, void *bitmap)
 {
set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 967519c..004d2ad 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu 
*vcpu)
return vcpu-arch.apic-pending_events;
 }
 
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
+
 #endif
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 4767fa6..8f9c62b 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -87,6 +87,39 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic 
*ioapic,
return result;
 }
 
+#ifdef CONFIG_X86
+static void rtc_irq_reset(struct kvm_ioapic *ioapic)
+{
+   ioapic-rtc_status.pending_eoi = 0;
+   bitmap_zero(ioapic-rtc_status.dest_map, KVM_MAX_VCPUS);
+}
+
+static void rtc_irq_restore(struct kvm_ioapic *ioapic)
+{
+   struct kvm_vcpu *vcpu;
+   int vector, i, pending_eoi = 0, rtc_pin = 8;
+
+   vector = ioapic-redirtbl[rtc_pin].fields.vector;
+   kvm_for_each_vcpu(i, vcpu, ioapic-kvm) {
+   if (kvm_apic_pending_eoi(vcpu, vector)) {
+   pending_eoi++;
+   set_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map);
+   }
+   }
+   ioapic-rtc_status.pending_eoi = pending_eoi;
+}
+#else
+static void rtc_irq_reset(struct kvm_ioapic *ioapic)
+{
+   return;
+}
+
+static void rtc_irq_restore(struct kvm_ioapic *ioapic)
+{
+   return;
+}
+#endif
+
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
union kvm_ioapic_redirect_entry *pent;
@@ -428,6 +461,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
ioapic-ioregsel = 0;
ioapic-irr = 0;
ioapic-id = 0;
+   rtc_irq_reset(ioapic);
update_handled_vectors(ioapic);
 }
 
@@ -493,6 +527,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state)
spin_lock(ioapic-lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
update_handled_vectors(ioapic);
+   rtc_irq_restore(ioapic);
kvm_ioapic_make_eoibitmap_request(kvm);
spin_unlock(ioapic-lock);
return 0;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] KVM: PPC: e500: Expose MMU registers via ONE_REG

2013-03-21 Thread Caraman Mihai Claudiu-B02008


 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, March 21, 2013 12:07 PM
 To: Wood Scott-B07421
 Cc: Caraman Mihai Claudiu-B02008; kvm-...@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org; kvm@vger.kernel.org
 Subject: Re: [PATCH] KVM: PPC: e500: Expose MMU registers via ONE_REG
 
 
 On 19.03.2013, at 18:26, Scott Wood wrote:
 
  On 03/19/2013 12:17:11 PM, Mihai Caraman wrote:
  diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
  index 66b6e31..b77b855 100644
  --- a/arch/powerpc/kvm/e500_mmu.c
  +++ b/arch/powerpc/kvm/e500_mmu.c
  @@ -596,6 +596,95 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu
 *vcpu, struct kvm_sregs *sregs)
 return 0;
  }
  +int kvmppc_get_one_reg_500_tlb(struct kvm_vcpu *vcpu, u64 id,
  +  union kvmppc_one_reg *val)
 
  s/500/e500/
 
  +int kvmppc_set_one_reg_500_tlb(struct kvm_vcpu *vcpu, u64 id,
  + union kvmppc_one_reg *val)
  +{
  +  int r = 0;
  +  long int i;
  +
  +  switch (id) {
  +  case KVM_REG_PPC_MAS0:
  +  vcpu-arch.shared-mas0 = set_reg_val(id, *val);
  +  break;
  +  case KVM_REG_PPC_MAS1:
  +  vcpu-arch.shared-mas1 = set_reg_val(id, *val);
  +  break;
  +  case KVM_REG_PPC_MAS2:
  +  vcpu-arch.shared-mas2 = set_reg_val(id, *val);
  +  break;
  +  case KVM_REG_PPC_MAS7_3:
  +  vcpu-arch.shared-mas7_3 = set_reg_val(id, *val);
  +  break;
  +  case KVM_REG_PPC_MAS4:
  +  vcpu-arch.shared-mas4 = set_reg_val(id, *val);
  +  break;
  +  case KVM_REG_PPC_MAS6:
  +  vcpu-arch.shared-mas6 = set_reg_val(id, *val);
  +  break;
  +  case KVM_REG_PPC_MMUCFG: {
  +  u32 mmucfg = set_reg_val(id, *val);
  +  vcpu-arch.mmucfg = mmucfg  ~MMUCFG_LPIDSIZE;
  +  break;
  +  }
 
  Do we really want to allow arbitrary MMUCFG changes?  It won't
 magically make us able to support larger RAs, PIDs, different MAVN, etc.

Not magically. Some changes, e.g. TLBnCFG_IND or TLBnPS, require just a KVM
check; other changes, e.g. TLBnCFG_MAVN, require additional support, and we
might not implement all of them. Until then this code should do the job:

/* MMU registers can be set only to the configuration supported by KVM 
*/
case KVM_REG_PPC_MMUCFG: {
if (set_reg_val(id, *val) != vcpu-arch.mmucfg)
r = -EINVAL;
break;
}

 
 Only if we update the actual shadow mmu configuration as well.

These registers (MMUCFG, EPTCFG, TLBnCFG, TLBnPS) are read-only (and shared
between e6500 threads), we can only emulate them.

-Mike


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PPC: e500: Expose MMU registers via ONE_REG

2013-03-21 Thread Alexander Graf

On 21.03.2013, at 12:02, Caraman Mihai Claudiu-B02008 wrote:

 
 
 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, March 21, 2013 12:07 PM
 To: Wood Scott-B07421
 Cc: Caraman Mihai Claudiu-B02008; kvm-...@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org; kvm@vger.kernel.org
 Subject: Re: [PATCH] KVM: PPC: e500: Expose MMU registers via ONE_REG
 
 
 On 19.03.2013, at 18:26, Scott Wood wrote:
 
 On 03/19/2013 12:17:11 PM, Mihai Caraman wrote:
 diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
 index 66b6e31..b77b855 100644
 --- a/arch/powerpc/kvm/e500_mmu.c
 +++ b/arch/powerpc/kvm/e500_mmu.c
 @@ -596,6 +596,95 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu
 *vcpu, struct kvm_sregs *sregs)
return 0;
 }
 +int kvmppc_get_one_reg_500_tlb(struct kvm_vcpu *vcpu, u64 id,
 +  union kvmppc_one_reg *val)
 
 s/500/e500/
 
 +int kvmppc_set_one_reg_500_tlb(struct kvm_vcpu *vcpu, u64 id,
 + union kvmppc_one_reg *val)
 +{
 +  int r = 0;
 +  long int i;
 +
 +  switch (id) {
 +  case KVM_REG_PPC_MAS0:
 +  vcpu-arch.shared-mas0 = set_reg_val(id, *val);
 +  break;
 +  case KVM_REG_PPC_MAS1:
 +  vcpu-arch.shared-mas1 = set_reg_val(id, *val);
 +  break;
 +  case KVM_REG_PPC_MAS2:
 +  vcpu-arch.shared-mas2 = set_reg_val(id, *val);
 +  break;
 +  case KVM_REG_PPC_MAS7_3:
 +  vcpu-arch.shared-mas7_3 = set_reg_val(id, *val);
 +  break;
 +  case KVM_REG_PPC_MAS4:
 +  vcpu-arch.shared-mas4 = set_reg_val(id, *val);
 +  break;
 +  case KVM_REG_PPC_MAS6:
 +  vcpu-arch.shared-mas6 = set_reg_val(id, *val);
 +  break;
 +  case KVM_REG_PPC_MMUCFG: {
 +  u32 mmucfg = set_reg_val(id, *val);
 +  vcpu-arch.mmucfg = mmucfg  ~MMUCFG_LPIDSIZE;
 +  break;
 +  }
 
 Do we really want to allow arbitrary MMUCFG changes?  It won't
 magically make us able to support larger RAs, PIDs, different MAVN, etc.
 
 Not magically, some changes e.g TLBnCFG_IND or TLBnPS require just a kvm
 check other changes e.g. TLBnCFG_MAVN require additional support and we
 might not implement all of them. Until then this code should do the job:
 
   /* MMU registers can be set only to the configuration supported by KVM 
 */
   case KVM_REG_PPC_MMUCFG: {
   if (set_reg_val(id, *val) != vcpu-arch.mmucfg)
   r = -EINVAL;
   break;
   }

Yes :).

 
 
 Only if we update the actual shadow mmu configuration as well.
 
 These registers (MMUCFG, EPTCFG, TLBnCFG, TLBnPS) are read-only (and shared
 between e6500 threads), we can only emulate them.

We need to change the behavior of the shadow mmu as well. It's not about the 
registers, but the actually exposed TLBs. If you configure 4 TLBs, and you 
announce to the guest that you can do 4 TLBs, you better emulate 4 TLBs :).


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] KVM: PPC: e500: Add separate functions for vcpu's MMU configuration

2013-03-21 Thread Caraman Mihai Claudiu-B02008
 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-
 ow...@vger.kernel.org] On Behalf Of Alexander Graf
 Sent: Thursday, March 21, 2013 12:07 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org
 Subject: Re: [PATCH] KVM: PPC: e500: Add separate functions for vcpu's
 MMU configuration
 
 
 On 19.03.2013, at 18:16, Mihai Caraman wrote:
 
  Move vcpu's MMU default configuration and geometry update into their
 own
  functions.
 
 Mind to explain why?

You requested a separate function for clearing the TLBnCFG_IND bit (E.PT
removal) to self-document the code. The existing logic (that TLBnCFG_IND
relies on) was buried in a chunk of code, and I thought this would add more
clarity. If you don't agree, I will at least document the code.

-Mike

 
 
 Alex
 
 
  Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
  ---
  arch/powerpc/kvm/e500_mmu.c |   59 +++-
 ---
  1 files changed, 37 insertions(+), 22 deletions(-)
 
  diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
  index 5c44759..66b6e31 100644
  --- a/arch/powerpc/kvm/e500_mmu.c
  +++ b/arch/powerpc/kvm/e500_mmu.c
  @@ -596,6 +596,20 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu
 *vcpu, struct kvm_sregs *sregs)
  return 0;
  }
 
  +static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
  +   struct kvm_book3e_206_tlb_params *params)
  +{
  +   vcpu-arch.tlbcfg[0] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
  +   if (params-tlb_sizes[0] = 2048)
  +   vcpu-arch.tlbcfg[0] |= params-tlb_sizes[0];
  +   vcpu-arch.tlbcfg[0] |= params-tlb_ways[0]  TLBnCFG_ASSOC_SHIFT;
  +
  +   vcpu-arch.tlbcfg[1] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
  +   vcpu-arch.tlbcfg[1] |= params-tlb_sizes[1];
  +   vcpu-arch.tlbcfg[1] |= params-tlb_ways[1]  TLBnCFG_ASSOC_SHIFT;
  +   return 0;
  +}
  +
  int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
struct kvm_config_tlb *cfg)
  {
  @@ -692,16 +706,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu
 *vcpu,
  vcpu_e500-gtlb_offset[0] = 0;
  vcpu_e500-gtlb_offset[1] = params.tlb_sizes[0];
 
  -   vcpu-arch.mmucfg = mfspr(SPRN_MMUCFG)  ~MMUCFG_LPIDSIZE;
  -
  -   vcpu-arch.tlbcfg[0] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
  -   if (params.tlb_sizes[0] = 2048)
  -   vcpu-arch.tlbcfg[0] |= params.tlb_sizes[0];
  -   vcpu-arch.tlbcfg[0] |= params.tlb_ways[0]  TLBnCFG_ASSOC_SHIFT;
  -
  -   vcpu-arch.tlbcfg[1] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
  -   vcpu-arch.tlbcfg[1] |= params.tlb_sizes[1];
  -   vcpu-arch.tlbcfg[1] |= params.tlb_ways[1]  TLBnCFG_ASSOC_SHIFT;
  +   /* Update vcpu's MMU geometry based on SW_TLB input */
  +   vcpu_mmu_geometry_update(vcpu, params);
 
  vcpu_e500-shared_tlb_pages = pages;
  vcpu_e500-num_shared_tlb_pages = num_pages;
  @@ -737,6 +743,26 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu
 *vcpu,
  return 0;
  }
 
  +/* vcpu's MMU default configuration */
  +static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
  +  struct kvmppc_e500_tlb_params *params)
  +{
  +   /* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host
 values*/
  +   vcpu-arch.mmucfg = mfspr(SPRN_MMUCFG)  ~MMUCFG_LPIDSIZE;
  +
  +   /* Initialize IPROT field with host value*/
  +   vcpu-arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) 
  +~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
  +   vcpu-arch.tlbcfg[0] |= params[0].entries;
  +   vcpu-arch.tlbcfg[0] |= params[0].ways  TLBnCFG_ASSOC_SHIFT;
  +
  +   vcpu-arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) 
  +~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
  +   vcpu-arch.tlbcfg[1] |= params[1].entries;
  +   vcpu-arch.tlbcfg[1] |= params[1].ways  TLBnCFG_ASSOC_SHIFT;
  +   return 0;
  +}
  +
  int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
  {
  struct kvm_vcpu *vcpu = vcpu_e500-vcpu;
  @@ -781,18 +807,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500
 *vcpu_e500)
  if (!vcpu_e500-g2h_tlb1_map)
  goto err;
 
  -   /* Init TLB configuration register */
  -   vcpu-arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) 
  -~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
  -   vcpu-arch.tlbcfg[0] |= vcpu_e500-gtlb_params[0].entries;
  -   vcpu-arch.tlbcfg[0] |=
  -   vcpu_e500-gtlb_params[0].ways  TLBnCFG_ASSOC_SHIFT;
  -
  -   vcpu-arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) 
  -~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
  -   vcpu-arch.tlbcfg[1] |= vcpu_e500-gtlb_params[1].entries;
  -   vcpu-arch.tlbcfg[1] |=
  -   vcpu_e500-gtlb_params[1].ways  TLBnCFG_ASSOC_SHIFT;
  +   vcpu_mmu_init(vcpu, vcpu_e500-gtlb_params);
 
  kvmppc_recalc_tlb1map_range(vcpu_e500);
  return 0;
  --
  1.7.4.1
 
 
  --
  To unsubscribe from this list: send the line unsubscribe kvm-ppc in
  the body of a message to majord...@vger.kernel.org
  More majordomo info at  

[PATCH] KVM: Call kvm_apic_match_dest() to check destination vcpu

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

For a given vcpu, kvm_apic_match_dest() quickly tells you whether
the vcpu is in the destination list. Drop kvm_calculate_eoi_exitmap()
and use kvm_apic_match_dest() instead.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |   47 ---
 arch/x86/kvm/lapic.h |4 
 virt/kvm/ioapic.c|9 -
 3 files changed, 4 insertions(+), 56 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a8e9369..e227474 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -145,53 +145,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
return (kvm_apic_get_reg(apic, APIC_ID)  24)  0xff;
 }
 
-void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-   struct kvm_lapic_irq *irq,
-   u64 *eoi_exit_bitmap)
-{
-   struct kvm_lapic **dst;
-   struct kvm_apic_map *map;
-   unsigned long bitmap = 1;
-   int i;
-
-   rcu_read_lock();
-   map = rcu_dereference(vcpu-kvm-arch.apic_map);
-
-   if (unlikely(!map)) {
-   __set_bit(irq-vector, (unsigned long *)eoi_exit_bitmap);
-   goto out;
-   }
-
-   if (irq-dest_mode == 0) { /* physical mode */
-   if (irq-delivery_mode == APIC_DM_LOWEST ||
-   irq-dest_id == 0xff) {
-   __set_bit(irq-vector,
- (unsigned long *)eoi_exit_bitmap);
-   goto out;
-   }
-   dst = map-phys_map[irq-dest_id  0xff];
-   } else {
-   u32 mda = irq-dest_id  (32 - map-ldr_bits);
-
-   dst = map-logical_map[apic_cluster_id(map, mda)];
-
-   bitmap = apic_logical_id(map, mda);
-   }
-
-   for_each_set_bit(i, bitmap, 16) {
-   if (!dst[i])
-   continue;
-   if (dst[i]-vcpu == vcpu) {
-   __set_bit(irq-vector,
- (unsigned long *)eoi_exit_bitmap);
-   break;
-   }
-   }
-
-out:
-   rcu_read_unlock();
-}
-
 static void recalculate_apic_map(struct kvm *kvm)
 {
struct kvm_apic_map *new, *old = NULL;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 2c721b9..baa20cf 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -160,10 +160,6 @@ static inline u16 apic_logical_id(struct kvm_apic_map 
*map, u32 ldr)
return ldr  map-lid_mask;
 }
 
-void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-   struct kvm_lapic_irq *irq,
-   u64 *eoi_bitmap);
-
 static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
 {
return vcpu-arch.apic-pending_events;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ce82b94..b54ddfa 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -132,11 +132,10 @@ void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu 
*vcpu,
(e-fields.trig_mode == IOAPIC_LEVEL_TRIG ||
 kvm_irq_has_notifier(ioapic-kvm, KVM_IRQCHIP_IOAPIC,
 index))) {
-   irqe.dest_id = e-fields.dest_id;
-   irqe.vector = e-fields.vector;
-   irqe.dest_mode = e-fields.dest_mode;
-   irqe.delivery_mode = e-fields.delivery_mode  8;
-   kvm_calculate_eoi_exitmap(vcpu, irqe, eoi_exit_bitmap);
+   if (kvm_apic_match_dest(vcpu, NULL, 0,
+   e-fields.dest_id, e-fields.dest_mode))
+   __set_bit(irqe.vector,
+   (unsigned long *)eoi_exit_bitmap);
}
}
spin_unlock(ioapic-lock);
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] KVM: PPC: e500: Expose MMU registers via ONE_REG

2013-03-21 Thread Caraman Mihai Claudiu-B02008
 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, March 21, 2013 1:07 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: Wood Scott-B07421; kvm-...@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org; kvm@vger.kernel.org
 Subject: Re: [PATCH] KVM: PPC: e500: Expose MMU registers via ONE_REG
 
 
 On 21.03.2013, at 12:02, Caraman Mihai Claudiu-B02008 wrote:
 
 
 
  -Original Message-
  From: Alexander Graf [mailto:ag...@suse.de]
  Sent: Thursday, March 21, 2013 12:07 PM
  To: Wood Scott-B07421
  Cc: Caraman Mihai Claudiu-B02008; kvm-...@vger.kernel.org; linuxppc-
  d...@lists.ozlabs.org; kvm@vger.kernel.org
  Subject: Re: [PATCH] KVM: PPC: e500: Expose MMU registers via ONE_REG
 
 
  On 19.03.2013, at 18:26, Scott Wood wrote:
 
  On 03/19/2013 12:17:11 PM, Mihai Caraman wrote:
  diff --git a/arch/powerpc/kvm/e500_mmu.c
 b/arch/powerpc/kvm/e500_mmu.c
  index 66b6e31..b77b855 100644
  --- a/arch/powerpc/kvm/e500_mmu.c
  +++ b/arch/powerpc/kvm/e500_mmu.c
  @@ -596,6 +596,95 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu
  *vcpu, struct kvm_sregs *sregs)
   return 0;
  }
  +int kvmppc_get_one_reg_500_tlb(struct kvm_vcpu *vcpu, u64 id,
  +union kvmppc_one_reg *val)
 
  s/500/e500/
 
  +int kvmppc_set_one_reg_500_tlb(struct kvm_vcpu *vcpu, u64 id,
  +   union kvmppc_one_reg *val)
  +{
  +int r = 0;
  +long int i;
  +
  +switch (id) {
  +case KVM_REG_PPC_MAS0:
  +vcpu-arch.shared-mas0 = set_reg_val(id, *val);
  +break;
  +case KVM_REG_PPC_MAS1:
  +vcpu-arch.shared-mas1 = set_reg_val(id, *val);
  +break;
  +case KVM_REG_PPC_MAS2:
  +vcpu-arch.shared-mas2 = set_reg_val(id, *val);
  +break;
  +case KVM_REG_PPC_MAS7_3:
  +vcpu-arch.shared-mas7_3 = set_reg_val(id, *val);
  +break;
  +case KVM_REG_PPC_MAS4:
  +vcpu-arch.shared-mas4 = set_reg_val(id, *val);
  +break;
  +case KVM_REG_PPC_MAS6:
  +vcpu-arch.shared-mas6 = set_reg_val(id, *val);
  +break;
  +case KVM_REG_PPC_MMUCFG: {
  +u32 mmucfg = set_reg_val(id, *val);
  +vcpu-arch.mmucfg = mmucfg  ~MMUCFG_LPIDSIZE;
  +break;
  +}
 
  Do we really want to allow arbitrary MMUCFG changes?  It won't
  magically make us able to support larger RAs, PIDs, different MAVN,
 etc.
 
  Not magically, some changes e.g TLBnCFG_IND or TLBnPS require just a
 kvm
  check other changes e.g. TLBnCFG_MAVN require additional support and we
  might not implement all of them. Until then this code should do the
 job:
 
  /* MMU registers can be set only to the configuration supported by
 KVM */
  case KVM_REG_PPC_MMUCFG: {
  if (set_reg_val(id, *val) != vcpu-arch.mmucfg)
  r = -EINVAL;
  break;
  }
 
 Yes :).
 
 
 
  Only if we update the actual shadow mmu configuration as well.
 
  These registers (MMUCFG, EPTCFG, TLBnCFG, TLBnPS) are read-only (and
 shared
  between e6500 threads), we can only emulate them.
 
 We need to change the behavior of the shadow mmu as well. It's not about
 the registers, but the actually exposed TLBs. If you configure 4 TLBs,
 and you announce to the guest that you can do 4 TLBs, you better emulate
 4 TLBs :).

Right, like the rest of the configs I was talking about above :)

-Mike





--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 3/6] KVM : Calculate destination vcpu on interrupt injection

2013-03-21 Thread Gleb Natapov
On Thu, Mar 21, 2013 at 06:49:21PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Add a new parameter to know vcpus who received the interrupt.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  arch/x86/kvm/lapic.c |   21 -
  arch/x86/kvm/lapic.h |5 +++--
  virt/kvm/ioapic.c|2 +-
  virt/kvm/ioapic.h|2 +-
  virt/kvm/irq_comm.c  |   12 ++--
  5 files changed, 27 insertions(+), 15 deletions(-)
 
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index d3e322a..5f6b1d0 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -433,10 +433,21 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode);
  
 -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 +static void kvm_set_irq_dest_map(struct kvm_vcpu *vcpu, unsigned long 
 *dest_map)
 +{
 + if (!kvm_lapic_enabled(vcpu))
 + return;
Why this check here?

 + __set_bit(vcpu-vcpu_id, dest_map);
 +}
 +
 +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 + unsigned long *dest_map)
  {
   struct kvm_lapic *apic = vcpu-arch.apic;
  
 + if (dest_map)
 + kvm_set_irq_dest_map(vcpu, dest_map);
 +
   return __apic_accept_irq(apic, irq-delivery_mode, irq-vector,
   irq-level, irq-trig_mode);
  }
 @@ -611,7 +622,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct 
 kvm_lapic *source,
  }
  
  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 - struct kvm_lapic_irq *irq, int *r)
 + struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
  {
   struct kvm_apic_map *map;
   unsigned long bitmap = 1;
 @@ -622,7 +633,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, 
 struct kvm_lapic *src,
   *r = -1;
  
   if (irq-shorthand == APIC_DEST_SELF) {
 - *r = kvm_apic_set_irq(src-vcpu, irq);
 + *r = kvm_apic_set_irq(src-vcpu, irq, dest_map);
   return true;
   }
  
 @@ -667,7 +678,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, 
 struct kvm_lapic *src,
   continue;
   if (*r  0)
   *r = 0;
 - *r += kvm_apic_set_irq(dst[i]-vcpu, irq);
 + *r += kvm_apic_set_irq(dst[i]-vcpu, irq, dest_map);
   }
  
   ret = true;
 @@ -852,7 +863,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
  irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
  irq.vector);
  
 - kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq);
 + kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq, NULL);
  }
  
  static u32 apic_get_tmcct(struct kvm_lapic *apic)
 diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
 index 2c721b9..967519c 100644
 --- a/arch/x86/kvm/lapic.h
 +++ b/arch/x86/kvm/lapic.h
 @@ -55,11 +55,12 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
  
  int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
  int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 + unsigned long *dest_map);
  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
  
  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 - struct kvm_lapic_irq *irq, int *r);
 + struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
  
  u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
  void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
 index ed6f111..4767fa6 100644
 --- a/virt/kvm/ioapic.c
 +++ b/virt/kvm/ioapic.c
 @@ -217,7 +217,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
 irq)
   irqe.level = 1;
   irqe.shorthand = 0;
  
 - return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe);
 + return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
  }
  
  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
 index 6e5c88f..14e5289 100644
 --- a/virt/kvm/ioapic.h
 +++ b/virt/kvm/ioapic.h
 @@ -88,7 +88,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, 
 int irq_source_id,
  void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
  void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
  int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 - struct kvm_lapic_irq *irq);
 + struct kvm_lapic_irq *irq, unsigned long *dest_map);
  int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
  int kvm_set_ioapic(struct kvm *kvm, struct 

Re: [PATCH] KVM: PPC: e500: Add separate functions for vcpu's MMU configuration

2013-03-21 Thread Alexander Graf

On 21.03.2013, at 12:19, Caraman Mihai Claudiu-B02008 wrote:

 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-
 ow...@vger.kernel.org] On Behalf Of Alexander Graf
 Sent: Thursday, March 21, 2013 12:07 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org
 Subject: Re: [PATCH] KVM: PPC: e500: Add separate functions for vcpu's
 MMU configuration
 
 
 On 19.03.2013, at 18:16, Mihai Caraman wrote:
 
 Move vcpu's MMU default configuration and geometry update into their
 own
 functions.
 
 Mind to explain why?
 
 You requested a separate function for clearing TLBnCFG_IND bit (E.PT removal)
 to self-document the code. The existing logic (that TLBnCFG_IND relies on)
 was buried in a chunk of code and I thought this will add more clarity.
 If you don't agree I would document the code at least.

I guess I'll have to see the full picture then. Please just include this patch
in the series when you change the IND bit and make the patch description a bit
more obvious: Just indicate that you need this as a cleanup to make the IND patch
more readable.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 16/29] arm64: KVM: HYP mode world switch implementation

2013-03-21 Thread Marc Zyngier
On 20/03/13 20:04, Christopher Covington wrote:
 Hi Marc,
 
 On 03/13/2013 03:59 PM, Christopher Covington wrote:
 
 [...]
 
 Alternatively, you could consider storing the host registers in a
 slimmed-down vcpu structure for hosts, rather than on the stack.

I am actively implementing this (I'm turning the vfp_host pointer into a
full blown CPU context). It looks promising so far, stay tuned.

 One potential argument for storing the host in the same sort of vcpu structure
 as the guest rather than on the hypervisor stack is that snapshot and
 migration support initially intended for guests might more easily be extended
 to work for hosts as well.

Not sure I'm following you here. Are you thinking of snapshotting both
host and guests, and migrating the whole thing? Ambitious... ;-)

M.
-- 
Jazz is not dead. It just smells funny...

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v5 3/6] KVM : Calculate destination vcpu on interrupt injection

2013-03-21 Thread Zhang, Yang Z
Gleb Natapov wrote on 2013-03-21:
 On Thu, Mar 21, 2013 at 06:49:21PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Add a new parameter to know vcpus who received the interrupt.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  arch/x86/kvm/lapic.c |   21 -
  arch/x86/kvm/lapic.h |5 +++--
  virt/kvm/ioapic.c|2 +-
  virt/kvm/ioapic.h|2 +-
  virt/kvm/irq_comm.c  |   12 ++--
  5 files changed, 27 insertions(+), 15 deletions(-)
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index d3e322a..5f6b1d0 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -433,10 +433,21 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu
 *vcpu)
  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
   int vector, int level, int trig_mode);
 -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 +static void kvm_set_irq_dest_map(struct kvm_vcpu *vcpu, unsigned long
 *dest_map) +{ +  if (!kvm_lapic_enabled(vcpu)) + return;
 Why this check here?
A vcpu that has not enabled its apic should not be counted as a destination vcpu.
Without this check, a broadcast interrupt would mark all vcpus as destination
vcpus, even though only those that enabled their apic will receive the interrupt.
There is the same check in __apic_accept_irq():
if (unlikely(!apic_enabled(apic)))
 break;

 +__set_bit(vcpu-vcpu_id, dest_map);
 +}
 +
 +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 +unsigned long *dest_map)
  {
  struct kvm_lapic *apic = vcpu-arch.apic;
 +if (dest_map)
 +kvm_set_irq_dest_map(vcpu, dest_map);
 +
  return __apic_accept_irq(apic, irq-delivery_mode, irq-vector,
  irq-level, irq-trig_mode);
  }
 @@ -611,7 +622,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu,
 struct kvm_lapic *source,
  }
  
  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 -struct kvm_lapic_irq *irq, int *r)
 +struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
  {
  struct kvm_apic_map *map;
  unsigned long bitmap = 1;
 @@ -622,7 +633,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm,
 struct kvm_lapic *src,
  *r = -1;
  
  if (irq-shorthand == APIC_DEST_SELF) {
 -*r = kvm_apic_set_irq(src-vcpu, irq);
 +*r = kvm_apic_set_irq(src-vcpu, irq, dest_map);
  return true;
  }
 @@ -667,7 +678,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm,
 struct kvm_lapic *src,
  continue;
  if (*r  0)
  *r = 0;
 -*r += kvm_apic_set_irq(dst[i]-vcpu, irq);
 +*r += kvm_apic_set_irq(dst[i]-vcpu, irq, dest_map);
  }
  
  ret = true; @@ -852,7 +863,7 @@ static void apic_send_ipi(struct
  kvm_lapic *apic)   irq.trig_mode, irq.level, irq.dest_mode,
  irq.delivery_mode, irq.vector);
 -kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq);
 +kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq, NULL);
  }
  
  static u32 apic_get_tmcct(struct kvm_lapic *apic)
 diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
 index 2c721b9..967519c 100644
 --- a/arch/x86/kvm/lapic.h
 +++ b/arch/x86/kvm/lapic.h
 @@ -55,11 +55,12 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
  int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
  int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 +unsigned long *dest_map);
  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
  
  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 -struct kvm_lapic_irq *irq, int *r);
 +struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
  u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
  void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
 index ed6f111..4767fa6 100644
 --- a/virt/kvm/ioapic.c
 +++ b/virt/kvm/ioapic.c
 @@ -217,7 +217,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
 irq)
  irqe.level = 1;
  irqe.shorthand = 0;
 -return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe);
 +return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
  }
  
  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int 
 irq_source_id,
 diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
 index 6e5c88f..14e5289 100644
 --- a/virt/kvm/ioapic.h
 +++ b/virt/kvm/ioapic.h
 @@ -88,7 +88,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq,
 int irq_source_id,
  void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
  void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
  int 

Re: [PATCH v5 3/6] KVM : Calculate destination vcpu on interrupt injection

2013-03-21 Thread Gleb Natapov
On Thu, Mar 21, 2013 at 11:56:05AM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-03-21:
  On Thu, Mar 21, 2013 at 06:49:21PM +0800, Yang Zhang wrote:
  From: Yang Zhang yang.z.zh...@intel.com
  
  Add a new parameter to know vcpus who received the interrupt.
  
  Signed-off-by: Yang Zhang yang.z.zh...@intel.com
  ---
   arch/x86/kvm/lapic.c |   21 -
   arch/x86/kvm/lapic.h |5 +++--
   virt/kvm/ioapic.c|2 +-
   virt/kvm/ioapic.h|2 +-
   virt/kvm/irq_comm.c  |   12 ++--
   5 files changed, 27 insertions(+), 15 deletions(-)
  diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
  index d3e322a..5f6b1d0 100644
  --- a/arch/x86/kvm/lapic.c
  +++ b/arch/x86/kvm/lapic.c
  @@ -433,10 +433,21 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu
  *vcpu)
   static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
  int vector, int level, int trig_mode);
  -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
  +static void kvm_set_irq_dest_map(struct kvm_vcpu *vcpu, unsigned long
  *dest_map) +{ +if (!kvm_lapic_enabled(vcpu)) + return;
  Why this check here?
 The vcpu who didn't enable apic should not account as destination vcpu. 
 Without this check, if broadcast interrupt, all cpus will treat as 
 destination vcpu, but only those who enabled apic will receive the interrupt.
 There are same check in __apic_accept_irq():
 if (unlikely(!apic_enabled(apic)))
  break;
I see, but you use a stricter check that also checks that the apic is
emulated by the kernel, and we wouldn't be here if it wasn't. Anyway, let's
move the bitmap update into __apic_accept_irq().

 
  +  __set_bit(vcpu-vcpu_id, dest_map);
  +}
  +
  +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
  +  unsigned long *dest_map)
   {
 struct kvm_lapic *apic = vcpu-arch.apic;
  +  if (dest_map)
  +  kvm_set_irq_dest_map(vcpu, dest_map);
  +
 return __apic_accept_irq(apic, irq-delivery_mode, irq-vector,
 irq-level, irq-trig_mode);
   }
  @@ -611,7 +622,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu,
  struct kvm_lapic *source,
   }
   
   bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
  -  struct kvm_lapic_irq *irq, int *r)
  +  struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
   {
 struct kvm_apic_map *map;
 unsigned long bitmap = 1;
  @@ -622,7 +633,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm,
  struct kvm_lapic *src,
 *r = -1;
   
 if (irq-shorthand == APIC_DEST_SELF) {
  -  *r = kvm_apic_set_irq(src-vcpu, irq);
  +  *r = kvm_apic_set_irq(src-vcpu, irq, dest_map);
 return true;
 }
  @@ -667,7 +678,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm,
  struct kvm_lapic *src,
 continue;
 if (*r  0)
 *r = 0;
  -  *r += kvm_apic_set_irq(dst[i]-vcpu, irq);
  +  *r += kvm_apic_set_irq(dst[i]-vcpu, irq, dest_map);
 }
   
 ret = true; @@ -852,7 +863,7 @@ static void apic_send_ipi(struct
   kvm_lapic *apic) irq.trig_mode, irq.level, irq.dest_mode,
   irq.delivery_mode,   irq.vector);
  -  kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq);
  +  kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq, NULL);
   }
   
   static u32 apic_get_tmcct(struct kvm_lapic *apic)
  diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
  index 2c721b9..967519c 100644
  --- a/arch/x86/kvm/lapic.h
  +++ b/arch/x86/kvm/lapic.h
  @@ -55,11 +55,12 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
  
   int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
   int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
  -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
  +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
  +  unsigned long *dest_map);
   int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
   
   bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
  -  struct kvm_lapic_irq *irq, int *r);
  +  struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
  
   u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
   void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
  diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
  index ed6f111..4767fa6 100644
  --- a/virt/kvm/ioapic.c
  +++ b/virt/kvm/ioapic.c
  @@ -217,7 +217,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, 
  int
  irq)
 irqe.level = 1;
 irqe.shorthand = 0;
  -  return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe);
  +  return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
   }
   
   int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int 
  irq_source_id,
  diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
  index 6e5c88f..14e5289 100644
  --- 

Re: [PATCH v5 2/6] KVM: Introduce struct rtc_status

2013-03-21 Thread Gleb Natapov
On Thu, Mar 21, 2013 at 06:49:20PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  virt/kvm/ioapic.h |8 
  1 files changed, 8 insertions(+), 0 deletions(-)
 
 diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
 index 2fc61a5..6e5c88f 100644
 --- a/virt/kvm/ioapic.h
 +++ b/virt/kvm/ioapic.h
 @@ -34,6 +34,11 @@ struct kvm_vcpu;
  #define  IOAPIC_INIT 0x5
  #define  IOAPIC_EXTINT   0x7
  
 +struct rtc_status {
 + int pending_eoi;
 + DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
 +};
 +
  struct kvm_ioapic {
   u64 base_address;
   u32 ioregsel;
 @@ -47,6 +52,9 @@ struct kvm_ioapic {
   void (*ack_notifier)(void *opaque, int irq);
   spinlock_t lock;
   DECLARE_BITMAP(handled_vectors, 256);
 +#ifdef CONFIG_X86
 + struct rtc_status rtc_status;
 +#endif
IA64 KVM is almost dead, but we still add CONFIG_X86 everywhere in these
patches. Let's drop all CONFIG_X86 throughout the patches and instead leave
only one:
#ifdef CONFIG_X86
#define RTC_GSI 8
#else
#define RTC_GSI 255
#endif

Then use RTC_GSI instead of 8 everywhere and the code will be effectively
disabled on IA64.

  };
  
  #ifdef DEBUG
 -- 
 1.7.1

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v5 3/6] KVM : Calculate destination vcpu on interrupt injection

2013-03-21 Thread Zhang, Yang Z
Gleb Natapov wrote on 2013-03-21:
 On Thu, Mar 21, 2013 at 11:56:05AM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-03-21:
 On Thu, Mar 21, 2013 at 06:49:21PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Add a new parameter to know vcpus who received the interrupt.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  arch/x86/kvm/lapic.c |   21 -
  arch/x86/kvm/lapic.h |5 +++--
  virt/kvm/ioapic.c|2 +-
  virt/kvm/ioapic.h|2 +-
  virt/kvm/irq_comm.c  |   12 ++--
  5 files changed, 27 insertions(+), 15 deletions(-)
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index d3e322a..5f6b1d0 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -433,10 +433,21 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu
 *vcpu)
  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 int vector, int level, int trig_mode);
 -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 +static void kvm_set_irq_dest_map(struct kvm_vcpu *vcpu, unsigned long
 *dest_map) +{ +if (!kvm_lapic_enabled(vcpu)) + return;
 Why this check here?
 The vcpu who didn't enable apic should not account as destination vcpu.
 Without this check, if broadcast interrupt, all cpus will treat as
 destination vcpu, but only those who enabled apic will receive the
 interrupt. There are same check in __apic_accept_irq(): if
 (unlikely(!apic_enabled(apic)))
  break;
 I see, but you use more strict check that also checks that apic is
 emulated by the kernel and we wouldn't be here if it wasn't. Anyway lets
Do you mean the check added here will block a userspace apic? Shouldn't only
the in-kernel apic get here?

 move bitmap update into __apic_accept_irq().
Sure.

 
 
 +  __set_bit(vcpu-vcpu_id, dest_map);
 +}
 +
 +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 +  unsigned long *dest_map)
  {
struct kvm_lapic *apic = vcpu-arch.apic;
 +  if (dest_map)
 +  kvm_set_irq_dest_map(vcpu, dest_map);
 +
return __apic_accept_irq(apic, irq-delivery_mode, irq-vector,
irq-level, irq-trig_mode);
  }
 @@ -611,7 +622,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu,
 struct kvm_lapic *source,
  }
  
  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
 *src,
 -  struct kvm_lapic_irq *irq, int *r)
 +  struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
  {
struct kvm_apic_map *map;
unsigned long bitmap = 1;
 @@ -622,7 +633,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
 *kvm,
 struct kvm_lapic *src,
*r = -1;
  
if (irq-shorthand == APIC_DEST_SELF) {
 -  *r = kvm_apic_set_irq(src-vcpu, irq);
 +  *r = kvm_apic_set_irq(src-vcpu, irq, dest_map);
return true;
}
 @@ -667,7 +678,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
 *kvm,
 struct kvm_lapic *src,
continue;
if (*r  0)
*r = 0;
 -  *r += kvm_apic_set_irq(dst[i]-vcpu, irq);
 +  *r += kvm_apic_set_irq(dst[i]-vcpu, irq, dest_map);
}
  
ret = true; @@ -852,7 +863,7 @@ static void apic_send_ipi(struct
  kvm_lapic *apic) irq.trig_mode, irq.level, irq.dest_mode,
  irq.delivery_mode,   irq.vector);
 -  kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq);
 +  kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq, NULL);
  }
  
  static u32 apic_get_tmcct(struct kvm_lapic *apic)
 diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
 index 2c721b9..967519c 100644
 --- a/arch/x86/kvm/lapic.h
 +++ b/arch/x86/kvm/lapic.h
 @@ -55,11 +55,12 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
  int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
  int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 +  unsigned long *dest_map);
  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
  
  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
 *src,
 -  struct kvm_lapic_irq *irq, int *r);
 +  struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
  u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
  void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
 index ed6f111..4767fa6 100644
 --- a/virt/kvm/ioapic.c
 +++ b/virt/kvm/ioapic.c
 @@ -217,7 +217,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, 
 int
 irq)
irqe.level = 1;
irqe.shorthand = 0;
 -  return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe);
 +  return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
  }
  
  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int 
 irq_source_id,
 diff --git a/virt/kvm/ioapic.h 

RE: [PATCH v5 2/6] KVM: Introduce struct rtc_status

2013-03-21 Thread Zhang, Yang Z
Gleb Natapov wrote on 2013-03-21:
 On Thu, Mar 21, 2013 at 06:49:20PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  virt/kvm/ioapic.h |8 
  1 files changed, 8 insertions(+), 0 deletions(-)
 diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
 index 2fc61a5..6e5c88f 100644
 --- a/virt/kvm/ioapic.h
 +++ b/virt/kvm/ioapic.h
 @@ -34,6 +34,11 @@ struct kvm_vcpu;
  #define IOAPIC_INIT 0x5
  #define IOAPIC_EXTINT   0x7
 +struct rtc_status {
 +int pending_eoi;
 +DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
 +};
 +
  struct kvm_ioapic { u64 base_address;   u32 ioregsel; @@ -47,6 
 +52,9
  @@ struct kvm_ioapic {  void (*ack_notifier)(void *opaque, int irq);
  spinlock_t lock;DECLARE_BITMAP(handled_vectors, 256);
 +#ifdef CONFIG_X86
 +struct rtc_status rtc_status;
 +#endif
 IA64 KVM is almost dead, but we still add CONFIG_X86 everywhere in these
 patches. Lets drop all CONFIG_X86 throughout the patches and instead leave
 only one:
 #ifdef CONFIG_X86
 #define RTC_GSI 8
 else
 #define RTC_GSI 255
 #endif
 
 Then use RTC_GSI instead of 8 everywhere and the code will be effectively
 disabled on IA64.
Nice idea!

  };
  
  #ifdef DEBUG
 --
 1.7.1
 
 --
   Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 3/6] KVM : Calculate destination vcpu on interrupt injection

2013-03-21 Thread Gleb Natapov
On Thu, Mar 21, 2013 at 12:12:06PM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-03-21:
  On Thu, Mar 21, 2013 at 11:56:05AM +, Zhang, Yang Z wrote:
  Gleb Natapov wrote on 2013-03-21:
  On Thu, Mar 21, 2013 at 06:49:21PM +0800, Yang Zhang wrote:
  From: Yang Zhang yang.z.zh...@intel.com
  
  Add a new parameter to know vcpus who received the interrupt.
  
  Signed-off-by: Yang Zhang yang.z.zh...@intel.com
  ---
   arch/x86/kvm/lapic.c |   21 -
   arch/x86/kvm/lapic.h |5 +++--
   virt/kvm/ioapic.c|2 +-
   virt/kvm/ioapic.h|2 +-
   virt/kvm/irq_comm.c  |   12 ++--
   5 files changed, 27 insertions(+), 15 deletions(-)
  diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
  index d3e322a..5f6b1d0 100644
  --- a/arch/x86/kvm/lapic.c
  +++ b/arch/x86/kvm/lapic.c
  @@ -433,10 +433,21 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu
  *vcpu)
   static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode);
  -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
  +static void kvm_set_irq_dest_map(struct kvm_vcpu *vcpu, unsigned long
  *dest_map) +{ +  if (!kvm_lapic_enabled(vcpu)) + return;
  Why this check here?
  The vcpu who didn't enable apic should not account as destination vcpu.
  Without this check, if broadcast interrupt, all cpus will treat as
  destination vcpu, but only those who enabled apic will receive the
  interrupt. There are same check in __apic_accept_irq(): if
  (unlikely(!apic_enabled(apic)))
   break;
  I see, but you use more strict check that also checks that apic is
  emulated by the kernel and we wouldn't be here if it wasn't. Anyway lets
 Do you mean the check add in here will block userspace apic? Shouldn't only 
 in-kernel apic will get here?
 
No, it will not block. It checks for an in-kernel apic needlessly. Since we
patch all those checks out anyway using jump labels, it does not really
affect performance, but I prefer to make only the necessary checks for
consistency.

  move bitmap update into __apic_accept_irq().
 Sure.
 

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/11] KVM: nVMX: shadow VMCS support, v1

2013-03-21 Thread Orit Wasserman
On 03/10/2013 06:03 PM, Abel Gordon wrote:
 This series of patches implements shadow-vmcs capability for nested VMX.
 
 Shadow-vmcs - background and overview:
 
  In Intel VMX, vmread and vmwrite privileged instructions are used by the
  hypervisor to read and modify the guest and host specifications (VMCS). In a
  nested virtualization environment, L1 executes multiple vmread and vmwrite
  instruction to handle a single L2 exit. Each vmread and vmwrite executed by 
 L1
  traps (cause an exit) to the L0 hypervisor (KVM). L0 emulates the instruction
  behaviour and resumes L1 execution.
 
  Removing the need to trap and emulate these special instructions reduces the
  number of exits and improves nested virtualization performance. As it was 
 first
  evaluated in [1], exit-less vmread and vmwrite can reduce nested 
 virtualization
  overhead up-to 40%.
  
  Intel introduced a new feature to their processors called shadow-vmcs.  Using
  shadow-vmcs, L0 can configure the processor to let L1 running in guest-mode
  access VMCS12 fields using vmread and vmwrite instructions but without 
 causing
  an exit to L0. The VMCS12 fields' data is stored in a shadow-vmcs controlled
  by L0.
 
 Shadow-vmcs - design considerations: 
 
  A shadow-vmcs is processor-dependent and must be accessed by L0 or L1 using
  vmread and vmwrite instructions. With nested virtualization we aim to 
 abstract
  the hardware from the L1 hypervisor. Thus, to avoid hardware dependencies we
  preferred to keep the software defined VMCS12 format as part of L1 address 
 space
  and hold the processor-specific shadow-vmcs format only in L0 address space.
  In other words, the shadow-vmcs is used by L0 as an accelerator but the 
 format
  and content is never exposed to L1 directly. L0 syncs the content of the
  processor-specific shadow vmcs with the content of the software-controlled
  VMCS12 format.
 
  We could have kept the processor-specific shadow-vmcs format in L1 
 address
  space to avoid using the software defined VMCS12 format; however, this type 
 of
  design/implementation would have created hardware dependencies and
  would have complicated other capabilities (e.g. Live Migration of L1).
  
 Acknowledgments:
 
  Many thanks to
  Xu, Dongxiao dongxiao...@intel.com
  Nakajima, Jun jun.nakaj...@intel.com
  Har'El, Nadav na...@harel.org.il 
   
  for the insightful discussions, comments and reviews.
 
 
  These patches were easily created and maintained using
  Patchouli -- patch creator
  http://patchouli.sourceforge.net/
 
 
 [1] The Turtles Project: Design and Implementation of Nested Virtualization,
 http://www.usenix.org/events/osdi10/tech/full_papers/Ben-Yehuda.pdf
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 
Reviewed-by: Orit Wasserman owass...@redhat.com

By the way do you have some performance results, how does it improve nested ?

Orit
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v5 3/6] KVM : Calculate destination vcpu on interrupt injection

2013-03-21 Thread Zhang, Yang Z
Gleb Natapov wrote on 2013-03-21:
 On Thu, Mar 21, 2013 at 12:12:06PM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-03-21:
 On Thu, Mar 21, 2013 at 11:56:05AM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-03-21:
 On Thu, Mar 21, 2013 at 06:49:21PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Add a new parameter to know vcpus who received the interrupt.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  arch/x86/kvm/lapic.c |   21 -
  arch/x86/kvm/lapic.h |5 +++--
  virt/kvm/ioapic.c|2 +-
  virt/kvm/ioapic.h|2 +-
  virt/kvm/irq_comm.c  |   12 ++--
  5 files changed, 27 insertions(+), 15 deletions(-)
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index d3e322a..5f6b1d0 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -433,10 +433,21 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu
 *vcpu)
  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
   int vector, int level, int trig_mode);
 -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq
 *irq) +static void kvm_set_irq_dest_map(struct kvm_vcpu *vcpu,
 unsigned long *dest_map) +{ +if (!kvm_lapic_enabled(vcpu))
 +return;
 Why this check here?
 The vcpu who didn't enable apic should not account as destination vcpu.
 Without this check, if broadcast interrupt, all cpus will treat as
 destination vcpu, but only those who enabled apic will receive the
 interrupt. There are same check in __apic_accept_irq(): if
 (unlikely(!apic_enabled(apic)))
  break;
 I see, but you use more strict check that also checks that apic is
 emulated by the kernel and we wouldn't be here if it wasn't. Anyway lets
 Do you mean the check add in here will block userspace apic?
 Shouldn't only in-kernel apic will get here?
 
 No, it will not block. It checks for an in-kernel apic needlessly. Since we
 patch all those checks out anyway using jump labels, it does not really
 affect performance, but I prefer to make only the necessary checks for
 consistency.
Makes sense.
 
 move bitmap update into __apic_accept_irq().
 Sure.
 
 
 --
   Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 4/7] KVM: MMU: delete shadow page from hash list in kvm_mmu_prepare_zap_page

2013-03-21 Thread Gleb Natapov
On Wed, Mar 20, 2013 at 04:30:24PM +0800, Xiao Guangrong wrote:
 Move the deletion of the shadow page from the hash list from kvm_mmu_commit_zap_page to
 kvm_mmu_prepare_zap_page, so that we can free the shadow page out of mmu-lock.
 
 Also, delete the invalid shadow page from the hash list since this page can
 not be reused anymore. This makes resetting the mmu-cache easier - we do not need
 to care about all hash entries after resetting the mmu-cache.
 
 Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
 ---
  arch/x86/kvm/mmu.c |8 ++--
  1 files changed, 6 insertions(+), 2 deletions(-)
 
 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index dc37512..5578c91 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1472,7 +1472,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
 *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
   ASSERT(is_empty_shadow_page(sp-spt));
 - hlist_del(sp-hash_link);
 +
   list_del(sp-link);
   free_page((unsigned long)sp-spt);
   if (!sp-role.direct)
 @@ -1660,7 +1660,8 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
  
  #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)  
 \
   for_each_gfn_sp(_kvm, _sp, _gfn)\
 - if ((_sp)-role.direct || (_sp)-role.invalid) {} else
 + if ((_sp)-role.direct ||   \
 +   ((_sp)-role.invalid  WARN_ON(1))) {} else
  
  /* @sp-gfn should be write-protected at the call site */
  static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 @@ -2079,6 +2080,9 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, 
 struct kvm_mmu_page *sp,
   unaccount_shadowed(kvm, sp-gfn);
   if (sp-unsync)
   kvm_unlink_unsync_page(kvm, sp);
 +
 + hlist_del_init(sp-hash_link);
 +
Now we delete roots from hash, but leave it on active_mmu_pages list. Is
this OK?

   if (!sp-root_count) {
   /* Count self */
   ret++;
 -- 
 1.7.7.6

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/11] KVM: nVMX: shadow VMCS support, v1

2013-03-21 Thread Abel Gordon


Orit Wasserman owass...@redhat.com wrote on 21/03/2013 02:22:44 PM:

 By the way do you have some performance results, how does it improve
nested ?

Only the old numbers we obtained emulating this type of feature using
Nehalem processors and we --including you :)-- published in the Turtles
papers: http://static.usenix.org/event/osdi10/tech/slides/ben-yehuda.pdf
The results showed that up-to 40% of nested overhead was caused
by L0 trapping and emulating L1 vmread/vmwrite instructions.

To handle a single L2 exit, L1 performs around 10 vmread/vmwrite
instructions (10 exits + 10 entries), so this feature
should reduce the virtual exit/entry cost (L2->L1->L2)
by at least an order of magnitude. Instead of doing a long chain of
entries/exits (L2->L0->L1->L0->L1->L0->L1->L0->L2) we will have
a simple and short chain (L2->L0->L1->L0->L2).

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Host kernel crash at pci_find_upstream_pcie_bridge on VM exit

2013-03-21 Thread Ganesh Narayanaswamy
Hi Alex,

Yes. They are PCIe devices which expose the PCIe functionality:

-bash-4.1# lspci -vv -s 04:00
….
Capabilities: [ac] Express (v2) Endpoint, MSI 00

-bash-4.1# lspci -vv -s 03:00
….
Capabilities: [80] Express (v2) Endpoint, MSI 00

Is there any dependency issue here? Does KVM expect the downstream ports of
the PCIe switch to be passed through as well?

Thanks,
Ganesh

On Mar 20, 2013, at 7:41 PM, Alex Williamson wrote:

 On Tue, 2013-03-19 at 17:09 -0700, Ganesh Narayanaswamy wrote:
 Hi Alex,
 
 Thanks for your reply.  The pci devices in question are proprietary FPGAs.  
 Here is the lspci -tv output:
 
 -bash-4.1# lspci -tv
 -[:00]-+-00.0  Intel Corporation Sandy Bridge DRAM Controller
   +-01.0-[01-04]00.0-[02-04]--+-01.0-[03]00.0  Broadcom 
 Corporation Device b850
   |   \-02.0-[04]00.0  Broadcom 
 Corporation Device b850
   +-01.1-[05]--
   +-06.0-[06]--+-00.0  Intel Corporation Device 0434
   |+-00.1  Intel Corporation Device 0438
   |+-00.2  Intel Corporation Device 0438
   |+-00.3  Intel Corporation Device 0436
   |\-00.4  Intel Corporation Device 0436
   +-1d.0  Intel Corporation Device 2334
   +-1f.0  Intel Corporation Device 2310
   +-1f.2  Intel Corporation Device 2323
   +-1f.3  Intel Corporation Device 2330
   +-1f.4  Intel Corporation Device 2331
   +-1f.6  Intel Corporation Device 2332
   \-1f.7  Intel Corporation Device 2360
 
 My qemu command line is as follows:
 
 qemu-system-x86_64 -M q35 --enable-kvm -m 2048 -nographic -vga std
 -usb -drive file=IMG file,if=none,id=drive-sata-disk0,format=raw
 -device ahci,id=ahci -device
 ide-drive,bus=ahci.0,drive=drive-sata-disk0,id=sata-disk0,bootindex=1
 -device pci-assign,host=04:00.0 -device pci-assign,host=03:00.0
 
 
 The PCIe bridge is a PLX 8613 device:
 
 01:00.0 PCI bridge: PLX Technology, Inc. PEX 8613 12-lane, 3-Port PCI 
 Express Gen 2 (5.0 GT/s) Switch (rev ba)
 02:01.0 PCI bridge: PLX Technology, Inc. PEX 8613 12-lane, 3-Port PCI 
 Express Gen 2 (5.0 GT/s) Switch (rev ba)
 02:02.0 PCI bridge: PLX Technology, Inc. PEX 8613 12-lane, 3-Port PCI 
 Express Gen 2 (5.0 GT/s) Switch (rev ba)
 
 As shown by the lspci -tv output, each of the PCI device being passed
 through is connected to one of the downstream ports of the PLX PCI
 bridge.
 
 Are your FPGAs actually PCIe devices (they must be because they connect
 to a PCIe switch) that do not expose a PCIe capability?  For example,
 lspci -v:
 
   Capabilities: [e0] Express Endpoint, MSI 00
 
 If so, they're in violation of the PCI Express specification and likely
 the cause of this problem.  Thanks,
 
 Alex
 
 On Mar 19, 2013, at 3:28 PM, Alex Williamson wrote:
 
 On Tue, 2013-03-19 at 13:30 -0700, Ganesh Narayanaswamy wrote:
 Hi,
 
 I am running qemu with kvm and VT-d enabled and a couple of PCI
 devices assigned to the guest VM. Both host and guest are running
 linux 2.6 kernel.  
 
 The passthrough works fine, but when I exit the VM, the host kernel
 crashes with the following backtrace:
 
 4[ 5569.836893] Process qemu-system-x86 (pid: 2925, threadinfo 
 8801f5f4, task 88024fa28720)
 0[ 5569.944946] Stack:
 4[ 5569.968845]  8801f5f41aa8 811a45fb 88024f04b680 
 88024f049980
 4[ 5570.057156]  88024f04b680 88024f049988 8801f5f41b08 
 811a6371
 4[ 5570.145470]  8801f5f41ad8 81391045 0246 
 88024f049990
 0[ 5570.233785] Call Trace:
 4[ 5570.262880]  [811a45fb] 
 iommu_detach_dependent_devices+0x25/0x91
 4[ 5570.344958]  [811a6371] vm_domain_exit+0xf8/0x28b
 4[ 5570.411457]  [81391045] ? sub_preempt_count+0x92/0xa6
 4[ 5570.482106]  [811a651a] 
 intel_iommu_domain_destroy+0x16/0x18
 4[ 5570.560030]  [811fb5ea] iommu_domain_free+0x16/0x22
 4[ 5570.628611]  [a0006261] kvm_iommu_unmap_guest+0x22/0x28 
 [kvm]
 4[ 5570.707570]  [a0009b7b] kvm_arch_destroy_vm+0x19/0x12a 
 [kvm]
 4[ 5570.785492]  [a0002614] kvm_put_kvm+0xe6/0x129 [kvm]
 4[ 5570.855102]  [a0002eb3] kvm_vcpu_release+0x13/0x17 [kvm]
 4[ 5570.928867]  [8109cdfc] fput+0x117/0x1be
 4[ 5570.986013]  [8109a147] filp_close+0x63/0x6d
 4[ 5571.047314]  [810342dd] put_files_struct+0x6f/0xda
 4[ 5571.114845]  [8103438e] exit_files+0x46/0x4e
 4[ 5571.176145]  [81035b3d] do_exit+0x1fc/0x681
 4[ 5571.236416]  [a000dedc] ? 
 kvm_arch_vcpu_ioctl_run+0xc2d/0xc55 [kvm]
 4[ 5571.321605]  [8138cc41] ? __mutex_lock_slowpath+0x26c/0x294
 4[ 5571.398490]  [81036034] do_group_exit+0x72/0x9a
 4[ 5571.462907]  [8103fec9] get_signal_to_deliver+0x331/0x350
 4[ 5571.537719]  [81001f0f] do_signal+0x6d/0x69a
 4[ 5571.599013]  [811da1fc] ? put_ldisc+0x92/0x97
 4[ 5571.661353]  

Re: [PATCH] KVM: x86: Avoid busy loops over uninjectable pending APIC timers

2013-03-21 Thread Marcelo Tosatti
On Thu, Mar 21, 2013 at 06:54:46AM +0200, Gleb Natapov wrote:
 On Wed, Mar 20, 2013 at 08:19:13PM -0300, Marcelo Tosatti wrote:
  On Wed, Mar 20, 2013 at 11:32:38PM +0200, Gleb Natapov wrote:
   On Wed, Mar 20, 2013 at 05:03:19PM -0300, Marcelo Tosatti wrote:
On Wed, Mar 20, 2013 at 04:30:33PM -0300, Marcelo Tosatti wrote:
 On Sun, Mar 17, 2013 at 12:47:17PM +0200, Gleb Natapov wrote:
  On Sun, Mar 17, 2013 at 11:45:34AM +0100, Jan Kiszka wrote:
   On 2013-03-17 09:47, Gleb Natapov wrote:
On Sat, Mar 16, 2013 at 09:49:07PM +0100, Jan Kiszka wrote:
From: Jan Kiszka jan.kis...@siemens.com
   
If the guest didn't take the last APIC timer interrupt yet and 
generates
another one on top, e.g. via periodic mode, we do not block 
the VCPU
even if the guest state is halted. The reason is that
apic_has_pending_timer continues to return a non-zero value.
   
Fix this busy loop by taking the IRR content for the LVT 
vector in
apic_has_pending_timer into account.
   
Just drop coalescing tracking for lapic interrupt. After posted 
interrupt
will be merged __apic_accept_irq() will no longer return 
coalescing
information, so the code will be dead anyway.
   
   That requires the RTC decoalescing series to go first to avoid a
   regression, no? Then let's postpone this topic for now.
   
  Yes, but decoalescing will work only for RTC :(
 
 Are you proposing to drop LAPIC interrupt reinjection? 

Since timer handling and injection is VCPU-local for LAPIC,
__apic_accept_irq can (and must) return coalesced information (cannot
drop LAPIC interrupt reinjection).

   Why can't we drop LAPIC interrupt reinjection? Proposed posted interrupt
   patches do not properly check for interrupt coalescing even for
   VCPU-local injection.
   
   --
 Gleb.
  
  Because older Linux guests depend on reinjection for proper timekeeping.
 Which versions? Those without kvmclock? Can we make them use PIT instead?
 Posted interrupts going to break them.

There is no reason to break them if it's OK to receive reinjection info
from LAPIC... it's a matter of returning the information from
apic_accept_irq, no big deal.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: Avoid busy loops over uninjectable pending APIC timers

2013-03-21 Thread Gleb Natapov
On Thu, Mar 21, 2013 at 11:02:24AM -0300, Marcelo Tosatti wrote:
 On Thu, Mar 21, 2013 at 06:54:46AM +0200, Gleb Natapov wrote:
  On Wed, Mar 20, 2013 at 08:19:13PM -0300, Marcelo Tosatti wrote:
   On Wed, Mar 20, 2013 at 11:32:38PM +0200, Gleb Natapov wrote:
On Wed, Mar 20, 2013 at 05:03:19PM -0300, Marcelo Tosatti wrote:
 On Wed, Mar 20, 2013 at 04:30:33PM -0300, Marcelo Tosatti wrote:
  On Sun, Mar 17, 2013 at 12:47:17PM +0200, Gleb Natapov wrote:
   On Sun, Mar 17, 2013 at 11:45:34AM +0100, Jan Kiszka wrote:
On 2013-03-17 09:47, Gleb Natapov wrote:
 On Sat, Mar 16, 2013 at 09:49:07PM +0100, Jan Kiszka wrote:
 From: Jan Kiszka jan.kis...@siemens.com

 If the guest didn't take the last APIC timer interrupt yet 
 and generates
 another one on top, e.g. via periodic mode, we do not block 
 the VCPU
 even if the guest state is halted. The reason is that
 apic_has_pending_timer continues to return a non-zero value.

 Fix this busy loop by taking the IRR content for the LVT 
 vector in
 apic_has_pending_timer into account.

 Just drop coalescing tracking for lapic interrupt. After 
 posted interrupt
 will be merged __apic_accept_irq() will no longer return 
 coalescing
 information, so the code will be dead anyway.

That requires the RTC decoalescing series to go first to avoid a
regression, no? Then let's postpone this topic for now.

   Yes, but decoalescing will work only for RTC :(
  
  Are you proposing to drop LAPIC interrupt reinjection? 
 
 Since timer handling and injection is VCPU-local for LAPIC,
 __apic_accept_irq can (and must) return coalesced information (cannot
 drop LAPIC interrupt reinjection).
 
Why can't we drop LAPIC interrupt reinjection? Proposed posted interrupt
patches do not properly check for interrupt coalescing even for
VCPU-local injection.

--
Gleb.
   
   Because older Linux guests depend on reinjection for proper timekeeping.
  Which versions? Those without kvmclock? Can we make them use PIT instead?
  Posted interrupts going to break them.
 
 There is no reason to break them if its OK to receive reinjection info
 from LAPIC... its a matter of returning the information from
 apic_accept_irq, no big deal.
 
But the current PI patches do break them, that's my point. So we either
need to revise them again, or drop LAPIC timer reinjection. Making the
apic_accept_irq semantics "it returns coalescing info, but only sometimes"
is dubious though.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] KVM: x86: Avoid busy loops over uninjectable pending APIC timers

2013-03-21 Thread Zhang, Yang Z
Gleb Natapov wrote on 2013-03-21:
 On Thu, Mar 21, 2013 at 11:02:24AM -0300, Marcelo Tosatti wrote:
 On Thu, Mar 21, 2013 at 06:54:46AM +0200, Gleb Natapov wrote:
 On Wed, Mar 20, 2013 at 08:19:13PM -0300, Marcelo Tosatti wrote:
 On Wed, Mar 20, 2013 at 11:32:38PM +0200, Gleb Natapov wrote:
 On Wed, Mar 20, 2013 at 05:03:19PM -0300, Marcelo Tosatti wrote:
 On Wed, Mar 20, 2013 at 04:30:33PM -0300, Marcelo Tosatti wrote:
 On Sun, Mar 17, 2013 at 12:47:17PM +0200, Gleb Natapov wrote:
 On Sun, Mar 17, 2013 at 11:45:34AM +0100, Jan Kiszka wrote:
 On 2013-03-17 09:47, Gleb Natapov wrote:
 On Sat, Mar 16, 2013 at 09:49:07PM +0100, Jan Kiszka wrote:
 From: Jan Kiszka jan.kis...@siemens.com
 
 If the guest didn't take the last APIC timer interrupt yet and
 generates another one on top, e.g. via periodic mode, we do
 not block the VCPU even if the guest state is halted. The
 reason is that apic_has_pending_timer continues to return a
 non-zero value.
 
 Fix this busy loop by taking the IRR content for the LVT vector in
 apic_has_pending_timer into account.
 
 Just drop coalescing tracking for lapic interrupt. After posted
 interrupt will be merged __apic_accept_irq() will no longer
 return coalescing information, so the code will be dead anyway.
 
 That requires the RTC decoalescing series to go first to avoid a
 regression, no? Then let's postpone this topic for now.
 
 Yes, but decoalescing will work only for RTC :(
 
 Are you proposing to drop LAPIC interrupt reinjection?
 
 Since timer handling and injection is VCPU-local for LAPIC,
 __apic_accept_irq can (and must) return coalesced information (cannot
 drop LAPIC interrupt reinjection).
 
 Why can't we drop LAPIC interrupt reinjection? Proposed posted
 interrupt patches do not properly check for interrupt coalescing
 even for VCPU-local injection.
 
 --
   Gleb.
 
 Because older Linux guests depend on reinjection for proper timekeeping.
 Which versions? Those without kvmclock? Can we make them use PIT
 instead? Posted interrupts going to break them.
 
 There is no reason to break them if its OK to receive reinjection info
 from LAPIC... its a matter of returning the information from
 apic_accept_irq, no big deal.
 
 But current PI patches do break them, thats my point. So we either
 need to revise them again, or drop LAPIC timer reinjection. Making
 apic_accept_irq semantics it returns coalescing info, but only sometimes
 is dubious though.
We may roll back to the initial idea: test both irr and pir to get coalescing 
info. In this case, always inject the LAPIC timer in vcpu context, so 
apic_accept_irq() will return the right coalescing info.
Also, we need to add comments to tell the caller that apic_accept_irq() can ensure the 
return value is correct only when the caller is in the target vcpu context.

Best regards,
Yang


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: MMU: improve n_max_mmu_pages calculation with TDP

2013-03-21 Thread Marcelo Tosatti
On Thu, Mar 21, 2013 at 01:41:59PM +0800, Xiao Guangrong wrote:
 On 03/21/2013 04:14 AM, Marcelo Tosatti wrote:
  
  kvm_mmu_calculate_mmu_pages numbers, 
  
  maximum number of shadow pages = 2% of mapped guest pages
  
  Does not make sense for TDP guests where mapping all of guest
  memory with 4k pages cannot exceed mapped guest pages / 512
  (not counting root pages).
  
  Allow that maximum for TDP, forcing the guest to recycle otherwise.
  
  Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
  
  diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
  index 956ca35..a9694a8d7 100644
  --- a/arch/x86/kvm/mmu.c
  +++ b/arch/x86/kvm/mmu.c
  @@ -4293,7 +4293,7 @@ nomem:
   unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
   {
  unsigned int nr_mmu_pages;
  -   unsigned int  nr_pages = 0;
  +   unsigned int i, nr_pages = 0;
  struct kvm_memslots *slots;
  struct kvm_memory_slot *memslot;
  
  @@ -4302,7 +4302,19 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm 
  *kvm)
  kvm_for_each_memslot(memslot, slots)
  nr_pages += memslot-npages;
  
  -   nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
  +   if (tdp_enabled) {
  +   /* one root page */
  +   nr_mmu_pages = 1;
  +   /* nr_pages / (512^i) per level, due to
  +* guest RAM map being linear */
  +   for (i = 1; i < 4; i++) {
  +   int nr_pages_round = nr_pages + (1 << (9*i));
  +   nr_mmu_pages += nr_pages_round >> (9*i);
  +   }
 
 Marcelo,
 
 Can it work if nested guest is used? Did you see any problem in practice 
 (direct guest
 uses more memory than your calculation)?

Direct guest can use more than the calculation by switching between
different paging modes.

About nested guest: at one point in time the working set cannot exceed 
the number of physical pages visible by the guest.

Allowing an excessively high number of shadow pages is a security
concern, also, as unpreemptable long operations are necessary to tear
down the pages.

 And mmio also can build some page table that looks like not considered
 in this patch.

Right, but its only a few pages. Same argument as above: working set at
one given time is smaller than total RAM. Do you see any potential
problem?

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: allow host header to be included even for !CONFIG_KVM

2013-03-21 Thread Kevin Hilman
Gleb Natapov g...@redhat.com writes:

 On Wed, Mar 20, 2013 at 06:58:41PM -0500, Scott Wood wrote:
 On 03/14/2013 07:13:46 PM, Kevin Hilman wrote:
 The new context tracking subsystem unconditionally includes kvm_host.h
 headers for the guest enter/exit macros.  This causes a compile
 failure when KVM is not enabled.
 
 Fix by adding an IS_ENABLED(CONFIG_KVM) check to kvm_host so it can
 be included/compiled even when KVM is not enabled.
 
 Cc: Frederic Weisbecker fweis...@gmail.com
 Signed-off-by: Kevin Hilman khil...@linaro.org
 ---
 Applies on v3.9-rc2
 
  include/linux/kvm_host.h | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)
 
 This broke the PPC non-KVM build, which was relying on stub
 functions in kvm_ppc.h, which relies on struct vcpu in kvm_host.h.
 
 Why can't the entirety kvm_host.h be included regardless of
 CONFIG_KVM, just like most other feature-specific headers?  Why
 can't the if/else just go around the functions that you want to stub
 out for non-KVM builds?
 
 Kevin,

  What compilation failure does this patch fix? I presume something ARM
 related.

Not specifically ARM related, but more context tracking related since
kernel/context_tracking.c pulls in kvm_host.h, which attempts to pull in
asm/kvm*.h which may not exist on some platforms.

At least for ARM, KVM support was added in v3.9 so this patch can
probably be dropped since the non-KVM builds on ARM now work.  But any
platform without the asm/kvm*.h will still be broken when trying to
build the context tracker.

Kevin
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 6/6] kvm/ppc/mpic: in-kernel MPIC emulation

2013-03-21 Thread Scott Wood

On 03/21/2013 03:28:35 AM, Alexander Graf wrote:


On 14.02.2013, at 06:49, Scott Wood wrote:

 Hook the MPIC code up to the KVM interfaces, add locking, etc.

 TODO: irqfd support

 Signed-off-by: Scott Wood scottw...@freescale.com

Could you please split this patch up on your next respin?


Any particular split you're looking for?

The only reason it's split as much as it is already is to give some  
chance of merging updates from QEMU being less painful.  As far as the  
kernel is concerned, this is new code, which is not functional (and  
thus not built) before this patch.  There aren't meaningful  
intermediate states.



Also please make sure you don't have #if 0'ed code in here.


Well, yeah.  Note the RFC. :-)

-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 6/6] kvm/ppc/mpic: in-kernel MPIC emulation

2013-03-21 Thread Alexander Graf

On 21.03.2013, at 15:43, Scott Wood wrote:

 On 03/21/2013 03:28:35 AM, Alexander Graf wrote:
 On 14.02.2013, at 06:49, Scott Wood wrote:
  Hook the MPIC code up to the KVM interfaces, add locking, etc.
 
  TODO: irqfd support
 
  Signed-off-by: Scott Wood scottw...@freescale.com
 Could you please split this patch up on your next respin?
 
 Any particular split you're looking for?

Anything that makes reviewing it easier :). I can't concentrate for 100k 
straight.

 The only reason it's split as much as it is already is to give some chance of 
 merging updates from QEMU being less painful.  As far as the kernel is 
 concerned, this is new code, which is not functional (and thus not built) 
 before this patch.  There aren't meaningful intermediate states.
 
 Also please make sure you don't have #if 0'ed code in here.
 
 Well, yeah.  Note the RFC. :-)

Just wanted to make sure you don't forget them when you send out a non-RFC :). 
Not that I'd assume you'd do that ;)


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Virtualbox svga card in KVM

2013-03-21 Thread Alon Levy
 
 
 Hi,
  I am planning on bringing in the virtualbox svga card into kvm
  as a new svga card type (vbox probably?) so that we can load
  the VirtualBox SVGA card drivers in the guest.
 
  Is this even feasible?. Any ideas on where I should start
  looking?

I don't see why it wouldn't, sounds like a great idea. You can look at 
hw/qxl.c, hw/cirrus_vga.c, hw/vmware_vga.c as existing pci vga cards. Also this 
should go on qemu-devel (cc'ed).

 
 Regards,Sriram
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Host kernel crash at pci_find_upstream_pcie_bridge on VM exit

2013-03-21 Thread Alex Williamson

On Thu, 2013-03-21 at 06:55 -0700, Ganesh Narayanaswamy wrote:
 Hi Alex,
 
 Yes. They are PCIe devices which expose the PCIe functionality:
 
 -bash-4.1# lspci -vv -s 04:00
 ….
   Capabilities: [ac] Express (v2) Endpoint, MSI 00
 
 -bash-4.1# lspci -vv -s 03:00
 ….
   Capabilities: [80] Express (v2) Endpoint, MSI 00

Ok, so we're not hitting the obvious problem that
pci_find_upstream_pcie_bridge thinks we're starting at a legacy PCI
device and expects there to be a PCIe-to-PCI bridge.  What about the PLX
switch ports, do they all have express capabilities?  Perhaps you can
provide lspci -vvv for the hierarchy to your FPGA device and just
exclude or obfuscate the FPGA devices themselves if they're somehow so
secret that we could learn something about them from config space
(unlikely).

Do the FPGA devices support some form of reset, either express FLR, AF
FLR, or do a soft reset on D3hot-D0?  Are there any dmesg entries prior
to the crash?  If KVM attempts to reset the device via a secondary bus
reset on the downstream switch port and that triggers a surprise hotplug
things can get broken fast.  The downstream ports can be unbound from
pciehp if this is the problem.

 Is there any dependency issue here? Does KVM expect the downstream ports of
 the PCIe switch to be passed through as well?

No, switch ports and bridges should never be attached to the guest.  Is
there some reason you're using -M q35?  It's still a bit fragile for
device assignment at this point.  Have you tried vfio-pci for doing the
assignment?  Thanks,

Alex

 On Mar 20, 2013, at 7:41 PM, Alex Williamson wrote:
 
  On Tue, 2013-03-19 at 17:09 -0700, Ganesh Narayanaswamy wrote:
  Hi Alex,
  
  Thanks for your reply.  The pci devices in question are proprietary FPGAs. 
   Here is the lspci -tv output:
  
  -bash-4.1# lspci -tv
  -[:00]-+-00.0  Intel Corporation Sandy Bridge DRAM Controller
+-01.0-[01-04]00.0-[02-04]--+-01.0-[03]00.0  Broadcom 
  Corporation Device b850
|   \-02.0-[04]00.0  Broadcom 
  Corporation Device b850
+-01.1-[05]--
+-06.0-[06]--+-00.0  Intel Corporation Device 0434
|+-00.1  Intel Corporation Device 0438
|+-00.2  Intel Corporation Device 0438
|+-00.3  Intel Corporation Device 0436
|\-00.4  Intel Corporation Device 0436
+-1d.0  Intel Corporation Device 2334
+-1f.0  Intel Corporation Device 2310
+-1f.2  Intel Corporation Device 2323
+-1f.3  Intel Corporation Device 2330
+-1f.4  Intel Corporation Device 2331
+-1f.6  Intel Corporation Device 2332
\-1f.7  Intel Corporation Device 2360
  
  My qemu command line is as follows:
  
  qemu-system-x86_64 -M q35 --enable-kvm -m 2048 -nographic -vga std
  -usb -drive file=IMG file,if=none,id=drive-sata-disk0,format=raw
  -device ahci,id=ahci -device
  ide-drive,bus=ahci.0,drive=drive-sata-disk0,id=sata-disk0,bootindex=1
  -device pci-assign,host=04:00.0 -device pci-assign,host=03:00.0
  
  
  The PCIe bridge is a PLX 8613 device:
  
  01:00.0 PCI bridge: PLX Technology, Inc. PEX 8613 12-lane, 3-Port PCI 
  Express Gen 2 (5.0 GT/s) Switch (rev ba)
  02:01.0 PCI bridge: PLX Technology, Inc. PEX 8613 12-lane, 3-Port PCI 
  Express Gen 2 (5.0 GT/s) Switch (rev ba)
  02:02.0 PCI bridge: PLX Technology, Inc. PEX 8613 12-lane, 3-Port PCI 
  Express Gen 2 (5.0 GT/s) Switch (rev ba)
  
  As shown by the lspci -tv output, each of the PCI device being passed
  through is connected to one of the downstream ports of the PLX PCI
  bridge.
  
  Are your FPGAs actually PCIe devices (they must be because they connect
  to a PCIe switch) that do not expose a PCIe capability?  For example,
  lspci -v:
  
  Capabilities: [e0] Express Endpoint, MSI 00
  
  If so, they're in violation of the PCI Express specification and likely
  the cause of this problem.  Thanks,
  
  Alex
  
  On Mar 19, 2013, at 3:28 PM, Alex Williamson wrote:
  
  On Tue, 2013-03-19 at 13:30 -0700, Ganesh Narayanaswamy wrote:
  Hi,
  
  I am running qemu with kvm and VT-d enabled and a couple of PCI
  devices assigned to the guest VM. Both host and guest are running
  linux 2.6 kernel.  
  
  The passthrough works fine, but when I exit the VM, the host kernel
  crashes with the following backtrace:
  
  4[ 5569.836893] Process qemu-system-x86 (pid: 2925, threadinfo 
  8801f5f4, task 88024fa28720)
  0[ 5569.944946] Stack:
  4[ 5569.968845]  8801f5f41aa8 811a45fb 88024f04b680 
  88024f049980
  4[ 5570.057156]  88024f04b680 88024f049988 8801f5f41b08 
  811a6371
  4[ 5570.145470]  8801f5f41ad8 81391045 0246 
  88024f049990
  0[ 5570.233785] Call Trace:
  4[ 5570.262880]  [811a45fb] 
  iommu_detach_dependent_devices+0x25/0x91
  4[ 5570.344958]  

Re: [PATCH 3/3] ARM: EXYNOS5250: Register architected timers

2013-03-21 Thread Alexander Graf

On 14.03.2013, at 20:07, Sergei Shtylyov wrote:

 Hello.
 
 On 14-03-2013 4:59, Alexander Graf wrote:
 
 When running on an exynos 5250 SoC, we don't initialize the architected
 timers. The chip however supports architected timers.
 
 When we don't initialize them, KVM will try to access them and run into
 NULL pointer dereferences attempting to do so.
 
 This patch is really more of a hack than a real fix, but does get me
 working with KVM on Arndale.
 
 Signed-off-by: Alexander Graf ag...@suse.de
 ---
  arch/arm/mach-exynos/mct.c |4 
  1 file changed, 4 insertions(+)
 
 diff --git a/arch/arm/mach-exynos/mct.c b/arch/arm/mach-exynos/mct.c
 index c9d6650..eefb8af 100644
 --- a/arch/arm/mach-exynos/mct.c
 +++ b/arch/arm/mach-exynos/mct.c
 @@ -482,4 +482,8 @@ void __init exynos4_timer_init(void)
  exynos4_timer_resources();
  exynos4_clocksource_init();
  exynos4_clockevent_init();
 +
 +if (soc_is_exynos5250()) {
 +arch_timer_of_register();
 +}
 
   {} not needed here. scripts/checkpatch.pl should probably warn about it.

Yeah, I'd leave it to whoever wants to apply this patch to remove the braces 
:). IMHO it's not worth it to respin just for this.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM next tree merge onto current Linus master

2013-03-21 Thread Marcelo Tosatti
On Thu, Mar 21, 2013 at 08:49:07AM +0100, Alexander Graf wrote:
 Hi guys,
 
 Just recently this really important patch got pulled into Linus' tree for 3.9:
 
 commit 1674400aaee5b466c595a8fc310488263ce888c7
 Author: Anton Blanchard an...@samba.org
 Date:   Tue Mar 12 01:51:51 2013 +
 
 powerpc: Fix -mcmodel=medium breakage in prom_init.c
 
 Commit 5ac47f7a6efb (powerpc: Relocate prom_init.c on 64bit) made
 prom_init.c position independent by manually relocating its entries
 in the TOC.
 
 We get the address of the TOC entries with the __prom_init_toc_start
 linker symbol. If __prom_init_toc_start ends up as an entry in the
 TOC then we need to add an offset to get the current address. This is
 the case for older toolchains.
 
 On the other hand, if we have a newer toolchain that supports
 -mcmodel=medium then __prom_init_toc_start will be created by a
 relative offset from r2 (the TOC pointer). Since r2 has already been
 relocated, nothing more needs to be done.  Adding an offset in this
 case is wrong and Aaro Koskinen and Alexander Graf have noticed
 G5 and OpenBIOS breakage.
 
 Alan Modra suggested we just use r2 to get at the TOC which is simpler
 and works with both old and new toolchains.
 
 Reported-by: Alexander Graf ag...@suse.de
 Signed-off-by: Anton Blanchard an...@samba.org
 Tested-by: Aaro Koskinen aaro.koski...@iki.fi
 Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
 
 
 Without that commit, I can not boot my G5, thus I can't run automated tests 
 on it against my queue.
 
 Could you please merge kvm/next against linus/master, so that I can base my 
 trees against that?
 
 
 Alex

Done, will push master as soon as autotest finishes.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] vhost/net: fix heads usage of ubuf_info

2013-03-21 Thread Ben Hutchings
On Thu, 2013-03-21 at 08:02 +0200, Michael S. Tsirkin wrote:
 On Sun, Mar 17, 2013 at 02:29:55PM -0400, David Miller wrote:
  From: Michael S. Tsirkin m...@redhat.com
  Date: Sun, 17 Mar 2013 14:46:09 +0200
  
   ubuf info allocator uses guest controlled head as an index,
   so a malicious guest could put the same head entry in the ring twice,
   and we will get two callbacks on the same value.
   To fix use upend_idx which is guaranteed to be unique.
   
   Reported-by: Rusty Russell ru...@rustcorp.com.au
   Signed-off-by: Michael S. Tsirkin m...@redhat.com
  
  Applied and queued up for -stable, thanks.
  
  And thankfully you got the stable URL wrong,
 
 Yes I wrote sta...@kernel.org that's what an old copy
 says here:
 https://www.kernel.org/doc/Documentation/stable_kernel_rules.txt
 
 I should have known better than look at it on the 'net.  The top
 'Everything you ever wanted to know about Linux 2.6 -stable releases.'
 is a big hint that it's stale.
 Any idea who maintains this? Better update it or remove it or redirect to git.

Rob Landley maintains it, but he's been having trouble updating it since
all the upload mechanisms were changed on kernel.org.

(My stable maintenance scripts still match the old address, anyway.  Not
sure about Greg's.)

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: Avoid busy loops over uninjectable pending APIC timers

2013-03-21 Thread Gleb Natapov
On Thu, Mar 21, 2013 at 02:27:22PM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-03-21:
  On Thu, Mar 21, 2013 at 11:02:24AM -0300, Marcelo Tosatti wrote:
  On Thu, Mar 21, 2013 at 06:54:46AM +0200, Gleb Natapov wrote:
  On Wed, Mar 20, 2013 at 08:19:13PM -0300, Marcelo Tosatti wrote:
  On Wed, Mar 20, 2013 at 11:32:38PM +0200, Gleb Natapov wrote:
  On Wed, Mar 20, 2013 at 05:03:19PM -0300, Marcelo Tosatti wrote:
  On Wed, Mar 20, 2013 at 04:30:33PM -0300, Marcelo Tosatti wrote:
  On Sun, Mar 17, 2013 at 12:47:17PM +0200, Gleb Natapov wrote:
  On Sun, Mar 17, 2013 at 11:45:34AM +0100, Jan Kiszka wrote:
  On 2013-03-17 09:47, Gleb Natapov wrote:
  On Sat, Mar 16, 2013 at 09:49:07PM +0100, Jan Kiszka wrote:
  From: Jan Kiszka jan.kis...@siemens.com
  
  If the guest didn't take the last APIC timer interrupt yet and
  generates another one on top, e.g. via periodic mode, we do
  not block the VCPU even if the guest state is halted. The
  reason is that apic_has_pending_timer continues to return a
  non-zero value.
  
  Fix this busy loop by taking the IRR content for the LVT vector in
  apic_has_pending_timer into account.
  
  Just drop coalescing tracking for lapic interrupt. After posted
  interrupt will be merged __apic_accept_irq() will no longer
  return coalescing information, so the code will be dead anyway.
  
  That requires the RTC decoalescing series to go first to avoid a
  regression, no? Then let's postpone this topic for now.
  
  Yes, but decoalescing will work only for RTC :(
  
  Are you proposing to drop LAPIC interrupt reinjection?
  
  Since timer handling and injection is VCPU-local for LAPIC,
  __apic_accept_irq can (and must) return coalesced information (cannot
  drop LAPIC interrupt reinjection).
  
  Why can't we drop LAPIC interrupt reinjection? Proposed posted
  interrupt patches do not properly check for interrupt coalescing
  even for VCPU-local injection.
  
  --
  Gleb.
  
  Because older Linux guests depend on reinjection for proper timekeeping.
  Which versions? Those without kvmclock? Can we make them use PIT
  instead? Posted interrupts going to break them.
  
  There is no reason to break them if it's OK to receive reinjection info
  from LAPIC... its a matter of returning the information from
  apic_accept_irq, no big deal.
  
  But current PI patches do break them, that's my point. So we either
  need to revise them again, or drop LAPIC timer reinjection. Making
  apic_accept_irq semantics it returns coalescing info, but only sometimes
  is dubious though.
 We may rollback to the initial idea: test both irr and pir to get coalescing 
 info. In this case, inject LAPIC timer always in vcpu context. So 
 apic_accept_irq() will return right coalescing info.
 Also, we need to add comments to tell caller, apic_accept_irq() can ensure 
 the return value is correct only when caller is in target vcpu context.
 
We cannot touch irr while vcpu is in non-root operation, so we will have
to pass flag to apic_accept_irq() to let it know that it is called
synchronously. While all this is possible I want to know which guests
exactly will we break if we will not track interrupt coalescing for
lapic timer. If only 2.0 smp kernels will break we can probably drop it.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] vhost/net: fix heads usage of ubuf_info

2013-03-21 Thread Michael S. Tsirkin
On Thu, Mar 21, 2013 at 04:23:48PM +, Ben Hutchings wrote:
 On Thu, 2013-03-21 at 08:02 +0200, Michael S. Tsirkin wrote:
  On Sun, Mar 17, 2013 at 02:29:55PM -0400, David Miller wrote:
   From: Michael S. Tsirkin m...@redhat.com
   Date: Sun, 17 Mar 2013 14:46:09 +0200
   
ubuf info allocator uses guest controlled head as an index,
so a malicious guest could put the same head entry in the ring twice,
and we will get two callbacks on the same value.
To fix use upend_idx which is guaranteed to be unique.

Reported-by: Rusty Russell ru...@rustcorp.com.au
Signed-off-by: Michael S. Tsirkin m...@redhat.com
   
   Applied and queued up for -stable, thanks.
   
   And thankfully you got the stable URL wrong,
  
  Yes I wrote sta...@kernel.org that's what an old copy
  says here:
  https://www.kernel.org/doc/Documentation/stable_kernel_rules.txt
  
  I should have known better than look at it on the 'net.  The top
  'Everything you ever wanted to know about Linux 2.6 -stable releases.'
  is a big hint that it's stale.
  Any idea who maintains this? Better update it or remove it or redirect to 
  git.
 
 Rob Landley maintains it, but he's been having trouble updating it since
 all the upload mechanisms were changed on kernel.org.
 
 (My stable maintenance scripts still match the old address, anyway.  Not
 sure about Greg's.)
 
 Ben.

I hope you mean it will match both the old and the new address?


 -- 
 Ben Hutchings, Staff Engineer, Solarflare
 Not speaking for my employer; that's the marketing department's job.
 They asked us to note that Solarflare product names are trademarked.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] vhost/net: fix heads usage of ubuf_info

2013-03-21 Thread Ben Hutchings
On Thu, 2013-03-21 at 18:28 +0200, Michael S. Tsirkin wrote:
 On Thu, Mar 21, 2013 at 04:23:48PM +, Ben Hutchings wrote:
  On Thu, 2013-03-21 at 08:02 +0200, Michael S. Tsirkin wrote:
   On Sun, Mar 17, 2013 at 02:29:55PM -0400, David Miller wrote:
From: Michael S. Tsirkin m...@redhat.com
Date: Sun, 17 Mar 2013 14:46:09 +0200

 ubuf info allocator uses guest controlled head as an index,
 so a malicious guest could put the same head entry in the ring twice,
 and we will get two callbacks on the same value.
 To fix use upend_idx which is guaranteed to be unique.
 
 Reported-by: Rusty Russell ru...@rustcorp.com.au
 Signed-off-by: Michael S. Tsirkin m...@redhat.com

Applied and queued up for -stable, thanks.

And thankfully you got the stable URL wrong,
   
   Yes I wrote sta...@kernel.org that's what an old copy
   says here:
   https://www.kernel.org/doc/Documentation/stable_kernel_rules.txt
   
   I should have known better than look at it on the 'net.  The top
   'Everything you ever wanted to know about Linux 2.6 -stable releases.'
   is a big hint that it's stale.
   Any idea who maintains this? Better update it or remove it or redirect to 
   git.
  
  Rob Landley maintains it, but he's been having trouble updating it since
  all the upload mechanisms were changed on kernel.org.
  
  (My stable maintenance scripts still match the old address, anyway.  Not
  sure about Greg's.)
  
  Ben.
 
 I hope you mean it will match both the old and the new address?

Yes, of course!

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: allow host header to be included even for !CONFIG_KVM

2013-03-21 Thread Scott Wood

On 03/21/2013 09:27:14 AM, Kevin Hilman wrote:

Gleb Natapov g...@redhat.com writes:

 On Wed, Mar 20, 2013 at 06:58:41PM -0500, Scott Wood wrote:
 On 03/14/2013 07:13:46 PM, Kevin Hilman wrote:
 The new context tracking subsystem unconditionally includes  
kvm_host.h

 headers for the guest enter/exit macros.  This causes a compile
 failure when KVM is not enabled.
 
 Fix by adding an IS_ENABLED(CONFIG_KVM) check to kvm_host so it  
can

 be included/compiled even when KVM is not enabled.
 
 Cc: Frederic Weisbecker fweis...@gmail.com
 Signed-off-by: Kevin Hilman khil...@linaro.org
 ---
 Applies on v3.9-rc2
 
  include/linux/kvm_host.h | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

 This broke the PPC non-KVM build, which was relying on stub
 functions in kvm_ppc.h, which relies on struct vcpu in  
kvm_host.h.


 Why can't the entirety of kvm_host.h be included regardless of
 CONFIG_KVM, just like most other feature-specific headers?  Why
 can't the if/else just go around the functions that you want to  
stub

 out for non-KVM builds?

 Kevin,

  What compilation failure does this patch fix? I presume something ARM
 related.

Not specifically ARM related, but more context tracking related since
kernel/context_tracking.c pulls in kvm_host.h, which attempts to pull  
in

asm/kvm*.h which may not exist on some platforms.

At least for ARM, KVM support was added in v3.9 so this patch can
probably be dropped since the non-KVM builds on ARM now work.  But any
platform without the asm/kvm*.h will still be broken when trying to
build the context tracker.


Maybe other platforms should get empty asm/kvm*.h files.  Is there  
anything from those files that the linux/kvm*.h headers need to build?


-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: allow host header to be included even for !CONFIG_KVM

2013-03-21 Thread Gleb Natapov
On Thu, Mar 21, 2013 at 01:42:34PM -0500, Scott Wood wrote:
 On 03/21/2013 09:27:14 AM, Kevin Hilman wrote:
 Gleb Natapov g...@redhat.com writes:
 
  On Wed, Mar 20, 2013 at 06:58:41PM -0500, Scott Wood wrote:
  On 03/14/2013 07:13:46 PM, Kevin Hilman wrote:
  The new context tracking subsystem unconditionally includes
 kvm_host.h
  headers for the guest enter/exit macros.  This causes a compile
  failure when KVM is not enabled.
  
  Fix by adding an IS_ENABLED(CONFIG_KVM) check to kvm_host so
 it can
  be included/compiled even when KVM is not enabled.
  
  Cc: Frederic Weisbecker fweis...@gmail.com
  Signed-off-by: Kevin Hilman khil...@linaro.org
  ---
  Applies on v3.9-rc2
  
   include/linux/kvm_host.h | 7 ++-
   1 file changed, 6 insertions(+), 1 deletion(-)
 
  This broke the PPC non-KVM build, which was relying on stub
  functions in kvm_ppc.h, which relies on struct vcpu in
 kvm_host.h.
 
  Why can't the entirety kvm_host.h be included regardless of
  CONFIG_KVM, just like most other feature-specific headers?  Why
  can't the if/else just go around the functions that you want to
 stub
  out for non-KVM builds?
 
  Kevin,
 
   What compilation failure this patch fixes? I presume something ARM
  related.
 
 Not specficially ARM related, but more context tracking related since
 kernel/context_tracking.c pulls in kvm_host.h, which attempts to
 pull in
 asm/kvm*.h which may not exist on some platforms.
 
 At least for ARM, KVM support was added in v3.9 so this patch can
 probably be dropped since the non-KVM builds on ARM now work.  But any
 platform without the asm/kvm*.h will still be broken when trying to
 build the context tracker.
 
 Maybe other platforms should get empty asm/kvm*.h files.  Is there
 anything from those files that the linux/kvm*.h headers need to
 build?
 
arch things. kvm_vcpu_arch, kvm_arch_memory_slot, kvm_arch etc.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: allow host header to be included even for !CONFIG_KVM

2013-03-21 Thread Scott Wood

On 03/21/2013 02:16:00 PM, Gleb Natapov wrote:

On Thu, Mar 21, 2013 at 01:42:34PM -0500, Scott Wood wrote:
 On 03/21/2013 09:27:14 AM, Kevin Hilman wrote:
 Gleb Natapov g...@redhat.com writes:
 
  On Wed, Mar 20, 2013 at 06:58:41PM -0500, Scott Wood wrote:
  Why can't the entirety kvm_host.h be included regardless of
  CONFIG_KVM, just like most other feature-specific headers?  Why
  can't the if/else just go around the functions that you want to
 stub
  out for non-KVM builds?
 
  Kevin,
 
   What compilation failure this patch fixes? I presume something  
ARM

  related.
 
 Not specficially ARM related, but more context tracking related  
since

 kernel/context_tracking.c pulls in kvm_host.h, which attempts to
 pull in
 asm/kvm*.h which may not exist on some platforms.
 
 At least for ARM, KVM support was added in v3.9 so this patch can
 probably be dropped since the non-KVM builds on ARM now work.  But  
any
 platform without the asm/kvm*.h will still be broken when trying  
to

 build the context tracker.

 Maybe other platforms should get empty asm/kvm*.h files.  Is there
 anything from those files that the linux/kvm*.h headers need to
 build?

arch things. kvm_vcpu_arch, kvm_arch_memory_slot, kvm_arch etc.


Could define them as empty structs.

-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: Avoid busy loops over uninjectable pending APIC timers

2013-03-21 Thread Marcelo Tosatti
   But current PI patches do break them, thats my point. So we either
   need to revise them again, or drop LAPIC timer reinjection. Making
   apic_accept_irq semantics it returns coalescing info, but only sometimes
   is dubious though.
  We may rollback to the initial idea: test both irr and pir to get 
  coalescing info. In this case, inject LAPIC timer always in vcpu context. 
  So apic_accept_irq() will return right coalescing info.
  Also, we need to add comments to tell caller, apic_accept_irq() can ensure 
  the return value is correct only when caller is in target vcpu context.
  
 We cannot touch irr while vcpu is in non-root operation, so we will have
 to pass flag to apic_accept_irq() to let it know that it is called
 synchronously. While all this is possible I want to know which guests
 exactly will we break if we will not track interrupt coalescing for
 lapic timer. If only 2.0 smp kernels will break we can probably drop it.

RHEL4 / RHEL5 guests.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: Avoid busy loops over uninjectable pending APIC timers

2013-03-21 Thread Gleb Natapov
On Thu, Mar 21, 2013 at 05:51:50PM -0300, Marcelo Tosatti wrote:
But current PI patches do break them, thats my point. So we either
need to revise them again, or drop LAPIC timer reinjection. Making
apic_accept_irq semantics it returns coalescing info, but only 
sometimes
is dubious though.
   We may rollback to the initial idea: test both irr and pir to get 
   coalescing info. In this case, inject LAPIC timer always in vcpu context. 
   So apic_accept_irq() will return right coalescing info.
   Also, we need to add comments to tell caller, apic_accept_irq() can 
   ensure the return value is correct only when caller is in target vcpu 
   context.
   
  We cannot touch irr while vcpu is in non-root operation, so we will have
  to pass flag to apic_accept_irq() to let it know that it is called
  synchronously. While all this is possible I want to know which guests
  exactly will we break if we will not track interrupt coalescing for
  lapic timer. If only 2.0 smp kernels will break we can probably drop it.
 
 RHEL4 / RHEL5 guests.
RHEL5 has kvmclock, no? We should not break RHEL4 though.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: allow host header to be included even for !CONFIG_KVM

2013-03-21 Thread Gleb Natapov
On Thu, Mar 21, 2013 at 02:33:13PM -0500, Scott Wood wrote:
 On 03/21/2013 02:16:00 PM, Gleb Natapov wrote:
 On Thu, Mar 21, 2013 at 01:42:34PM -0500, Scott Wood wrote:
  On 03/21/2013 09:27:14 AM, Kevin Hilman wrote:
  Gleb Natapov g...@redhat.com writes:
  
   On Wed, Mar 20, 2013 at 06:58:41PM -0500, Scott Wood wrote:
   Why can't the entirety kvm_host.h be included regardless of
   CONFIG_KVM, just like most other feature-specific headers?  Why
   can't the if/else just go around the functions that you want to
  stub
   out for non-KVM builds?
  
   Kevin,
  
What compilation failure this patch fixes? I presume
 something ARM
   related.
  
  Not specficially ARM related, but more context tracking related
 since
  kernel/context_tracking.c pulls in kvm_host.h, which attempts to
  pull in
  asm/kvm*.h which may not exist on some platforms.
  
  At least for ARM, KVM support was added in v3.9 so this patch can
  probably be dropped since the non-KVM builds on ARM now work.
 But any
  platform without the asm/kvm*.h will still be broken when
 trying to
  build the context tracker.
 
  Maybe other platforms should get empty asm/kvm*.h files.  Is there
  anything from those files that the linux/kvm*.h headers need to
  build?
 
 arch things. kvm_vcpu_arch, kvm_arch_memory_slot, kvm_arch etc.
 
 Could define them as empty structs.
 
Isn't it simpler for kernel/context_tracking.c to define empty
__guest_enter()/__guest_exit() if !CONFIG_KVM?

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 55421] igb VF can't work in KVM guest

2013-03-21 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=55421


Alex Williamson alex.william...@redhat.com changed:

   What|Removed |Added

 CC||alex.william...@redhat.com,
   ||jeffrey.t.kirs...@intel.com




--- Comment #4 from Alex Williamson alex.william...@redhat.com  2013-03-21 
21:29:09 ---
Further bisected to:

commit 5ac6f91d39e0884813dc010e14552143cd1d0d8b
Author: Mitch A Williams mitch.a.willi...@intel.com
Date:   Fri Jan 18 08:57:20 2013 +

igb: Don't give VFs random MAC addresses

If the user has not assigned a MAC address to a VM, then don't give it a
random one. Instead, just give it zeros and let it figure out what to do
with them.

Signed-off-by: Mitch Williams mitch.a.willi...@intel.com
CC: Andy Gospodarek a...@greyhouse.net
CC: Stefan Assmann sassm...@kpanic.de
Tested-by: Aaron Brown aaron.f.br...@intel.com
Tested-by: Stefan Assmann sassm...@redhat.com
Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com

So, for whatever reason we no longer assign a random MAC address when using the
device in a VM (but we do still use one if attached to igbvf in the host).  I
expect we'll eventually see this on all the Intel SR-IOV NICs.  The solution is
to use the ip command to assign the VF a valid MAC address prior to using it
with KVM.  I'll let those who made the change defend it further if they wish.

-- 
Configure bugmail: https://bugzilla.kernel.org/userprefs.cgi?tab=email
--- You are receiving this mail because: ---
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Bug 55421] igb VF can't work in KVM guest

2013-03-21 Thread Alex Williamson
Mitch,

Bugzilla wouldn't let me add you to the CC, so FYI...
Thanks,

Alex

On Thu, 2013-03-21 at 21:29 +, bugzilla-dae...@bugzilla.kernel.org
wrote:
 https://bugzilla.kernel.org/show_bug.cgi?id=55421
 
 
 Alex Williamson alex.william...@redhat.com changed:
 
What|Removed |Added
 
  CC||alex.william...@redhat.com,
||jeffrey.t.kirs...@intel.com
 
 
 
 
 --- Comment #4 from Alex Williamson alex.william...@redhat.com  2013-03-21 
 21:29:09 ---
 Further bisected to:
 
 commit 5ac6f91d39e0884813dc010e14552143cd1d0d8b
 Author: Mitch A Williams mitch.a.willi...@intel.com
 Date:   Fri Jan 18 08:57:20 2013 +
 
 igb: Don't give VFs random MAC addresses
 
 If the user has not assigned a MAC address to a VM, then don't give it a
 random one. Instead, just give it zeros and let it figure out what to do
 with them.
 
 Signed-off-by: Mitch Williams mitch.a.willi...@intel.com
 CC: Andy Gospodarek a...@greyhouse.net
 CC: Stefan Assmann sassm...@kpanic.de
 Tested-by: Aaron Brown aaron.f.br...@intel.com
 Tested-by: Stefan Assmann sassm...@redhat.com
 Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com
 
 So, for whatever reason we no longer assign a random MAC address when using 
 the
 device in a VM (but we do still use one if attached to igbvf in the host).  I
 expect we'll eventually see this on all the Intel SR-IOV NICs.  The solution 
 is
 to use the ip command to assign the VF a valid MAC address prior to using it
 with KVM.  I'll let those who made the change defend it further if they wish.
 



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [Bug 55421] igb VF can't work in KVM guest

2013-03-21 Thread Williams, Mitch A
Thanks for the FYI, Alex. When we pushed this patch upstream, there was a 
corresponding patch to igbvf that was pushed at the same time. With this patch, 
the VFs detect the zero address and generate a random MAC address themselves. 
The reason for this change is to make the VFs play nicely with udev. The issue 
was originally raised by Stefan Assmann.

With older drivers in the guest, this will cause the issue you noted. Distros 
should cherry-pick the corresponding VF patch.

-Mitch

 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Thursday, March 21, 2013 2:32 PM
 To: Williams, Mitch A
 Cc: kvm@vger.kernel.org
 Subject: Re: [Bug 55421] igb VF can't work in KVM guest
 
 Mitch,
 
 Bugzilla wouldn't let me add you to the CC, so FYI...
 Thanks,
 
 Alex
 
 On Thu, 2013-03-21 at 21:29 +, bugzilla-dae...@bugzilla.kernel.org
 wrote:
  https://bugzilla.kernel.org/show_bug.cgi?id=55421
 
 
  Alex Williamson alex.william...@redhat.com changed:
 
 What|Removed |Added
  --
 --
   CC|
 |alex.william...@redhat.com,
 |
 |jeffrey.t.kirs...@intel.com
 
 
 
 
  --- Comment #4 from Alex Williamson alex.william...@redhat.com  2013-03-
 21 21:29:09 ---
  Further bisected to:
 
  commit 5ac6f91d39e0884813dc010e14552143cd1d0d8b
  Author: Mitch A Williams mitch.a.willi...@intel.com
  Date:   Fri Jan 18 08:57:20 2013 +
 
  igb: Don't give VFs random MAC addresses
 
  If the user has not assigned a MAC address to a VM, then don't give it
 a
  random one. Instead, just give it zeros and let it figure out what to
 do
  with them.
 
  Signed-off-by: Mitch Williams mitch.a.willi...@intel.com
  CC: Andy Gospodarek a...@greyhouse.net
  CC: Stefan Assmann sassm...@kpanic.de
  Tested-by: Aaron Brown aaron.f.br...@intel.com
  Tested-by: Stefan Assmann sassm...@redhat.com
  Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com
 
  So, for whatever reason we no longer assign a random MAC address when
 using the
  device in a VM (but we do still use one if attached to igbvf in the host).
 I
  expect we'll eventually see this on all the Intel SR-IOV NICs.  The
 solution is
  to use the ip command to assign the VF a valid MAC address prior to using
 it
  with KVM.  I'll let those who made the change defend it further if they
 wish.
 
 
 

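--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html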

Re: [Bug 55421] igb VF can't work in KVM guest

2013-03-21 Thread Alex Williamson
On Thu, 2013-03-21 at 22:03 +, Williams, Mitch A wrote:
 Thanks for the FYI, Alex. When we pushed this patch upstream, there
 was a corresponding patch to igbvf that was pushed at the same time.
 With this patch, the VFs detect the zero address and generate a random
 MAC address themselves. The reason for this change is to make the VFs
 play nicely with udev. The issue was originally raised by Stefan
 Assmann.
 
 With older drivers in the guest, this will cause the issue you noted.
 Distros should cherry-pick the corresponding VF patch.

I agree the random MAC doesn't play nicely with udev, although I have
found that my guest stops adding new persistent net rules after
eth680 ;)  Arguably anyone using VFs for more than testing should
already be assigning a stable MAC address, but I don't really think a
guest driver update across every possible guest OS is all that
practical.  At least that explains why igbvf in the host is unaffected
though.  Should we expect a Code 10 (device cannot start) from the
Windows igbvf driver when it finds a zero'd MAC?  Thanks,

Alex

  -Original Message-
  From: Alex Williamson [mailto:alex.william...@redhat.com]
  Sent: Thursday, March 21, 2013 2:32 PM
  To: Williams, Mitch A
  Cc: kvm@vger.kernel.org
  Subject: Re: [Bug 55421] igb VF can't work in KVM guest
  
  Mitch,
  
  Bugzilla wouldn't let me add you to the CC, so FYI...
  Thanks,
  
  Alex
  
  On Thu, 2013-03-21 at 21:29 +, bugzilla-dae...@bugzilla.kernel.org
  wrote:
   https://bugzilla.kernel.org/show_bug.cgi?id=55421
  
  
   Alex Williamson alex.william...@redhat.com changed:
  
  What|Removed |Added
   --
  --
CC|
  |alex.william...@redhat.com,
  |
  |jeffrey.t.kirs...@intel.com
  
  
  
  
   --- Comment #4 from Alex Williamson alex.william...@redhat.com  2013-03-
  21 21:29:09 ---
   Further bisected to:
  
   commit 5ac6f91d39e0884813dc010e14552143cd1d0d8b
   Author: Mitch A Williams mitch.a.willi...@intel.com
   Date:   Fri Jan 18 08:57:20 2013 +
  
   igb: Don't give VFs random MAC addresses
  
   If the user has not assigned a MAC address to a VM, then don't give it
  a
   random one. Instead, just give it zeros and let it figure out what to
  do
   with them.
  
   Signed-off-by: Mitch Williams mitch.a.willi...@intel.com
   CC: Andy Gospodarek a...@greyhouse.net
   CC: Stefan Assmann sassm...@kpanic.de
   Tested-by: Aaron Brown aaron.f.br...@intel.com
   Tested-by: Stefan Assmann sassm...@redhat.com
   Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com
  
   So, for whatever reason we no longer assign a random MAC address when
  using the
   device in a VM (but we do still use one if attached to igbvf in the host).
  I
   expect we'll eventually see this on all the Intel SR-IOV NICs.  The
  solution is
   to use the ip command to assign the VF a valid MAC address prior to using
  it
   with KVM.  I'll let those who made the change defend it further if they
  wish.
  
  

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/7] KVM: MMU: fast zap all shadow pages

2013-03-21 Thread Marcelo Tosatti
On Wed, Mar 20, 2013 at 04:30:20PM +0800, Xiao Guangrong wrote:
 Changelog:
 V2:
   - do not reset n_requested_mmu_pages and n_max_mmu_pages
   - batch free root shadow pages to reduce vcpu notification and mmu-lock
 contention
   - remove the first patch that introduce kvm-arch.mmu_cache since we only
 'memset zero' on hashtable rather than all mmu cache members in this
 version
   - remove unnecessary kvm_reload_remote_mmus after kvm_mmu_zap_all
 
 * Issue
 The current kvm_mmu_zap_all is really slow - it is holding mmu-lock to
 walk and zap all shadow pages one by one, also it need to zap all guest
 page's rmap and all shadow page's parent spte list. Particularly, things
 become worse if guest uses more memory or vcpus. It is not good for
 scalability.

Xiao, 

The bulk removal of shadow pages from mmu cache is unnerving - it creates
two codepaths to delete a data structure: the usual, single entry one
and the bulk one.

There are two main usecases for kvm_mmu_zap_all(): to invalidate the
current mmu tree (from kvm_set_memory) and to tear down all pages
(VM shutdown).

The first usecase can use your idea of an invalid generation number
on shadow pages. That is, increment the VM generation number, nuke the root
pages and that's it.

The modifications should be contained to kvm_mmu_get_page() mostly,
correct? (would also have to keep counters to increase SLAB freeing 
ratio, relative to number of outdated shadow pages).

And then have codepaths that nuke shadow pages break from the spinlock,
such as kvm_mmu_slot_remove_write_access does now (spin_needbreak).
That would also solve the current issues without using more memory 
for pte_list_desc and without the delicate Reset MMU cache step.

What do you think?

 * Idea
 Since all shadow page will be zapped, we can directly zap the mmu-cache
 and rmap so that vcpu will fault on the new mmu-cache, after that, we can
 directly free the memory used by old mmu-cache.
 
 The root shadow pages are a little special since they are currently used by
 vcpus, we can not directly free them. So, we zap the root shadow pages and
 re-add them into the new mmu-cache.
 
 * TODO
 (1): free root shadow pages by using generation-number
 (2): drop unnecessary @npages from kvm_arch_create_memslot
 
 * Performance
 The testcase can be found at:
 http://www.gossamer-threads.com/lists/engine?do=post_attachment;postatt_id=54896;list=linux
 is used to measure the time of delete / add memslot. At that time, all vcpus
 are waiting, which means there is no mmu-lock contention. I believe the result
 will be even better if other vcpus and mmu notification need to hold the mmu-lock.
 
 Guest VCPU:6, Mem:2048M
 
 before: Run 10 times, Avg time:46078825 ns.
 
 after: Run 10 times, Avg time:21558774 ns. (+ 113%)
 
 Xiao Guangrong (7):
   KVM: MMU: introduce mmu_cache-pte_list_descs
   KVM: x86: introduce memslot_set_lpage_disallowed
   KVM: x86: introduce kvm_clear_all_gfn_page_info
   KVM: MMU: delete shadow page from hash list in
 kvm_mmu_prepare_zap_page
   KVM: MMU: split kvm_mmu_prepare_zap_page
   KVM: MMU: fast zap all shadow pages
   KVM: MMU: drop unnecessary kvm_reload_remote_mmus after
 kvm_mmu_zap_all
 
  arch/x86/include/asm/kvm_host.h |7 ++-
  arch/x86/kvm/mmu.c  |  105 
 ++-
  arch/x86/kvm/mmu.h  |1 +
  arch/x86/kvm/x86.c  |   87 +---
  include/linux/kvm_host.h|1 +
  5 files changed, 166 insertions(+), 35 deletions(-)
 
 -- 
 1.7.7.6
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/2] KVM: MMU: Make the meaning of kvm_mmu_free_some_pages() clearer

2013-03-21 Thread Marcelo Tosatti
On Thu, Mar 21, 2013 at 07:32:56PM +0900, Takuya Yoshikawa wrote:
 Takuya Yoshikawa (2):
   KVM: MMU: Move kvm_mmu_free_some_pages() into kvm_mmu_alloc_page()
   KVM: MMU: Rename kvm_mmu_free_some_pages() to make_mmu_pages_available()
 
  arch/x86/kvm/mmu.c |   16 +---
  arch/x86/kvm/mmu.h |6 --
  arch/x86/kvm/paging_tmpl.h |1 -
  3 files changed, 9 insertions(+), 14 deletions(-)

Applied, thanks.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: Avoid busy loops over uninjectable pending APIC timers

2013-03-21 Thread Marcelo Tosatti
On Thu, Mar 21, 2013 at 11:13:39PM +0200, Gleb Natapov wrote:
 On Thu, Mar 21, 2013 at 05:51:50PM -0300, Marcelo Tosatti wrote:
 But current PI patches do break them, that's my point. So we either
 need to revise them again, or drop LAPIC timer reinjection. Making
 apic_accept_irq semantics it returns coalescing info, but only 
 sometimes
 is dubious though.
We may rollback to the initial idea: test both irr and pir to get 
coalescing info. In this case, inject LAPIC timer always in vcpu 
context. So apic_accept_irq() will return right coalescing info.
Also, we need to add comments to tell caller, apic_accept_irq() can 
ensure the return value is correct only when caller is in target vcpu 
context.

   We cannot touch irr while vcpu is in non-root operation, so we will have
   to pass flag to apic_accept_irq() to let it know that it is called
   synchronously. While all this is possible I want to know which guests
   exactly will we break if we will not track interrupt coalescing for
   lapic timer. If only 2.0 smp kernels will break we can probably drop it.
  
  RHEL4 / RHEL5 guests.
 RHEL5 has kvmclock no? We should not break RHEL4 though.

kvmclock provides no timer interrupt... either LAPIC or PIT must be used
with kvmclock.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: allow host header to be included even for !CONFIG_KVM

2013-03-21 Thread Kevin Hilman
Gleb Natapov g...@redhat.com writes:

 On Thu, Mar 21, 2013 at 02:33:13PM -0500, Scott Wood wrote:
 On 03/21/2013 02:16:00 PM, Gleb Natapov wrote:
 On Thu, Mar 21, 2013 at 01:42:34PM -0500, Scott Wood wrote:
  On 03/21/2013 09:27:14 AM, Kevin Hilman wrote:
  Gleb Natapov g...@redhat.com writes:
  
   On Wed, Mar 20, 2013 at 06:58:41PM -0500, Scott Wood wrote:
   Why can't the entirety kvm_host.h be included regardless of
   CONFIG_KVM, just like most other feature-specific headers?  Why
   can't the if/else just go around the functions that you want to
  stub
   out for non-KVM builds?
  
   Kevin,
  
What compilation failure this patch fixes? I presume
 something ARM
   related.
  
  Not specifically ARM related, but more context tracking related
 since
  kernel/context_tracking.c pulls in kvm_host.h, which attempts to
  pull in
  asm/kvm*.h which may not exist on some platforms.
  
  At least for ARM, KVM support was added in v3.9 so this patch can
  probably be dropped since the non-KVM builds on ARM now work.
 But any
  platform without the asm/kvm*.h will still be broken when
 trying to
  build the context tracker.
 
  Maybe other platforms should get empty asm/kvm*.h files.  Is there
  anything from those files that the linux/kvm*.h headers need to
  build?
 
 arch things. kvm_vcpu_arch, kvm_arch_memory_slot, kvm_arch etc.
 
 Could define them as empty structs.
 
 Isn't it simpler for kernel/context_tracking.c to define empty
 __guest_enter()/__guest_exit() if !CONFIG_KVM?

I proposed something like that in an earlier version but Frederic asked
me to propose a fix to the KVM headers instead.

Just in case fixing the context tracking subsystem is preferred, 
the patch below fixes the problem also.

Kevin

From f22995a262144d0d61705fa72134694d911283eb Mon Sep 17 00:00:00 2001
From: Kevin Hilman khil...@linaro.org
Date: Thu, 21 Mar 2013 16:57:14 -0700
Subject: [PATCH] context_tracking: fix !CONFIG_KVM compile: add stub guest
 enter/exit

When KVM is not enabled, or not available on a platform, the KVM
headers should not be included.  Instead, just define stub
__guest_[enter|exit] functions.

Cc: Frederic Weisbecker fweis...@gmail.com
Signed-off-by: Kevin Hilman khil...@linaro.org
---
 kernel/context_tracking.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 65349f0..64b0f80 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -15,12 +15,18 @@
  */
 
 #include <linux/context_tracking.h>
-#include <linux/kvm_host.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/hardirq.h>
 #include <linux/export.h>
 
+#if IS_ENABLED(CONFIG_KVM)
+#include <linux/kvm_host.h>
+#else
+#define __guest_enter()
+#define __guest_exit()
+#endif
+
 DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
 #ifdef CONFIG_CONTEXT_TRACKING_FORCE
.active = true,
-- 
1.8.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] KVM: x86: Avoid busy loops over uninjectable pending APIC timers

2013-03-21 Thread Zhang, Yang Z
Marcelo Tosatti wrote on 2013-03-22:
 On Thu, Mar 21, 2013 at 11:13:39PM +0200, Gleb Natapov wrote:
 On Thu, Mar 21, 2013 at 05:51:50PM -0300, Marcelo Tosatti wrote:
 But current PI patches do break them, that's my point. So we either
 need to revise them again, or drop LAPIC timer reinjection. Making
 apic_accept_irq semantics it returns coalescing info, but only
 sometimes is dubious though.
 We may rollback to the initial idea: test both irr and pir to get 
 coalescing
 info. In this case, inject LAPIC timer always in vcpu context. So 
 apic_accept_irq()
 will return right coalescing info.
 Also, we need to add comments to tell caller, apic_accept_irq() can
 ensure the return value is correct only when caller is in target
 vcpu context.
 
 We cannot touch irr while vcpu is in non-root operation, so we will have
 to pass flag to apic_accept_irq() to let it know that it is called
 synchronously. While all this is possible I want to know which guests
 exactly will we break if we will not track interrupt coalescing for
 lapic timer. If only 2.0 smp kernels will break we can probably drop it.
 
 RHEL4 / RHEL5 guests.
 RHEL5 has kvmclock no? We should not break RHEL4 though.
 
 kvmclock provides no timer interrupt... either LAPIC or PIT must be used
 with kvmclock.
Ok, Here is the conclusion: 
-- According to Marcelo's comments, RHEL4/RHEL5 rely on precise LAPIC timer
injection, so the LAPIC timer injection logic is necessary.
-- LAPIC timer injection always occurs in vcpu context, so it's safe to touch
irr and pir for LAPIC timer injection.
-- We cannot touch the virtual apic page while the vcpu is in non-root operation,
so the best solution is to pass a flag to apic_accept_irq and check whether it's
safe to touch vIRR according to this flag.

Right?

Best regards,
Yang

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/7] KVM: MMU: fast zap all shadow pages

2013-03-21 Thread Xiao Guangrong
On 03/22/2013 06:21 AM, Marcelo Tosatti wrote:
 On Wed, Mar 20, 2013 at 04:30:20PM +0800, Xiao Guangrong wrote:
 Changelog:
 V2:
   - do not reset n_requested_mmu_pages and n_max_mmu_pages
   - batch free root shadow pages to reduce vcpu notification and mmu-lock
 contention
   - remove the first patch that introduce kvm-arch.mmu_cache since we only
 'memset zero' on hashtable rather than all mmu cache members in this
 version
   - remove unnecessary kvm_reload_remote_mmus after kvm_mmu_zap_all

 * Issue
 The current kvm_mmu_zap_all is really slow - it is holding mmu-lock to
 walk and zap all shadow pages one by one, also it need to zap all guest
 page's rmap and all shadow page's parent spte list. Particularly, things
 become worse if guest uses more memory or vcpus. It is not good for
 scalability.
 
 Xiao, 
 
 The bulk removal of shadow pages from mmu cache is unnerving - it creates
 two codepaths to delete a data structure: the usual, single entry one
 and the bulk one.
 
 There are two main usecases for kvm_mmu_zap_all(): to invalidate the
 current mmu tree (from kvm_set_memory) and to tear down all pages
 (VM shutdown).
 
 The first usecase can use your idea of an invalid generation number
 on shadow pages. That is, increment the VM generation number, nuke the root
 pages and that's it.
 
 The modifications should be contained to kvm_mmu_get_page() mostly,
 correct? (would also have to keep counters to increase SLAB freeing 
 ratio, relative to number of outdated shadow pages).

Yes.

 
 And then have codepaths that nuke shadow pages break from the spinlock,

I think this is not needed any more. We can let mmu_notify use the generation
number to invalidate all shadow pages; then we only need to free them after
all vcpus are down and mmu_notify is unregistered - at this point there is no
lock contention, so we can directly free them.

 such as kvm_mmu_slot_remove_write_access does now (spin_needbreak).

BTW, to be honest, I do not think spin_needbreak is a good way - it does
not fix the hot-lock contention and it just occupies more cpu time to avoid
possible soft lock-ups.

In particular, zap-all-shadow-pages can make other vcpus fault and contend for
mmu-lock; then zap-all-shadow-pages releases mmu-lock and waits while other vcpus
create page tables again. zap-all-shadow-pages needs a long time to finish;
in the worst case, it may never complete under intensive vcpu and memory
usage.

I still think the right way to fix this kind of thing is optimization for
mmu-lock.

 That would also solve the current issues without using more memory 
 for pte_list_desc and without the delicate Reset MMU cache step.
 
 What do you think?

I agree with your point, Marcelo! I will redesign it. Thank you!

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 4/7] KVM: MMU: delete shadow page from hash list in kvm_mmu_prepare_zap_page

2013-03-21 Thread Xiao Guangrong
On 03/21/2013 09:14 PM, Gleb Natapov wrote:
 On Wed, Mar 20, 2013 at 04:30:24PM +0800, Xiao Guangrong wrote:
 Move deletion shadow page from the hash list from kvm_mmu_commit_zap_page to
 kvm_mmu_prepare_zap_page, so that we can free the shadow page out of
 mmu-lock.

 Also, delete the invalid shadow page from the hash list since this page can
 not be reused anymore. This makes resetting the mmu-cache easier - we do not
 need
 to care all hash entries after reset mmu-cache

 Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
 ---
  arch/x86/kvm/mmu.c |8 ++--
  1 files changed, 6 insertions(+), 2 deletions(-)

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index dc37512..5578c91 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1472,7 +1472,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
 *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
  ASSERT(is_empty_shadow_page(sp-spt));
 -hlist_del(sp-hash_link);
 +
  list_del(sp-link);
  free_page((unsigned long)sp-spt);
  if (!sp-role.direct)
 @@ -1660,7 +1660,8 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
  
  #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) 
 \
  for_each_gfn_sp(_kvm, _sp, _gfn)\
 -if ((_sp)-role.direct || (_sp)-role.invalid) {} else
 +if ((_sp)-role.direct ||   \
 +  ((_sp)-role.invalid  WARN_ON(1))) {} else
  
  /* @sp-gfn should be write-protected at the call site */
  static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 @@ -2079,6 +2080,9 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, 
 struct kvm_mmu_page *sp,
  unaccount_shadowed(kvm, sp-gfn);
  if (sp-unsync)
  kvm_unlink_unsync_page(kvm, sp);
 +
 +hlist_del_init(sp-hash_link);
 +
 Now we delete roots from hash, but leave it on active_mmu_pages list. Is
 this OK?

It is okay, I think. The hash list is only used to find a gfn's shadow page. An
invalid shadow page does not contain any useful guest content and will be freed
soon after vcpu reload.

IIRC, we did it when we used rcu to free shadow pages.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] tcm_vhost endpoint

2013-03-21 Thread Asias He
Asias He (2):
  tcm_vhost: Use vq-private_data to indicate if the endpoint is setup
  tcm_vhost: Initialize vq-last_used_idx when set endpoint

 drivers/vhost/tcm_vhost.c | 47 +--
 1 file changed, 41 insertions(+), 6 deletions(-)

-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] tcm_vhost: Use vq-private_data to indicate if the endpoint is setup

2013-03-21 Thread Asias He
Currently, vs->vs_endpoint is used to indicate if the endpoint is set up or
not. It is set or cleared in vhost_scsi_set_endpoint() or
vhost_scsi_clear_endpoint() under the vs->dev.mutex lock. However, when
we check it in vhost_scsi_handle_vq(), we ignore the lock.

Instead of using vs->vs_endpoint and the vs->dev.mutex lock to
indicate the status of the endpoint, we use the per-virtqueue
vq->private_data to indicate it. This way, we only need to take the
vq->mutex lock, which is per queue, so concurrent multiqueue
processing has less lock contention. Further, on the read side of
vq->private_data, we do not even need to take any lock if it is accessed in
the vhost worker thread, because it is protected by vhost rcu.

Signed-off-by: Asias He as...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 46 --
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 43fb11e..099feef 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -67,7 +67,6 @@ struct vhost_scsi {
/* Protected by vhost_scsi-dev.mutex */
struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
-   bool vs_endpoint;
 
struct vhost_dev dev;
struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ];
@@ -91,6 +90,24 @@ static int iov_num_pages(struct iovec *iov)
   ((unsigned long)iov-iov_base  PAGE_MASK))  PAGE_SHIFT;
 }
 
+static bool tcm_vhost_check_endpoint(struct vhost_virtqueue *vq)
+{
+   bool ret = false;
+
+   /*
+* We can handle the vq only after the endpoint is setup by calling the
+* VHOST_SCSI_SET_ENDPOINT ioctl.
+*
+* TODO: Check that we are running from vhost_worker which acts
+* as read-side critical section for vhost kind of RCU.
+* See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h
+*/
+   if (rcu_dereference_check(vq->private_data, 1))
+   ret = true;
+
+   return ret;
+}
+
 static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
 {
return 1;
@@ -581,8 +598,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
int head, ret;
u8 target;
 
-   /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
-   if (unlikely(!vs-vs_endpoint))
+   if (!tcm_vhost_check_endpoint(vq))
return;
 
mutex_lock(vq-mutex);
@@ -781,8 +797,9 @@ static int vhost_scsi_set_endpoint(
 {
struct tcm_vhost_tport *tv_tport;
struct tcm_vhost_tpg *tv_tpg;
+   struct vhost_virtqueue *vq;
bool match = false;
-   int index, ret;
+   int index, ret, i;
 
mutex_lock(vs-dev.mutex);
/* Verify that ring has been setup correctly. */
@@ -826,7 +843,13 @@ static int vhost_scsi_set_endpoint(
if (match) {
memcpy(vs-vs_vhost_wwpn, t-vhost_wwpn,
   sizeof(vs-vs_vhost_wwpn));
-   vs-vs_endpoint = true;
+   for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+   vq = &vs->vqs[i];
+   /* Flushing the vhost_work acts as synchronize_rcu */
+   mutex_lock(&vq->mutex);
+   rcu_assign_pointer(vq->private_data, vs);
+   mutex_unlock(&vq->mutex);
+   }
ret = 0;
} else {
ret = -EEXIST;
@@ -842,6 +865,8 @@ static int vhost_scsi_clear_endpoint(
 {
struct tcm_vhost_tport *tv_tport;
struct tcm_vhost_tpg *tv_tpg;
+   struct vhost_virtqueue *vq;
+   bool match = false;
int index, ret, i;
u8 target;
 
@@ -877,9 +902,18 @@ static int vhost_scsi_clear_endpoint(
}
tv_tpg-tv_tpg_vhost_count--;
vs-vs_tpg[target] = NULL;
-   vs-vs_endpoint = false;
+   match = true;
mutex_unlock(tv_tpg-tv_tpg_mutex);
}
+   if (match) {
+   for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+   vq = &vs->vqs[i];
+   /* Flushing the vhost_work acts as synchronize_rcu */
+   mutex_lock(&vq->mutex);
+   rcu_assign_pointer(vq->private_data, NULL);
+   mutex_unlock(&vq->mutex);
+   }
+   }
mutex_unlock(vs-dev.mutex);
return 0;
 
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] tcm_vhost: Initialize vq-last_used_idx when set endpoint

2013-03-21 Thread Asias He
This patch fixes guest hang when booting seabios and guest.

  [0.576238] scsi0 : Virtio SCSI HBA
  [0.616754] virtio_scsi virtio1: request:id 0 is not a head!

vq-last_used_idx is initialized only when /dev/vhost-scsi is
opened or closed.

   vhost_scsi_open - vhost_dev_init() - vhost_vq_reset()
   vhost_scsi_release() - vhost_dev_cleanup - vhost_vq_reset()

So, when guest talks to tcm_vhost after seabios does, vq-last_used_idx
still contains the old value for seabios. This confuses the guest.

Fix this by calling vhost_init_used() to init vq-last_used_idx when
we set endpoint.

Signed-off-by: Asias He as...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 099feef..0524267 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -848,6 +848,7 @@ static int vhost_scsi_set_endpoint(
/* Flushing the vhost_work acts as synchronize_rcu */
mutex_lock(vq-mutex);
rcu_assign_pointer(vq-private_data, vs);
+   vhost_init_used(vq);
mutex_unlock(vq-mutex);
}
ret = 0;
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: MMU: improve n_max_mmu_pages calculation with TDP

2013-03-21 Thread Xiao Guangrong
On 03/21/2013 10:29 PM, Marcelo Tosatti wrote:
 On Thu, Mar 21, 2013 at 01:41:59PM +0800, Xiao Guangrong wrote:
 On 03/21/2013 04:14 AM, Marcelo Tosatti wrote:

 kvm_mmu_calculate_mmu_pages numbers, 

 maximum number of shadow pages = 2% of mapped guest pages

 Does not make sense for TDP guests where mapping all of guest
 memory with 4k pages cannot exceed mapped guest pages / 512
 (not counting root pages).

 Allow that maximum for TDP, forcing the guest to recycle otherwise.

 Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 956ca35..a9694a8d7 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -4293,7 +4293,7 @@ nomem:
  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
  {
 unsigned int nr_mmu_pages;
 -   unsigned int  nr_pages = 0;
 +   unsigned int i, nr_pages = 0;
 struct kvm_memslots *slots;
 struct kvm_memory_slot *memslot;

 @@ -4302,7 +4302,19 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm 
 *kvm)
 kvm_for_each_memslot(memslot, slots)
 nr_pages += memslot-npages;

 -   nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 +   if (tdp_enabled) {
 +   /* one root page */
 +   nr_mmu_pages = 1;
 +   /* nr_pages / (512^i) per level, due to
 +* guest RAM map being linear */
 +   for (i = 1; i  4; i++) {
 +   int nr_pages_round = nr_pages + (1  (9*i));
 +   nr_mmu_pages += nr_pages_round  (9*i);
 +   }

 Marcelo,

 Can it work if nested guest is used? Did you see any problem in practice 
 (direct guest
 uses more memory than your calculation)?
 
 Direct guest can use more than the calculation by switching between
 different paging modes.

I mean the guest runs on hardmmu (tdp is used but no nested guest). It only
uses one page table and, it seems, cannot use more memory than your calculation
(except for some mmio page tables).

So, your calculation is only used to limit memory usage for tdp + nested guests?

 
 About nested guest: at one point in time the working set cannot exceed 
 the number of physical pages visible by the guest.

But it can cause lots of #PFs; that is a nightmare for performance, no?

 
 Allowing an excessively high number of shadow pages is a security

Does the security concern mean optimizing memory usage? Or something else?

 concern, also, as unpreemptable long operations are necessary to tear
 down the pages.

You mean limiting the shadow pages to let some paths run faster, like
remove-write-access and zap-all-sp etc.? If so, we can directly optimize
these paths; this is more effective, I think.

 
 And mmio also can build some page table that looks like not considered
 in this patch.
 
 Right, but its only a few pages. Same argument as above: working set at
 one given time is smaller than total RAM. Do you see any potential
 problem?

Marcelo, I am just unsure whether the limitation is reasonable; as I said,
the limitation is not effective enough on a hardmmu-only guest (no nested),
and it seems too low for nested guests.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 0/6] Use eoi to track RTC interrupt delivery status

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

The current interrupt coalescing logic, which is only used by RTC, conflicts
with Posted Interrupt.

This patch introduces a new mechanism that uses eoi to track interrupts:
when delivering an interrupt to vcpus, pending_eoi is set to the number of
vcpus that received the interrupt, and it is decreased when each vcpu writes
eoi. No subsequent RTC interrupt can be delivered to a vcpu until all vcpus
have written eoi.

Changes from v5 to v6
* Move set dest_map logic into __apic_accept_irq().
* Use RTC_GSI to distinguish different platform, and drop all CONFIG_X86.
* Rebase on top of KVM.

Changes from v4 to v5
* Calculate destination vcpu on interrupt injection not hook into ioapic
  modification.
* Rebase on top of KVM.

Changes from v3 to v4
* Call kvm_apic_match_dest() to check destination vcpu.
* Update RTC interrupt's destination vcpu map when ioapic entry of RTC
  or apic register (id, ldr, dfr) is changed.

Changes from v2 to v3:
* Remove unused variable irq_ack_notifier.
* Acquire ioapic->lock before calculating destination vcpu map.
* Copy vcpu_map to expected_eoi_timap on each RTC irq and clear it on eoi.

Yang Zhang (6):
  KVM: Add vcpu info to ioapic_update_eoi()
  KVM: Introduce struct rtc_status
  KVM : Return destination vcpu on interrupt injection
  KVM: Add reset/restore rtc_status support
  KVM : Force vmexit with virtual interrupt delivery
  KVM: Use eoi to track RTC interrupt delivery status

 arch/x86/kvm/lapic.c |   35 +++--
 arch/x86/kvm/lapic.h |7 +++-
 virt/kvm/ioapic.c|   82 +-
 virt/kvm/ioapic.h|   17 +-
 virt/kvm/irq_comm.c  |   12 
 5 files changed, 125 insertions(+), 28 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 1/6] KVM: Add vcpu info to ioapic_update_eoi()

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Add vcpu info to ioapic_update_eoi, so we can know which vcpu
issued this EOI.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |2 +-
 virt/kvm/ioapic.c|   12 ++--
 virt/kvm/ioapic.h|3 ++-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a8e9369..d3e322a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -786,7 +786,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int 
vector)
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
-   kvm_ioapic_update_eoi(apic-vcpu-kvm, vector, trigger_mode);
+   kvm_ioapic_update_eoi(apic-vcpu, vector, trigger_mode);
}
 }
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 5ba005c..9379386 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -267,8 +267,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int 
irq_source_id)
spin_unlock(ioapic-lock);
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
-int trigger_mode)
+static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
+   struct kvm_ioapic *ioapic, int vector, int trigger_mode)
 {
int i;
 
@@ -307,12 +307,12 @@ bool kvm_ioapic_handles_vector(struct kvm *kvm, int 
vector)
return test_bit(vector, ioapic-handled_vectors);
 }
 
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
 {
-   struct kvm_ioapic *ioapic = kvm-arch.vioapic;
+   struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;
 
spin_lock(ioapic-lock);
-   __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
+   __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
spin_unlock(ioapic-lock);
 }
 
@@ -410,7 +410,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, 
gpa_t addr, int len,
break;
 #ifdef CONFIG_IA64
case IOAPIC_REG_EOI:
-   __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
+   __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
break;
 #endif
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 0400a46..2fc61a5 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -70,7 +70,8 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
*kvm)
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
+   int trigger_mode);
 bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_destroy(struct kvm *kvm);
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 2/6] KVM: Introduce struct rtc_status

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 virt/kvm/ioapic.h |   12 
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 2fc61a5..cd30277 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -34,6 +34,17 @@ struct kvm_vcpu;
 #defineIOAPIC_INIT 0x5
 #defineIOAPIC_EXTINT   0x7
 
+#ifdef CONFIG_X86
+#define RTC_GSI 8
+#else
+#define RTC_GSI 255
+#endif
+
+struct rtc_status {
+   int pending_eoi;
+   DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
+};
+
 struct kvm_ioapic {
u64 base_address;
u32 ioregsel;
@@ -47,6 +58,7 @@ struct kvm_ioapic {
void (*ack_notifier)(void *opaque, int irq);
spinlock_t lock;
DECLARE_BITMAP(handled_vectors, 256);
+   struct rtc_status rtc_status;
 };
 
 #ifdef DEBUG
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 3/6] KVM : Return destination vcpu on interrupt injection

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Add a new parameter that records which vcpus received the interrupt.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |   25 -
 arch/x86/kvm/lapic.h |5 +++--
 virt/kvm/ioapic.c|2 +-
 virt/kvm/ioapic.h|2 +-
 virt/kvm/irq_comm.c  |   12 ++--
 5 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d3e322a..d7915a1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -431,14 +431,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 }
 
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
-int vector, int level, int trig_mode);
+int vector, int level, int trig_mode,
+unsigned long *dest_map);
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+   unsigned long *dest_map)
 {
struct kvm_lapic *apic = vcpu-arch.apic;
 
return __apic_accept_irq(apic, irq-delivery_mode, irq-vector,
-   irq-level, irq-trig_mode);
+   irq-level, irq-trig_mode, dest_map);
 }
 
 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
@@ -611,7 +613,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct 
kvm_lapic *source,
 }
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-   struct kvm_lapic_irq *irq, int *r)
+   struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
 {
struct kvm_apic_map *map;
unsigned long bitmap = 1;
@@ -622,7 +624,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct 
kvm_lapic *src,
*r = -1;
 
if (irq-shorthand == APIC_DEST_SELF) {
-   *r = kvm_apic_set_irq(src-vcpu, irq);
+   *r = kvm_apic_set_irq(src-vcpu, irq, dest_map);
return true;
}
 
@@ -667,7 +669,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct 
kvm_lapic *src,
continue;
if (*r  0)
*r = 0;
-   *r += kvm_apic_set_irq(dst[i]-vcpu, irq);
+   *r += kvm_apic_set_irq(dst[i]-vcpu, irq, dest_map);
}
 
ret = true;
@@ -681,7 +683,8 @@ out:
  * Return 1 if successfully added and 0 if discarded.
  */
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
-int vector, int level, int trig_mode)
+int vector, int level, int trig_mode,
+unsigned long *dest_map)
 {
int result = 0;
struct kvm_vcpu *vcpu = apic-vcpu;
@@ -694,6 +697,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
delivery_mode,
if (unlikely(!apic_enabled(apic)))
break;
 
+   if (dest_map)
+   set_bit(vcpu-vcpu_id, dest_map);
+
if (trig_mode) {
apic_debug(level trig mode for vector %d, vector);
apic_set_vector(vector, apic-regs + APIC_TMR);
@@ -852,7 +858,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
   irq.vector);
 
-   kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq);
+   kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq, NULL);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
@@ -1488,7 +1494,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int 
lvt_type)
vector = reg  APIC_VECTOR_MASK;
mode = reg  APIC_MODE_MASK;
trig_mode = reg  APIC_LVT_LEVEL_TRIGGER;
-   return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+   return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
+   NULL);
}
return 0;
 }
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 2c721b9..967519c 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -55,11 +55,12 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+   unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-   struct kvm_lapic_irq *irq, int *r);
+   struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void 

[PATCH v6 4/6] KVM: Add reset/restore rtc_status support

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 arch/x86/kvm/lapic.c |8 
 arch/x86/kvm/lapic.h |2 ++
 virt/kvm/ioapic.c|   26 ++
 3 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d7915a1..7c17e82 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
+{
+   struct kvm_lapic *apic = vcpu-arch.apic;
+
+   return apic_test_vector(vector, apic-regs + APIC_ISR) ||
+   apic_test_vector(vector, apic-regs + APIC_IRR);
+}
+
 static inline void apic_set_vector(int vec, void *bitmap)
 {
set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 967519c..004d2ad 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu 
*vcpu)
return vcpu-arch.apic-pending_events;
 }
 
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
+
 #endif
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 8664812..3897305 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -90,6 +90,30 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic 
*ioapic,
return result;
 }
 
+static void rtc_irq_reset(struct kvm_ioapic *ioapic)
+{
+   ioapic-rtc_status.pending_eoi = 0;
+   bitmap_zero(ioapic-rtc_status.dest_map, KVM_MAX_VCPUS);
+}
+
+static void rtc_irq_restore(struct kvm_ioapic *ioapic)
+{
+   struct kvm_vcpu *vcpu;
+   int vector, i, pending_eoi = 0;
+
+   if (RTC_GSI != 8)
+   return;
+
+   vector = ioapic-redirtbl[RTC_GSI].fields.vector;
+   kvm_for_each_vcpu(i, vcpu, ioapic-kvm) {
+   if (kvm_apic_pending_eoi(vcpu, vector)) {
+   pending_eoi++;
+   set_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map);
+   }
+   }
+   ioapic-rtc_status.pending_eoi = pending_eoi;
+}
+
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
union kvm_ioapic_redirect_entry *pent;
@@ -431,6 +455,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
ioapic-ioregsel = 0;
ioapic-irr = 0;
ioapic-id = 0;
+   rtc_irq_reset(ioapic);
update_handled_vectors(ioapic);
 }
 
@@ -496,6 +521,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state)
spin_lock(ioapic-lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
update_handled_vectors(ioapic);
+   rtc_irq_restore(ioapic);
kvm_ioapic_make_eoibitmap_request(kvm);
spin_unlock(ioapic-lock);
return 0;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 6/6] KVM: Use eoi to track RTC interrupt delivery status

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

The current interrupt coalescing logic, which is only used by the RTC,
conflicts with Posted Interrupts.
This patch introduces a new mechanism that uses EOI to track interrupt
delivery: when an interrupt is delivered, pending_eoi is set to the number
of vcpus that received it, and it is decremented each time one of those
vcpus writes EOI. No subsequent RTC interrupt can be delivered until all
of those vcpus have written EOI.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 virt/kvm/ioapic.c |   40 +++-
 1 files changed, 39 insertions(+), 1 deletions(-)

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index c991e58..df16daf 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic *ioapic)
ioapic-rtc_status.pending_eoi = pending_eoi;
 }
 
+static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
+   struct rtc_status *rtc_status, int irq)
+{
+   if (irq != RTC_GSI)
+   return;
+
+   if (test_and_clear_bit(vcpu-vcpu_id, rtc_status-dest_map))
+   --rtc_status-pending_eoi;
+
+   WARN_ON(rtc_status-pending_eoi  0);
+}
+
+static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
+{
+   if (irq != RTC_GSI)
+   return false;
+
+   if (ioapic-rtc_status.pending_eoi  0)
+   return true; /* coalesced */
+
+   return false;
+}
+
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
union kvm_ioapic_redirect_entry *pent;
@@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
irq)
 {
union kvm_ioapic_redirect_entry *entry = ioapic-redirtbl[irq];
struct kvm_lapic_irq irqe;
+   int ret;
 
ioapic_debug(dest=%x dest_mode=%x delivery_mode=%x 
 vector=%x trig_mode=%x\n,
@@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
irq)
irqe.level = 1;
irqe.shorthand = 0;
 
-   return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
+   if (irq == RTC_GSI) {
+   ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe,
+   ioapic-rtc_status.dest_map);
+   ioapic-rtc_status.pending_eoi = ret;
+   } else
+   ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
+
+   return ret;
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
@@ -268,6 +299,11 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, 
int irq_source_id,
ret = 1;
} else {
int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+
+   if (rtc_irq_check(ioapic, irq)) {
+   ret = 0; /* coalesced */
+   goto out;
+   }
ioapic-irr |= mask;
if ((edge  old_irr != ioapic-irr) ||
(!edge  !entry.fields.remote_irr))
@@ -275,6 +311,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, 
int irq_source_id,
else
ret = 0; /* report coalesced interrupt */
}
+out:
trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
spin_unlock(ioapic-lock);
 
@@ -302,6 +339,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
if (ent-fields.vector != vector)
continue;
 
+   rtc_irq_ack_eoi(vcpu, ioapic-rtc_status, i);
/*
 * We are dropping lock while calling ack notifiers because ack
 * notifier callbacks for assigned devices call into IOAPIC
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 5/6] KVM : Force vmexit with virtual interrupt delivery

2013-03-21 Thread Yang Zhang
From: Yang Zhang yang.z.zh...@intel.com

The EOI is needed to track interrupt delivery status, so force a vmexit
on EOI for the RTC interrupt when virtual interrupt delivery is enabled.

Signed-off-by: Yang Zhang yang.z.zh...@intel.com
---
 virt/kvm/ioapic.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 3897305..c991e58 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -158,7 +158,7 @@ void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
if (!e-fields.mask 
(e-fields.trig_mode == IOAPIC_LEVEL_TRIG ||
 kvm_irq_has_notifier(ioapic-kvm, KVM_IRQCHIP_IOAPIC,
-index))) {
+index) || index == RTC_GSI)) {
irqe.dest_id = e-fields.dest_id;
irqe.vector = e-fields.vector;
irqe.dest_mode = e-fields.dest_mode;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V4 0/2] tcm_vhost hotplug

2013-03-21 Thread Asias He
Asias He (2):
  tcm_vhost: Introduce tcm_vhost_check_feature()
  tcm_vhost: Add hotplug/hotunplug support

 drivers/vhost/tcm_vhost.c | 224 --
 drivers/vhost/tcm_vhost.h |  10 +++
 2 files changed, 229 insertions(+), 5 deletions(-)

-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V4 1/2] tcm_vhost: Introduce tcm_vhost_check_feature()

2013-03-21 Thread Asias He
This helper is useful to check if a feature is supported.

Signed-off-by: Asias He as...@redhat.com
Reviewed-by: Stefan Hajnoczi stefa...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 0524267..d81e3a9 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -90,6 +90,18 @@ static int iov_num_pages(struct iovec *iov)
   ((unsigned long)iov-iov_base  PAGE_MASK))  PAGE_SHIFT;
 }
 
+static bool tcm_vhost_check_feature(struct vhost_scsi *vs, int feature)
+{
+   bool ret = false;
+
+   mutex_lock(vs-dev.mutex);
+   if (vhost_has_feature(vs-dev, feature))
+   ret = true;
+   mutex_unlock(vs-dev.mutex);
+
+   return ret;
+}
+
 static bool tcm_vhost_check_endpoint(struct vhost_virtqueue *vq)
 {
bool ret = false;
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V4 2/2] tcm_vhost: Add hotplug/hotunplug support

2013-03-21 Thread Asias He
In commit 365a7150094 ([SCSI] virtio-scsi: hotplug support for
virtio-scsi), hotplug support is added to virtio-scsi.

This patch adds hotplug and hotunplug support to tcm_vhost.

You can create or delete a LUN in targetcli to hotplug or hotunplug a
LUN in guest.

Changes in v4:
- Drop tcm_vhost_check_endpoint in tcm_vhost_send_evt
- Add tcm_vhost_check_endpoint in vhost_scsi_evt_handle_kick

Changes in v3:
- Separate the bug fix to another thread

Changes in v2:
- Remove code duplication in tcm_vhost_{hotplug,hotunplug}
- Fix racing of vs_events_nr
- Add flush fix patch to this series

Signed-off-by: Asias He as...@redhat.com
Reviewed-by: Stefan Hajnoczi stefa...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 212 --
 drivers/vhost/tcm_vhost.h |  10 +++
 2 files changed, 217 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index d81e3a9..e734ead 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -62,6 +62,9 @@ enum {
 
 #define VHOST_SCSI_MAX_TARGET  256
 #define VHOST_SCSI_MAX_VQ  128
+#define VHOST_SCSI_MAX_EVENT   128
+
+#define VHOST_SCSI_FEATURES (VHOST_FEATURES | (1ULL  VIRTIO_SCSI_F_HOTPLUG))
 
 struct vhost_scsi {
/* Protected by vhost_scsi-dev.mutex */
@@ -73,6 +76,12 @@ struct vhost_scsi {
 
struct vhost_work vs_completion_work; /* cmd completion work item */
struct llist_head vs_completion_list; /* cmd completion queue */
+
+   struct vhost_work vs_event_work; /* evt injection work item */
+   struct llist_head vs_event_list; /* evt injection queue */
+
+   bool vs_events_dropped; /* any missed events, protected by dev.mutex */
+   u64 vs_events_nr; /* num of pending events, protected by dev.mutex */
 };
 
 /* Local pointer to allocated TCM configfs fabric module */
@@ -120,6 +129,16 @@ static bool tcm_vhost_check_endpoint(struct 
vhost_virtqueue *vq)
return ret;
 }
 
+static bool tcm_vhost_check_events_dropped(struct vhost_scsi *vs)
+{
+   bool ret;
+
+   mutex_lock(vs-dev.mutex);
+   ret = vs-vs_events_dropped;
+   mutex_unlock(vs-dev.mutex);
+
+   return ret;
+}
 static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
 {
return 1;
@@ -370,6 +389,36 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd)
return 0;
 }
 
+static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt 
*evt)
+{
+   mutex_lock(vs-dev.mutex);
+   vs-vs_events_nr--;
+   kfree(evt);
+   mutex_unlock(vs-dev.mutex);
+}
+
+static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
+   u32 event, u32 reason)
+{
+   struct tcm_vhost_evt *evt;
+
+   mutex_lock(vs-dev.mutex);
+   if (vs-vs_events_nr  VHOST_SCSI_MAX_EVENT) {
+   mutex_unlock(vs-dev.mutex);
+   return NULL;
+   }
+
+   evt = kzalloc(sizeof(*evt), GFP_KERNEL);
+   if (evt) {
+   evt-event.event = event;
+   evt-event.reason = reason;
+   vs-vs_events_nr++;
+   }
+   mutex_unlock(vs-dev.mutex);
+
+   return evt;
+}
+
 static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
 {
struct se_cmd *se_cmd = tv_cmd-tvc_se_cmd;
@@ -388,6 +437,77 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd 
*tv_cmd)
kfree(tv_cmd);
 }
 
+static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
+   struct virtio_scsi_event *event)
+{
+   struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
+   struct virtio_scsi_event __user *eventp;
+   unsigned out, in;
+   int head, ret;
+
+   if (!tcm_vhost_check_endpoint(vq))
+   return;
+
+   mutex_lock(vq-mutex);
+again:
+   vhost_disable_notify(vs-dev, vq);
+   head = vhost_get_vq_desc(vs-dev, vq, vq-iov,
+   ARRAY_SIZE(vq-iov), out, in,
+   NULL, NULL);
+   if (head  0) {
+   mutex_lock(vs-dev.mutex);
+   vs-vs_events_dropped = true;
+   mutex_unlock(vs-dev.mutex);
+   goto out;
+   }
+   if (head == vq-num) {
+   if (vhost_enable_notify(vs-dev, vq))
+   goto again;
+   mutex_lock(vs-dev.mutex);
+   vs-vs_events_dropped = true;
+   mutex_unlock(vs-dev.mutex);
+   goto out;
+   }
+
+   if ((vq-iov[out].iov_len != sizeof(struct virtio_scsi_event))) {
+   vq_err(vq, Expecting virtio_scsi_event, got %zu bytes\n,
+   vq-iov[out].iov_len);
+   goto out;
+   }
+
+   mutex_lock(vs-dev.mutex);
+   if (vs-vs_events_dropped) {
+   event-event |= VIRTIO_SCSI_T_EVENTS_MISSED;
+   vs-vs_events_dropped = false;
+   }
+   mutex_unlock(vs-dev.mutex);
+
+   eventp = vq-iov[out].iov_base;
+   ret = __copy_to_user(eventp, event, sizeof(*event));
+ 

[PATCH 0/4 v2] KVM :PPC: Userspace Debug support

2013-03-21 Thread Bharat Bhushan
From: Bharat Bhushan bharat.bhus...@freescale.com

This patchset adds userspace debug support for booke/bookehv.
It has been tested on powerpc e500v2/e500mc devices.

v1 -> v2
 - Debug registers are now saved/restored in vcpu_put/vcpu_get.
   Previously they were saved/restored on guest entry/exit.

Bharat Bhushan (4):
  Added ONE_REG interface for debug instruction
  KVM: PPC: debug stub interface parameter defined
  Rename EMULATE_DO_PAPR to EMULATE_EXIT_USER
  KVM: PPC: Add userspace debug stub support

 Documentation/virtual/kvm/api.txt |1 +
 arch/powerpc/include/asm/kvm_book3s.h |2 +
 arch/powerpc/include/asm/kvm_booke.h  |2 +
 arch/powerpc/include/asm/kvm_host.h   |   10 ++
 arch/powerpc/include/asm/kvm_ppc.h|2 +-
 arch/powerpc/include/uapi/asm/kvm.h   |   41 ++
 arch/powerpc/kvm/book3s.c |   12 ++
 arch/powerpc/kvm/book3s_emulate.c |4 +-
 arch/powerpc/kvm/book3s_pr.c  |4 +-
 arch/powerpc/kvm/booke.c  |  252 +++--
 arch/powerpc/kvm/e500_emulate.c   |   10 ++
 arch/powerpc/kvm/powerpc.c|6 -
 12 files changed, 323 insertions(+), 23 deletions(-)


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4 v2] Added ONE_REG interface for debug instruction

2013-03-21 Thread Bharat Bhushan
This patch adds a ONE_REG interface through which userspace can get the
special instruction to be used for setting software breakpoints.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
v2:
 - Corrected the opcode for the unconditional trap (tw with TO = 31).

 Documentation/virtual/kvm/api.txt |1 +
 arch/powerpc/include/asm/kvm_book3s.h |2 ++
 arch/powerpc/include/asm/kvm_booke.h  |2 ++
 arch/powerpc/include/uapi/asm/kvm.h   |4 
 arch/powerpc/kvm/book3s.c |6 ++
 arch/powerpc/kvm/booke.c  |6 ++
 6 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index cce500a..dbfcc04 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1766,6 +1766,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TSR  | 32
   PPC   | KVM_REG_PPC_OR_TSR   | 32
   PPC   | KVM_REG_PPC_CLEAR_TSR| 32
+  PPC   | KVM_REG_PPC_DEBUG_INST| 32
 
 4.69 KVM_GET_ONE_REG
 
diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 5a56e1c..bc81842 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -458,6 +458,8 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu 
*vcpu)
 #define OSI_SC_MAGIC_R40x77810F9B
 
 #define INS_DCBZ   0x7c0007ec
+/* TO = 31 for unconditional trap */
+#define INS_TW 0x7fe8
 
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS(LPID_RSVD + 1)
diff --git a/arch/powerpc/include/asm/kvm_booke.h 
b/arch/powerpc/include/asm/kvm_booke.h
index b7cd335..d3c1eb3 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -26,6 +26,8 @@
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS64
 
+#define KVMPPC_INST_EHPRIV 0x7c00021c
+
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
vcpu-arch.gpr[num] = val;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index ef072b1..c2ff99c 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -422,4 +422,8 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_CLEAR_TSR  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88)
 #define KVM_REG_PPC_TCR(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89)
 #define KVM_REG_PPC_TSR(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a)
+
+/* Debugging: Special instruction for software breakpoint */
+#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index a4b6452..975a401 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -530,6 +530,12 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
val = get_reg_val(reg-id, vcpu-arch.vscr.u[3]);
break;
 #endif /* CONFIG_ALTIVEC */
+   case KVM_REG_PPC_DEBUG_INST: {
+   u32 opcode = INS_TW;
+   r = copy_to_user((u32 __user *)(long)reg-addr,
+opcode, sizeof(u32));
+   break;
+   }
default:
r = -EINVAL;
break;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 8b553c0..a41cd6d 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1448,6 +1448,12 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
case KVM_REG_PPC_TSR:
r = put_user(vcpu-arch.tsr, (u32 __user *)(long)reg-addr);
break;
+   case KVM_REG_PPC_DEBUG_INST: {
+   u32 opcode = KVMPPC_INST_EHPRIV;
+   r = copy_to_user((u32 __user *)(long)reg-addr,
+opcode, sizeof(u32));
+   break;
+   }
default:
break;
}
-- 
1.7.0.4


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4 v2] KVM: PPC: debug stub interface parameter defined

2013-03-21 Thread Bharat Bhushan
From: Bharat Bhushan bharat.bhus...@freescale.com

This patch defines the interface parameters for the KVM_SET_GUEST_DEBUG
ioctl. Follow-up patches will use them to set up hardware breakpoints,
watchpoints and software breakpoints.

kvm_arch_vcpu_ioctl_set_guest_debug() is also moved one level down, from
powerpc.c into book3s.c and booke.c. This is because I am not sure what
is required for book3s, so the behaviour of this ioctl is unchanged there.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
v2:
 - No Change

 arch/powerpc/include/uapi/asm/kvm.h |   23 +++
 arch/powerpc/kvm/book3s.c   |6 ++
 arch/powerpc/kvm/booke.c|6 ++
 arch/powerpc/kvm/powerpc.c  |6 --
 4 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index c2ff99c..15f9a00 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -272,8 +272,31 @@ struct kvm_debug_exit_arch {
 
 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
+   struct {
+   /* H/W breakpoint/watchpoint address */
+   __u64 addr;
+   /*
+* Type denotes h/w breakpoint, read watchpoint, write
+* watchpoint or watchpoint (both read and write).
+*/
+#define KVMPPC_DEBUG_NOTYPE0x0
+#define KVMPPC_DEBUG_BREAKPOINT(1UL  1)
+#define KVMPPC_DEBUG_WATCH_WRITE   (1UL  2)
+#define KVMPPC_DEBUG_WATCH_READ(1UL  3)
+   __u32 type;
+   __u32 reserved;
+   } bp[16];
 };
 
+/* Debug related defines */
+/*
+ * kvm_guest_debug-control is a 32 bit field. The lower 16 bits are generic
+ * and upper 16 bits are architecture specific. Architecture specific defines
+ * that ioctl is for setting hardware breakpoint or software breakpoint.
+ */
+#define KVM_GUESTDBG_USE_SW_BP 0x0001
+#define KVM_GUESTDBG_USE_HW_BP 0x0002
+
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 };
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 975a401..cb85d73 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -613,6 +613,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return 0;
 }
 
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+   struct kvm_guest_debug *dbg)
+{
+   return -EINVAL;
+}
+
 void kvmppc_decrementer_func(unsigned long data)
 {
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index a41cd6d..1de93a8 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1527,6 +1527,12 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
return r;
 }
 
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+struct kvm_guest_debug *dbg)
+{
+   return -EINVAL;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
return -ENOTSUPP;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 934413c..4c94ca9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -532,12 +532,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 #endif
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-struct kvm_guest_debug *dbg)
-{
-   return -EINVAL;
-}
-
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
  struct kvm_run *run)
 {
-- 
1.7.0.4


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >