[PATCH 3/4] PF: Provide additional direct page notification

2013-07-05 Thread Dominik Dingel
By setting a Kconfig option, the architecture can control when
guest notifications will be presented by the apf backend.
So there is the default batch mechanism, working as before, where the vcpu 
thread
should pull in this information. On the other hand there is now the direct
mechanism, this will directly push the information to the guest.

Still the vcpu thread should call check_completion to cleanup leftovers,
that leaves most of the common code untouched.

Signed-off-by: Dominik Dingel 
---
 arch/x86/kvm/mmu.c   |  2 +-
 include/linux/kvm_host.h |  2 +-
 virt/kvm/Kconfig |  4 
 virt/kvm/async_pf.c  | 22 +++---
 4 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0d094da..b8632e9 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3343,7 +3343,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, 
gva_t gva, gfn_t gfn)
arch.direct_map = vcpu->arch.mmu.direct_map;
arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
 
-   return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+   return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
 }
 
 static bool can_do_async_pf(struct kvm_vcpu *vcpu)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 210f493..969d575 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -175,7 +175,7 @@ struct kvm_async_pf {
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
 void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
   struct kvm_arch_async_pf *arch);
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 779262f..715e6b5 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -22,6 +22,10 @@ config KVM_MMIO
 config KVM_ASYNC_PF
bool
 
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_DIRECT
+   bool
+
 config HAVE_KVM_MSI
bool
 
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index ea475cd..b8df37a 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
 #include "async_pf.h"
 #include 
 
+static inline void kvm_async_page_direct_present(struct kvm_vcpu *vcpu,
+struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_DIRECT
+   kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_batch_present(struct kvm_vcpu *vcpu,
+   struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_DIRECT
+   kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
 static struct kmem_cache *async_pf_cache;
 
 int kvm_async_pf_init(void)
@@ -70,6 +85,7 @@ static void async_pf_execute(struct work_struct *work)
down_read(&mm->mmap_sem);
get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
up_read(&mm->mmap_sem);
+   kvm_async_page_direct_present(vcpu, apf);
unuse_mm(mm);
 
spin_lock(&vcpu->async_pf.lock);
@@ -134,7 +150,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 
if (work->page)
kvm_arch_async_page_ready(vcpu, work);
-   kvm_arch_async_page_present(vcpu, work);
+   kvm_async_page_batch_present(vcpu, work);
 
list_del(&work->queue);
vcpu->async_pf.queued--;
@@ -144,7 +160,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
}
 }
 
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
   struct kvm_arch_async_pf *arch)
 {
struct kvm_async_pf *work;
@@ -166,7 +182,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, 
gfn_t gfn,
work->done = false;
work->vcpu = vcpu;
work->gva = gva;
-   work->addr = gfn_to_hva(vcpu->kvm, gfn);
+   work->addr = hva;
work->arch = *arch;
work->mm = current->mm;
atomic_inc(&work->mm->mm_count);
-- 
1.8.2.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] PF: Add FAULT_FLAG_RETRY_NOWAIT for guest fault

2013-07-05 Thread Dominik Dingel
In case of a fault retry exit sie64() with gmap_fault indication for the
running thread set. This makes it possible to handle async page faults
without the need for mm notifiers.

Based on a patch from Martin Schwidefsky.

Signed-off-by: Dominik Dingel 
---
 arch/s390/include/asm/pgtable.h   |  2 ++
 arch/s390/include/asm/processor.h |  1 +
 arch/s390/kvm/kvm-s390.c  | 13 +
 arch/s390/mm/fault.c  | 26 ++
 4 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0ea4e59..4a4cc64 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -740,6 +740,7 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
  * @table: pointer to the page directory
  * @asce: address space control element for gmap page table
  * @crst_list: list of all crst tables used in the guest address space
+ * @pfault_enabled: defines if pfaults are applicable for the guest
  */
 struct gmap {
struct list_head list;
@@ -748,6 +749,7 @@ struct gmap {
unsigned long asce;
void *private;
struct list_head crst_list;
+   unsigned long pfault_enabled;
 };
 
 /**
diff --git a/arch/s390/include/asm/processor.h 
b/arch/s390/include/asm/processor.h
index 6b49987..4fa96ca 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -77,6 +77,7 @@ struct thread_struct {
 unsigned long ksp;  /* kernel stack pointer */
mm_segment_t mm_segment;
unsigned long gmap_addr;/* address of last gmap fault. */
+   unsigned int gmap_pfault;   /* signal of a pending guest pfault */
struct per_regs per_user;   /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags;/* Flags to control debug behavior */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ba694d2..702daca 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -682,6 +682,15 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
return 0;
 }
 
+static void kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
+{
+   hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+   struct mm_struct *mm = current->mm;
+   down_read(&mm->mmap_sem);
+   get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
+   up_read(&mm->mmap_sem);
+}
+
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
int rc;
@@ -715,6 +724,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
if (rc < 0) {
if (kvm_is_ucontrol(vcpu->kvm)) {
rc = SIE_INTERCEPT_UCONTROL;
+   } else if (current->thread.gmap_pfault) {
+   kvm_arch_fault_in_sync(vcpu);
+   current->thread.gmap_pfault = 0;
+   rc = 0;
} else {
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 047c3e4..7d4c4b1 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -50,6 +50,7 @@
 #define VM_FAULT_BADMAP0x02
 #define VM_FAULT_BADACCESS 0x04
 #define VM_FAULT_SIGNAL0x08
+#define VM_FAULT_PFAULT0x10
 
 static unsigned long store_indication __read_mostly;
 
@@ -232,6 +233,7 @@ static noinline void do_fault_error(struct pt_regs *regs, 
int fault)
return;
}
case VM_FAULT_BADCONTEXT:
+   case VM_FAULT_PFAULT:
do_no_context(regs);
break;
case VM_FAULT_SIGNAL:
@@ -269,6 +271,9 @@ static noinline void do_fault_error(struct pt_regs *regs, 
int fault)
  */
 static inline int do_exception(struct pt_regs *regs, int access)
 {
+#ifdef CONFIG_PGSTE
+   struct gmap *gmap;
+#endif
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
@@ -307,9 +312,10 @@ static inline int do_exception(struct pt_regs *regs, int 
access)
down_read(&mm->mmap_sem);
 
 #ifdef CONFIG_PGSTE
-   if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
-   address = __gmap_fault(address,
-(struct gmap *) S390_lowcore.gmap);
+   gmap = (struct gmap *)
+   ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
+   if (gmap) {
+   address = __gmap_fault(address, gmap);
if (address == -EFAULT) {
fault = VM_FAULT_BADMAP;
goto out_up;
@@ -318,6 +324,8 @@ static inline int do_exception(struct pt_regs *regs, int 
access)
fault = VM_FAULT_OOM;
goto out_up;
   

[PATCH 4/4] PF: Async page fault support on s390

2013-07-05 Thread Dominik Dingel
This patch enables async page faults for s390 kvm guests.
It provides the userspace API to enable, disable or get the status of this
feature. Also it includes the diagnose code, called by the guest to enable
async page faults from the guest's point of view.

The async page faults will use an already existing guest interface for this
purpose, as described in "CP Programming Services" (SC24-6084).

Signed-off-by: Dominik Dingel 
---
 Documentation/s390/kvm.txt   |  24 ++
 arch/s390/include/asm/kvm_host.h |  22 +
 arch/s390/include/uapi/asm/kvm.h |  10 
 arch/s390/kvm/Kconfig|   2 +
 arch/s390/kvm/Makefile   |   2 +-
 arch/s390/kvm/diag.c |  57 ++
 arch/s390/kvm/interrupt.c|  38 ---
 arch/s390/kvm/kvm-s390.c | 100 ++-
 arch/s390/kvm/kvm-s390.h |   4 ++
 arch/s390/kvm/sigp.c |   2 +
 include/uapi/linux/kvm.h |   2 +
 11 files changed, 254 insertions(+), 9 deletions(-)

diff --git a/Documentation/s390/kvm.txt b/Documentation/s390/kvm.txt
index 85f3280..707b7e9 100644
--- a/Documentation/s390/kvm.txt
+++ b/Documentation/s390/kvm.txt
@@ -70,6 +70,30 @@ floating interrupts are:
 KVM_S390_INT_VIRTIO
 KVM_S390_INT_SERVICE
 
+ioctl:  KVM_S390_APF_ENABLE:
+args:   none
+This ioctl is used to enable the async page fault interface. So in a
+host page fault case the host can now submit pfault tokens to the guest.
+
+ioctl:  KVM_S390_APF_DISABLE:
+args:   none
+This ioctl is used to disable the async page fault interface. From this point
+on no new pfault tokens will be issued to the guest. Already existing async
+page faults are not covered by this and will be normally handled.
+
+ioctl:  KVM_S390_APF_STATUS:
+args:   none
+This ioctl allows the userspace to get the current status of the APF feature.
+The main purpose for this, is to ensure that no pfault tokens will be lost
+during live migration or similar management operations.
+The possible return values are:
+KVM_S390_APF_DISABLED_NON_PENDING
+KVM_S390_APF_DISABLED_PENDING
+KVM_S390_APF_ENABLED_NON_PENDING
+KVM_S390_APF_ENABLED_PENDING
+Caution: if KVM_S390_APF is enabled the PENDING status could be already changed
+as soon as the ioctl returns to userspace.
+
 3. ioctl calls to the kvm-vcpu file descriptor
 KVM does support the following ioctls on s390 that are common with other
 architectures and do behave the same:
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 152..ed57362 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -257,6 +257,10 @@ struct kvm_vcpu_arch {
u64 stidp_data;
};
struct gmap *gmap;
+#define KVM_S390_PFAULT_TOKEN_INVALID  (-1UL)
+   unsigned long pfault_token;
+   unsigned long pfault_select;
+   unsigned long pfault_compare;
 };
 
 struct kvm_vm_stat {
@@ -277,6 +281,24 @@ struct kvm_arch{
 #define KVM_HVA_ERR_BAD(-1UL)
 #define KVM_HVA_ERR_RO_BAD (-1UL)
 
+#define ASYNC_PF_PER_VCPU  64
+struct kvm_vcpu;
+struct kvm_async_pf;
+struct kvm_arch_async_pf {
+   unsigned long pfault_token;
+};
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+  struct kvm_async_pf *work);
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+struct kvm_async_pf *work);
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+struct kvm_async_pf *work);
+
 static inline bool kvm_is_error_hva(unsigned long addr)
 {
/*
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index d25da59..b6c83e0 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -57,4 +57,14 @@ struct kvm_sync_regs {
 #define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
 #define KVM_REG_S390_CPU_TIMER  (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
 #define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
+
+/* ioctls used for setting/getting status of APF on s390x */
+#define KVM_S390_APF_ENABLE1
+#define KVM_S390_APF_DISABLE   2
+#define KVM_S390_APF_STATUS3
+#define KVM_S390_APF_DISABLED_NON_PENDING  0
+#define KVM_S390_APF_DISABLED_PENDING  1
+#define KVM_S390_APF_ENABLED_NON_PENDING   2
+#define KVM_S390_APF_ENABLED_PENDING   3
+
 #endif
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 70b46ea..4993eed 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -23,6 +23,8 @@ config KVM
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_EVENTFD
+   select KVM_ASYNC_PF
+   select KVM_ASYNC_PF_DIRECT
---help---
  Support hosting paravirtualized guest machines 

[PATCH 2/4] PF: Move architecture specifics to the backends

2013-07-05 Thread Dominik Dingel
Current common code uses PAGE_OFFSET to indicate a bad host virtual address.
As this check won't work on architectures that don't map kernel and user memory
into the same address space (e.g. s390), it is moved into architecture-specific
code.

Signed-off-by: Dominik Dingel 
---
 arch/arm/include/asm/kvm_host.h |  8 
 arch/ia64/include/asm/kvm_host.h|  3 +++
 arch/mips/include/asm/kvm_host.h|  6 ++
 arch/powerpc/include/asm/kvm_host.h |  8 
 arch/s390/include/asm/kvm_host.h| 12 
 arch/x86/include/asm/kvm_host.h |  8 
 include/linux/kvm_host.h|  8 
 7 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 7d22517..557c2a1 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -74,6 +74,14 @@ struct kvm_arch {
struct vgic_distvgic;
 };
 
+#define KVM_HVA_ERR_BAD(PAGE_OFFSET)
+#define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+   return addr >= PAGE_OFFSET;
+}
+
 #define KVM_NR_MEM_OBJS 40
 
 /*
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 989dd3f..d3afa6f 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -486,6 +486,9 @@ struct kvm_arch {
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
 };
 
+#define KVM_HVA_ERR_BAD(PAGE_OFFSET)
+#define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE)
+
 union cpuid3_t {
u64 value;
struct {
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 4d6fa0b..3a0a3f7 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -34,7 +34,13 @@
 #define KVM_NR_PAGE_SIZES  1
 #define KVM_PAGES_PER_HPAGE(x) 1
 
+#define KVM_HVA_ERR_BAD(PAGE_OFFSET)
+#define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE)
 
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+   return addr >= PAGE_OFFSET;
+}
 
 /* Special address that contains the comm page, used for reducing # of traps */
 #define KVM_GUEST_COMMPAGE_ADDR 0x0
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index af326cd..be5d7f4 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -273,6 +273,14 @@ struct kvm_arch {
 #endif
 };
 
+#define KVM_HVA_ERR_BAD(PAGE_OFFSET)
+#define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+   return addr >= PAGE_OFFSET;
+}
+
 /*
  * Struct for a virtual core.
  * Note: entry_exit_count combines an entry count in the bottom 8 bits
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3238d40..152 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -274,6 +274,18 @@ struct kvm_arch{
int css_support;
 };
 
+#define KVM_HVA_ERR_BAD(-1UL)
+#define KVM_HVA_ERR_RO_BAD (-1UL)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+   /*
+* on s390, this check is not needed as kernel and user memory
+* is not mapped into the same address space
+*/
+   return false;
+}
+
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
 extern char sie_exit;
 #endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f87f7fc..07e8570 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -96,6 +96,14 @@
 
 #define ASYNC_PF_PER_VCPU 64
 
+#define KVM_HVA_ERR_BAD(PAGE_OFFSET)
+#define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+   return addr >= PAGE_OFFSET;
+}
+
 struct kvm_vcpu;
 struct kvm;
 struct kvm_async_pf;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a63d83e..210f493 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -85,14 +85,6 @@ static inline bool is_noslot_pfn(pfn_t pfn)
return pfn == KVM_PFN_NOSLOT;
 }
 
-#define KVM_HVA_ERR_BAD(PAGE_OFFSET)
-#define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE)
-
-static inline bool kvm_is_error_hva(unsigned long addr)
-{
-   return addr >= PAGE_OFFSET;
-}
-
 #define KVM_ERR_PTR_BAD_PAGE   (ERR_PTR(-ENOENT))
 
 static inline bool is_error_page(struct page *page)
-- 
1.8.2.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v2 0/4] Enable async page faults on s390

2013-07-05 Thread Dominik Dingel
Gleb, Paolo, 

based on the work from Martin and Carsten, this implementation enables async 
page faults.
To the guest it will provide the pfault interface, but internally it uses the
async page fault common code. 

The initial submission and its discussion can be followed at 
http://www.mail-archive.com/kvm@vger.kernel.org/msg63359.html .

There is a slight modification for common code to move from a pull to a push 
based approach on s390. 
On s390 we don't want to wait until we leave the guest state to queue the 
notification interrupts.

To use this feature the controlling userspace has to enable the capability.
With that knob we can later on disable this feature for live migration.

v1 -> v2:
 - Adding other architecture backends
 - Adding documentation for the ioctl
 - Improving the overall error handling
 - Reducing the needed modifications on the common code

Dominik Dingel (4):
  PF: Add FAULT_FLAG_RETRY_NOWAIT for guest fault
  PF: Move architecture specifics to the backends
  PF: Provide additional direct page notification
  PF: Async page fault support on s390

 Documentation/s390/kvm.txt  |  24 
 arch/arm/include/asm/kvm_host.h |   8 +++
 arch/ia64/include/asm/kvm_host.h|   3 +
 arch/mips/include/asm/kvm_host.h|   6 ++
 arch/powerpc/include/asm/kvm_host.h |   8 +++
 arch/s390/include/asm/kvm_host.h|  34 +++
 arch/s390/include/asm/pgtable.h |   2 +
 arch/s390/include/asm/processor.h   |   1 +
 arch/s390/include/uapi/asm/kvm.h|  10 
 arch/s390/kvm/Kconfig   |   2 +
 arch/s390/kvm/Makefile  |   2 +-
 arch/s390/kvm/diag.c|  57 ++
 arch/s390/kvm/interrupt.c   |  38 +---
 arch/s390/kvm/kvm-s390.c| 111 
 arch/s390/kvm/kvm-s390.h|   4 ++
 arch/s390/kvm/sigp.c|   2 +
 arch/s390/mm/fault.c|  26 +++--
 arch/x86/include/asm/kvm_host.h |   8 +++
 arch/x86/kvm/mmu.c  |   2 +-
 include/linux/kvm_host.h|  10 +---
 include/uapi/linux/kvm.h|   2 +
 virt/kvm/Kconfig|   4 ++
 virt/kvm/async_pf.c |  22 ++-
 23 files changed, 361 insertions(+), 25 deletions(-)

-- 
1.8.2.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


KVM: x86: stop IO emulation cycle if instruction pointer is modified

2013-07-05 Thread Marcelo Tosatti

MMIO/PIO emulation should be interrupted if the system is restarted.
Otherwise in progress IO emulation continues at the instruction pointer,
even after vcpus' IP has been modified by KVM_SET_REGS.

Use IP change as an indicator to reset MMIO/PIO emulation state.

Signed-off-by: Marcelo Tosatti 

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8ba99c..9f7ce41 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6144,6 +6144,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
struct kvm_regs *regs)
kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
 #endif
 
+   /* stop IO emulation cycle if RIP changes */
+   if (kvm_rip_read(vcpu) != regs->rip)
+   vcpu->arch.complete_userspace_io = NULL;
+
kvm_rip_write(vcpu, regs->rip);
kvm_set_rflags(vcpu, regs->rflags);
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] KVM: nVMX: Fix read/write to MSR_IA32_FEATURE_CONTROL

2013-07-05 Thread Arthur Chunqi Li
Fix read/write to IA32_FEATURE_CONTROL MSR in nested environment.

This patch simulate this MSR in nested_vmx and the default value is
0x0. BIOS should set it to 0x5 before VMXON. After setting the lock
bit, write to it will cause #GP(0).

Signed-off-by: Arthur Chunqi Li 
---
 arch/x86/kvm/vmx.c |   25 +
 arch/x86/kvm/x86.c |3 ++-
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 260a919..5e3d44e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -373,6 +373,7 @@ struct nested_vmx {
 * we must keep them pinned while L2 runs.
 */
struct page *apic_access_page;
+   u64 msr_ia32_feature_control;
 };
 
 #define POSTED_INTR_ON  0
@@ -2277,8 +2278,11 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 
msr_index, u64 *pdata)
 
switch (msr_index) {
case MSR_IA32_FEATURE_CONTROL:
-   *pdata = 0;
-   break;
+   if (nested_vmx_allowed(vcpu)){
+   *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+   break;
+   }
+   return 0;
case MSR_IA32_VMX_BASIC:
/*
 * This MSR reports some information about VMX support. We
@@ -2356,9 +2360,13 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 
msr_index, u64 data)
if (!nested_vmx_allowed(vcpu))
return 0;
 
-   if (msr_index == MSR_IA32_FEATURE_CONTROL)
-   /* TODO: the right thing. */
+   if (msr_index == MSR_IA32_FEATURE_CONTROL){
+   if (to_vmx(vcpu)->nested.msr_ia32_feature_control
+   & FEATURE_CONTROL_LOCKED)
+   return 0;
+   to_vmx(vcpu)->nested.msr_ia32_feature_control = data;
return 1;
+   }
/*
 * No need to treat VMX capability MSRs specially: If we don't handle
 * them, handle_wrmsr will #GP(0), which is correct (they are readonly)
@@ -5595,6 +5603,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
skip_emulated_instruction(vcpu);
return 1;
}
+
+#define VMXON_NEEDED_FEATURES \
+ (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
+   if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
+   != VMXON_NEEDED_FEATURES) {
+   kvm_inject_gp(vcpu, 0);
+   return 1;
+   }
+
if (enable_shadow_vmcs) {
shadow_vmcs = alloc_vmcs();
if (!shadow_vmcs)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8ba99c..2d4eb8e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -850,7 +850,8 @@ static u32 msrs_to_save[] = {
 #ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-   MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
+   MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+   MSR_IA32_FEATURE_CONTROL
 };
 
 static unsigned num_msrs_to_save;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 60518] Heavy network traffic between guest and host triggers kernel oops

2013-07-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=60518

Bart Van Assche  changed:

   What|Removed |Added

 Regression|No  |Yes

--- Comment #1 from Bart Van Assche  ---
This does not occur with kernel 3.8.12, so marking this bug as a regression.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 60505] Heavy network traffic triggers vhost_net lockup

2013-07-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=60505

--- Comment #1 from Bart Van Assche  ---
Note: this might be a consequence of bug 60518.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] vhost: Avoid that vhost_work_flush() locks up

2013-07-05 Thread Bart Van Assche

On 07/05/13 10:36, Bart Van Assche wrote:

Wake up work->done waiters even if the TIF_NEED_RESCHED task flag
has been set. This patch fixes a regression introduced in commit
d550dda (kernel v3.4).

Reference: https://bugzilla.kernel.org/show_bug.cgi?id=60505


(replying to my own e-mail)

Please ignore this patch. Although it might help to wake up 
vhost_work_flush() earlier, the patch description does not match the 
patch itself and the patch does not fix the vhost_work_flush() lockup.


Bart.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 60518] New: Heavy network traffic between guest and host triggers kernel oops

2013-07-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=60518

Bug ID: 60518
   Summary: Heavy network traffic between guest and host triggers
kernel oops
   Product: Virtualization
   Version: unspecified
Kernel Version: v3.9.8 and v3.9.9
  Hardware: All
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: normal
  Priority: P1
 Component: kvm
  Assignee: virtualization_...@kernel-bugs.osdl.org
  Reporter: bvanass...@acm.org
Regression: No

When sending network packets at a high rate between KVM guests I either run
into bug 60505 or into the call trace below:

BUG: unable to handle kernel NULL pointer dereference at 001c
IP: [] put_compound_page+0x89/0x170
PGD 0 
Oops:  [#1] SMP 
Modules linked in: dm_queue_length dm_multipath rdma_ucm rdma_cm iw_cm ib_addr
ib_srp scsi_transport_srp scsi_tgt ib_ipoib ib_cm ib_uverbs ib_umad mlx4_en
mlx4_ib ib_sa ib_mad ib_core mlx4_core libcrc32c crc32c_intel nls_utf8
iscsi_tcp libisc rt_iscsi vhost_net tun fuse ip6table_filter ip6_tables
iptable_filter ip_tables ebtable_nat ebtables x_tables af_packet bridge stp llc
dm_mod hid_generic usbhid hid snd_hda_codec_hdmi snd_hda_codec_realtek
snd_hda_intel snd_hda_codec acpi_cpuf vm_intel snd_pcm cdrom pcspkr kvm snd_seq
sg snd_timer snd_seq_device snd r8169 ehci_pci ehci_hcd i2c_i801 soundcore wmi
snd_page_alloc mii microcode autofs4 ext4 jbd2 mbcache crc16 raid456
async_raid6_recov async_pq raid6_pq async_xor xor asy id0 raid1 sd_mod
crc_t10dif ahci xhci_hcd libahci i915 drm_kms_helper drm intel_agp i2c_algo_bit
intel_gtt agpgart usbcore i2c_core video usb_common button processor
thermal_sys hwmon scsi_dh_alua scsi_dh pata_acpi libata scsi_mod [last unload 
CPU 3 
Pid: 30508, comm: vhost-30506 Tainted: G   O 3.9.8+ #1 Gigabyte
Technology Co., Ltd. Z68X-UD3H-B3/Z68X-UD3H-B3
RIP: 0010:[]  []
put_compound_page+0x89/0x170
RSP: 0018:880036887bc8  EFLAGS: 00010286
RAX: 8800cdb8ae00 RBX: 8800cdb8a800 RCX: 8800cdb8b6c0
RDX: 0140 RSI: 0001 RDI: 8800cdb8a800
RBP: 880036887be8 R08: 88005ddb4518 R09: 0010
R10:  R11: 7f6f R12: 
R13: a079896c R14: 91aa R15: 880115200900
FS:  () GS:88011fac() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 001c CR3: 9f2ff000 CR4: 000427e0
DR0:  DR1:  DR2: 
DR3:  DR6: 0ff0 DR7: 0400
Process vhost-30506 (pid: 30508, threadinfo 880036886000, task
8801088cc020)
Stack:
ea00016fbfc0 0012 880115200900 a079896c
880036887bf8 810f77ec 880036887c18 8132046f
880115200900 880115200900 880036887c38 8132050e
Call Trace:
[] put_page+0x2c/0x40
[] skb_release_data+0x8f/0x110
[] __kfree_skb+0x1e/0xa0
[] kfree_skb+0x36/0xa0
[] tun_get_user+0x71c/0x810 [tun]
[] tun_sendmsg+0x5a/0x80 [tun]
[] handle_tx+0x287/0x680 [vhost_net]
[] handle_tx_kick+0x15/0x20 [vhost_net]
[] vhost_worker+0xfa/0x1a0 [vhost_net]
[] kthread+0xc0/0xd0
[] ret_from_fork+0x7c/0xb0
Code: 8b 6d f8 c9 c3 48 8b 07 f6 c4 80 75 0d f0 ff 4b 1c 0f 94 c0 84 c0 74 c9
eb bf 4c 8b 67 30 48 8b 07 f6 c4 80 74 e7 4c 39 e7 74 e2 <41> 8b 54 24 1c 49 8d
4c 24 1c 85 d2 74 d4 8d 72 01 89 d0 f0 0f 
RIP  [] put_compound_page+0x89/0x170
RSP 
CR2: 001c
---[ end trace 97d0c820ff95bb21 ]---

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [libvirt-users] libvirt & virtio_net - host.freeze@reset.domain

2013-07-05 Thread poma
On 04.07.2013 11:14, poma wrote:
> On 03.07.2013 13:43, Daniel P. Berrange wrote:
>> On Tue, Jul 02, 2013 at 01:25:21PM +0200, poma wrote:
>>> Hello people,
>>>
>>> libvirtd (libvirt) 1.0.5.2
>>> virsh 1.0.5.2
>>> virt-manager 0.10.0
>>>
>>> Host:
>>> Linux localhost 3.9.8-300.fc19.x86_64 #1 SMP Thu Jun 27 19:24:23 UTC
>>> 2013 x86_64 x86_64 x86_64 GNU/Linux
>>> Guest1:
>>> Linux localhost 3.9.8-300.fc19.i686.PAE #1 SMP Thu Jun 27 19:29:30 UTC
>>> 2013 i686 (none)
>>> Guest2:
>>> Linux localhost 3.9.8-300.fc19.x86_64 #1 SMP Thu Jun 27 19:24:23 UTC
>>> 2013 x86_64 x86_64 x86_64 GNU/Linux
>>>
>>>
>>> Virtual NIC - source & model:
>>> macvtap/NAT/bridge & virtio(virtio_net)
>>>
>>> Host freeze at "virsh reset " or "virt-manager - Force Reset"
>>> Need kernel.sysrq or power reset.
>>
>> I don't believe this is a libvirt issue - the 'virsh reset' command
>> will issue the 'system_reset' QEMU monitor command. This in turn
>> does an immediate reset of the guest CPUs/machine.
>>
>> Even if QEMU is doing the wrong thing, the kernel should obviously
>> never freeze/crash in this way - it should be robust against a
>> malicious QEMU process.
>>
>> You should probably send this message to the main QEMU and/or KVM
>> mailing lists so that it comes to the attention of people who are
>> more familiar with QEMU + virtio-net
>>
>>
>> Regards,
>> Daniel
>>
> 
> Thanks for your response.
> Mateusz hit the same issue[1] as well.
> OK, here we go.
> 
> 
> poma
> 
> 
> [1] https://lists.fedoraproject.org/pipermail/users/2013-July/436984.html

OK, is this a side effect or not, but certainly kernel[1] with
'bridge-timer-fix.patch'[2] resolves issue aforementioned, so far.
Thanks Cong, Josh.


poma


[1] http://koji.fedoraproject.org/koji/taskinfo?taskID=5569632
kernel-3.9.8-300.7.fc19.x86_64.rpm
[2] https://bugzilla.redhat.com/show_bug.cgi?id=880035#c53

Ref.
"fix for unreliable guest->host multicast triggers oops"
https://bugzilla.redhat.com/show_bug.cgi?id=980254

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] vhost: Avoid that vhost_work_flush() locks up

2013-07-05 Thread Bart Van Assche
Wake up work->done waiters even if the TIF_NEED_RESCHED task flag
has been set. This patch fixes a regression introduced in commit
d550dda (kernel v3.4).

Signed-off-by: Bart Van Assche 
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=60505
Cc: Michael S. Tsirkin 
Cc: Asias He 
Cc: Nadav Har'El 
Cc: Abel Gordon 
Cc:  # v3.4+
---
 drivers/vhost/vhost.c |   10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 60aa5ad..cd544ae 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -227,8 +227,16 @@ static int vhost_worker(void *data)
if (work) {
__set_current_state(TASK_RUNNING);
work->fn(work);
-   if (need_resched())
+   if (need_resched()) {
+   spin_lock_irq(&dev->work_lock);
+   work->done_seq = seq;
+   if (work->flushing)
+   wake_up_all(&work->done);
+   spin_unlock_irq(&dev->work_lock);
+
+   work = NULL;
schedule();
+   }
} else
schedule();
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html