[PATCH 3/4] AER-KVM: Integration of KVM with AER for PCI pass-thru devices

2012-11-19 Thread Pandarathil, Vijaymohan R

- Register a notifier function to be called whenever a PCIe error is
detected by the AER subsystem.

- The notifier function bumps up a global count to keep track of the
error notifications.

- Before guest entry, each vcpu checks if there have been any new
notifications since the last check. If any, check if the device impacted
is assigned to the guest. If impacted, return to qemu requesting that
the guest be brought down. If no device assigned to the guest is impacted,
sync up the per guest notified count to the global value.

- At guest start time, check if any of the PCI devices assigned to the
guest is faulty and if so, fail the guest startup.

Signed-off-by: Vijay Mohan Pandarathil 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c  | 44 +
 include/linux/kvm_host.h|  4 
 include/uapi/linux/kvm.h|  1 +
 virt/kvm/assigned-dev.c | 34 +++
 virt/kvm/kvm_main.c | 34 +++
 6 files changed, 118 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b2e11f4..481ad94 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -951,6 +951,7 @@ enum {
  */
 asmlinkage void kvm_spurious_fault(void);
 extern bool kvm_rebooting;
+extern unsigned long kvm_aer_notified_cnt;
 
 #define kvm_handle_fault_on_reboot(insn, cleanup_insn) \
"666: " insn "\n\t" \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4f76417..87e3c3e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5235,6 +5235,32 @@ static void process_nmi(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 
+/*
+ * This function checks if KVM has been notified of any PCI error since last
+ * checked by this guest. If so, it checks if any PCI device assigned to this
+ * guest has got the error. If not, adjust the per guest notified_cnt to match
+ * the global kvm notified_cnt
+ */
+static inline int kvm_aer_exit(struct kvm *kvm)
+{
+   if (kvm_aer_notified_cnt == kvm->aer_notified_cnt)
+   return 0;
+
+   /*
+* These errors are expected to be very rare. In the case
+* of an error notification, multiple vcpu threads could reach
+* here and do the device check below. However, functionally
+* it shouldn't cause a problem.
+*/
+   if (kvm_find_assigned_dev_err(kvm)) {
+   return 1;
+   } else {
+   spin_lock(&kvm->aer_lock);
+   kvm->aer_notified_cnt = kvm_aer_notified_cnt;
+   spin_unlock(&kvm->aer_lock);
+   return 0;
+   }
+}
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
int r;
@@ -5334,6 +5360,24 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
 
+   /*
+* If any of the PCI devices assigned to a guest is reported to have
+* uncorrected error, do not allow guest code to execute, instead
+* bring down the guest to contain the error. Note that there is a
+* small window here where a new error notification could come in
+* while the check is being done or right after the check before the cpu
+* enters the guest mode. Not sure if this check needs to be after
+* kvm_guest_enter() ?
+*/
+   if (kvm_aer_exit(vcpu->kvm)) {
+   vcpu->mode = OUTSIDE_GUEST_MODE;
+   smp_wmb();
+   local_irq_enable();
+   preempt_enable();
+   r = 0;
+   vcpu->run->exit_reason = KVM_EXIT_AER_SHUTDOWN;
+   goto cancel_injection;
+   }
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
if (req_immediate_exit)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ecc5543..b3c2730 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -364,6 +364,8 @@ struct kvm {
long mmu_notifier_count;
 #endif
long tlbs_dirty;
+   spinlock_t aer_lock;
+   unsigned long aer_notified_cnt;
 };
 
 #define kvm_err(fmt, ...) \
@@ -933,6 +935,8 @@ static inline long kvm_vm_ioctl_assigned_device(struct kvm 
*kvm, unsigned ioctl,
 
 #endif
 
+int kvm_find_assigned_dev_err(struct kvm *kvm);
+
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 {
set_bit(req, &vcpu->requests);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0a6d6ba..6263c21 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -167,6 +167,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_OSI  18
 #define KVM_EXIT_PAPR_HCALL  19
 #define KVM_EXIT_S390_UCONTROL   20
+#define KVM_EXIT_AER_SHUTDOWN 21
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
diff --git a/virt/kvm/assigned-dev.c 

[PATCH 3/4] AER-KVM: Integration of KVM with AER for PCI pass-thru devices

2012-11-19 Thread Pandarathil, Vijaymohan R

- Register a notifier function to be called whenever a PCIe error is
detected by the AER subsystem.

- The notifier function bumps up a global count to keep track of the
error notifications.

- Before guest entry, each vcpu checks if there have been any new
notifications since the last check. If any, check if the device impacted
is assigned to the guest. If impacted, return to qemu requesting that
the guest be brought down. If no device assigned to the guest is impacted,
sync up the per guest notified count to the global value.

- At guest start time, check if any of the PCI devices assigned to the
guest is faulty and if so, fail the guest startup.

Signed-off-by: Vijay Mohan Pandarathil vijaymohan.pandarat...@hp.com
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c  | 44 +
 include/linux/kvm_host.h|  4 
 include/uapi/linux/kvm.h|  1 +
 virt/kvm/assigned-dev.c | 34 +++
 virt/kvm/kvm_main.c | 34 +++
 6 files changed, 118 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b2e11f4..481ad94 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -951,6 +951,7 @@ enum {
  */
 asmlinkage void kvm_spurious_fault(void);
 extern bool kvm_rebooting;
+extern unsigned long kvm_aer_notified_cnt;
 
 #define kvm_handle_fault_on_reboot(insn, cleanup_insn) \
"666: " insn "\n\t" \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4f76417..87e3c3e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5235,6 +5235,32 @@ static void process_nmi(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 
+/*
+ * This function checks if KVM has been notified of any PCI error since last
+ * checked by this guest. If so, it checks if any PCI device assigned to this
+ * guest has got the error. If not, adjust the per guest notified_cnt to match
+ * the global kvm notified_cnt
+ */
+static inline int kvm_aer_exit(struct kvm *kvm)
+{
+   if (kvm_aer_notified_cnt == kvm->aer_notified_cnt)
+   return 0;
+
+   /*
+* These errors are expected to be very rare. In the case
+* of an error notification, multiple vcpu threads could reach
+* here and do the device check below. However, functionally
+* it shouldn't cause a problem.
+*/
+   if (kvm_find_assigned_dev_err(kvm)) {
+   return 1;
+   } else {
+   spin_lock(&kvm->aer_lock);
+   kvm->aer_notified_cnt = kvm_aer_notified_cnt;
+   spin_unlock(&kvm->aer_lock);
+   return 0;
+   }
+}
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
int r;
@@ -5334,6 +5360,24 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
 
+   /*
+* If any of the PCI devices assigned to a guest is reported to have
+* uncorrected error, do not allow guest code to execute, instead
+* bring down the guest to contain the error. Note that there is a
+* small window here where a new error notification could come in
+* while the check is being done or right after the check before the cpu
+* enters the guest mode. Not sure if this check needs to be after
+* kvm_guest_enter() ?
+*/
+   if (kvm_aer_exit(vcpu->kvm)) {
+   vcpu->mode = OUTSIDE_GUEST_MODE;
+   smp_wmb();
+   local_irq_enable();
+   preempt_enable();
+   r = 0;
+   vcpu->run->exit_reason = KVM_EXIT_AER_SHUTDOWN;
+   goto cancel_injection;
+   }
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
if (req_immediate_exit)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ecc5543..b3c2730 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -364,6 +364,8 @@ struct kvm {
long mmu_notifier_count;
 #endif
long tlbs_dirty;
+   spinlock_t aer_lock;
+   unsigned long aer_notified_cnt;
 };
 
 #define kvm_err(fmt, ...) \
@@ -933,6 +935,8 @@ static inline long kvm_vm_ioctl_assigned_device(struct kvm 
*kvm, unsigned ioctl,
 
 #endif
 
+int kvm_find_assigned_dev_err(struct kvm *kvm);
+
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 {
set_bit(req, &vcpu->requests);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0a6d6ba..6263c21 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -167,6 +167,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_OSI  18
 #define KVM_EXIT_PAPR_HCALL  19
 #define KVM_EXIT_S390_UCONTROL   20
+#define KVM_EXIT_AER_SHUTDOWN 21
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
diff --git