Hello, I was trying to enable the use of the NMI watchdog within a Linux guest running in KVM. I have done it by allowing direct access to the perfmon MSRs using the MSR_BITMAP field in the VMCS region.
Most of the time the NMI watchdog test in the guest fails, but with a finite number of NMIs received by the guest. But randomly it does work! Whenever it fails, I get this vmwrite error: vmwrite error: reg 4016 value 80000202 (err 164061) I have a few questions. 1. How are NMIs supposed to be delivered to the guest? I did this by adding a new op to kvm_x86_ops. 2. How am I supposed to handle the perfmon MSRs? Direct access may pose problems during migration, but I am not sure how costly emulation by abstraction would be. I have not yet considered saving the MSRs upon vmexits to allow multiple VMs to use the MSRs. I think I can do that easily once I get this working. Here's the code. Please tell me what dumb mistake I am making. diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index c02541e..276048a 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -342,7 +342,7 @@ static const struct wd_ops k7_wd_ops = { #define P6_EVNTSEL_INT (1 << 20) #define P6_EVNTSEL_OS (1 << 17) #define P6_EVNTSEL_USR (1 << 16) -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 +#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x3C #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED static int setup_p6_watchdog(unsigned nmi_hz) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2cbee94..73e9361 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -25,6 +25,8 @@ #include <linux/hrtimer.h> #include <linux/io.h> #include <linux/module.h> +#include <linux/kdebug.h> +#include <linux/notifier.h> #include <asm/processor.h> #include <asm/msr.h> #include <asm/page.h> @@ -740,9 +742,12 @@ static void apic_mmio_write(struct kvm_io_device *this, apic_set_reg(apic, APIC_ICR2, val & 0xff000000); break; + case APIC_LVTPC: + /* Enable PC NMI*/ + if (val == APIC_DM_NMI) + apic_write(APIC_LVTPC,val); case APIC_LVTT: case APIC_LVTTHMR: - case APIC_LVTPC: case APIC_LVT0: case APIC_LVT1: case APIC_LVTERR: @@ -790,6 
+795,18 @@ static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr) return ret; } +static int nmi_notify(struct notifier_block *self,unsigned long val, void *data) { + + struct kvm *kvm; + kvm = list_entry(vm_list.next, struct kvm, vm_list); + kvm_x86_ops->inject_nmi(kvm->vcpus[0]); + return NOTIFY_STOP; +} + +static struct notifier_block nmi_notifier = { + .notifier_call = nmi_notify, +}; + void kvm_free_lapic(struct kvm_vcpu *vcpu) { if (!vcpu->arch.apic) @@ -801,6 +818,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) __free_page(vcpu->arch.apic->regs_page); kfree(vcpu->arch.apic); + unregister_die_notifier(&nmi_notifier); } /* @@ -1005,6 +1023,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) apic->dev.in_range = apic_mmio_range; apic->dev.private = apic; + register_die_notifier(&nmi_notifier); return 0; nomem_free_apic: kfree(apic); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 00a00e4..fcffab1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -89,6 +89,7 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); static struct page *vmx_io_bitmap_a; static struct page *vmx_io_bitmap_b; +static struct page *vmx_msr_bitmap; static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); static DEFINE_SPINLOCK(vmx_vpid_lock); @@ -982,7 +983,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) u32 _vmexit_control = 0; u32 _vmentry_control = 0; - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; + min = PIN_BASED_EXT_INTR_MASK; opt = 0; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, &_pin_based_exec_control) < 0) @@ -994,8 +995,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_CR8_STORE_EXITING | #endif CPU_BASED_USE_IO_BITMAPS | + CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING; + opt = CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, @@ -1568,6 +1571,9 @@ static int 
vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); + /* MSR BITMAP */ + vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap)); + vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ /* Control */ @@ -1786,6 +1792,14 @@ out: return ret; } +static void vmx_inject_nmi(struct kvm_vcpu *vcpu) { + + struct vcpu_vmx * vmx = to_vmx(vcpu); + if (vmx->launched) + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + 2 | INTR_TYPE_NMI | INTR_INFO_VALID_MASK); +} + static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2686,6 +2700,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .exception_injected = vmx_exception_injected, .inject_pending_irq = vmx_intr_assist, .inject_pending_vectors = do_interrupt_requests, + .inject_nmi = vmx_inject_nmi, .set_tss_addr = vmx_set_tss_addr, }; @@ -2700,7 +2715,11 @@ static int __init vmx_init(void) return -ENOMEM; vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); - if (!vmx_io_bitmap_b) { + if (!vmx_io_bitmap_b) + r = -ENOMEM; + + vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!vmx_msr_bitmap) { r = -ENOMEM; goto out; } @@ -2718,6 +2737,15 @@ static int __init vmx_init(void) memset(iova, 0xff, PAGE_SIZE); kunmap(vmx_io_bitmap_b); + iova = kmap(vmx_msr_bitmap); + memset(iova, 0xff, PAGE_SIZE); + /* Enable direct access to first perfmon MSR */ + clear_bit(MSR_P6_PERFCTR0, iova); + clear_bit(MSR_P6_EVNTSEL0, iova); + clear_bit(MSR_P6_PERFCTR0, iova + 2048); + clear_bit(MSR_P6_EVNTSEL0, iova + 2048); + kunmap(vmx_msr_bitmap); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); @@ -2730,8 +2758,9 @@ static int __init vmx_init(void) return 0; out1: - __free_page(vmx_io_bitmap_b); + __free_page(vmx_msr_bitmap); out: + __free_page(vmx_io_bitmap_b); __free_page(vmx_io_bitmap_a); return r; } @@ -2740,6 +2769,7 @@ static 
void __exit vmx_exit(void) { __free_page(vmx_io_bitmap_b); __free_page(vmx_io_bitmap_a); + __free_page(vmx_msr_bitmap); kvm_exit(); } diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 436ce0f..1b6d6a8 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -242,6 +242,7 @@ enum vmcs_field { #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ +#define INTR_TYPE_NMI (2 << 8) #define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 67ae307..f17248d 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -387,6 +387,7 @@ struct kvm_x86_ops { void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code); bool (*exception_injected)(struct kvm_vcpu *vcpu); + void (*inject_nmi)(struct kvm_vcpu *vcpu); void (*inject_pending_irq)(struct kvm_vcpu *vcpu); void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, struct kvm_run *run); --- thanks, balaji rao ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel