[COMMIT master] apic: test nmi-after-sti
From: Avi Kivity While not required by the spec, some guests (Linux) rely on nmi being blocked by an IF-enabling sti. Add a unit test for this condition. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/x86/apic.c b/x86/apic.c index 165f820..2207040 100644 --- a/x86/apic.c +++ b/x86/apic.c @@ -1,6 +1,7 @@ #include "libcflat.h" #include "apic.h" #include "vm.h" +#include "smp.h" typedef struct { unsigned short offset0; @@ -274,9 +275,74 @@ static void test_ioapic_simultaneous(void) g_66 && g_78 && g_66_after_78 && g_66_rip == g_78_rip); } +volatile int nmi_counter_private, nmi_counter, nmi_hlt_counter, sti_loop_active; + +void sti_nop(char *p) +{ +asm volatile ( + ".globl post_sti \n\t" + "sti \n" + /* + * vmx won't exit on external interrupt if blocked-by-sti, + * so give it a reason to exit by accessing an unmapped page. + */ + "post_sti: testb $0, %0 \n\t" + "nop \n\t" + "cli" + : : "m"(*p) + ); +nmi_counter = nmi_counter_private; +} + +static void sti_loop(void *ignore) +{ +unsigned k = 0; + +while (sti_loop_active) { + sti_nop((char *)(ulong)((k++ * 4096) % (128 * 1024 * 1024))); +} +} + +static void nmi_handler(isr_regs_t *regs) +{ +extern void post_sti(void); +++nmi_counter_private; +nmi_hlt_counter += regs->rip == (ulong)post_sti; +} + +static void update_cr3(void *cr3) +{ +write_cr3((ulong)cr3); +} + +static void test_sti_nmi(void) +{ +unsigned old_counter; + +if (cpu_count() < 2) { + return; +} + +set_idt_entry(2, nmi_handler); +on_cpu(1, update_cr3, (void *)read_cr3()); + +sti_loop_active = 1; +on_cpu_async(1, sti_loop, 0); +while (nmi_counter < 3) { + old_counter = nmi_counter; + apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 1); + while (nmi_counter == old_counter) { + ; + } +} +sti_loop_active = 0; +report("nmi-after-sti", nmi_hlt_counter == 0); +} + int main() { setup_vm(); +smp_init(); test_lapic_existence(); @@ -288,6 +354,7 @@ int main() test_ioapic_intr(); test_ioapic_simultaneous(); +test_sti_nmi(); printf("\nsummary: %d tests, %d failures\n", g_tests, g_fail); -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] apic: use boot idt instead of a locally allocated idt
From: Avi Kivity This allows the smp support, which uses the boot idt, to work. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/x86/apic.c b/x86/apic.c index 48fa0f7..165f820 100644 --- a/x86/apic.c +++ b/x86/apic.c @@ -89,7 +89,7 @@ asm ( #endif ); -static idt_entry_t idt[256]; +static idt_entry_t *idt = 0; static int g_fail; static int g_tests; @@ -127,19 +127,6 @@ void test_enable_x2apic(void) } } -static void init_idt(void) -{ -struct { -u16 limit; -ulong idt; -} __attribute__((packed)) idt_ptr = { -sizeof(idt_entry_t) * 256 - 1, -(ulong)&idt, -}; - -asm volatile("lidt %0" : : "m"(idt_ptr)); -} - static void set_idt_entry(unsigned vec, void (*func)(isr_regs_t *regs)) { u8 *thunk = vmalloc(50); @@ -296,7 +283,6 @@ int main() mask_pic_interrupts(); enable_apic(); test_enable_x2apic(); -init_idt(); test_self_ipi(); -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: fast-path msi injection with irqfd
From: Michael S. Tsirkin Store irq routing table pointer in the irqfd object, and use that to inject MSI directly without bouncing out to a kernel thread. While we touch this structure, rearrange irqfd fields to make fastpath better packed for better cache utilization. This also adds some comments about locking rules and rcu usage in code. Some notes on the design: - Use pointer into the rt instead of copying an entry, to make it possible to use rcu, thus side-stepping locking complexities. We also save some memory this way. - Old workqueue code is still used for level irqs. I don't think we DTRT with level anyway, however, it seems easier to keep the code around as it has been thought through and debugged, and fix level later than rip out and re-instate it later. Signed-off-by: Michael S. Tsirkin Acked-by: Marcelo Tosatti Acked-by: Gregory Haskins Signed-off-by: Avi Kivity diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4bd663d..f17beae 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -240,6 +241,10 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP + /* +* Update side is protected by irq_lock and, +* if configured, irqfds.lock. +*/ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; struct hlist_head irq_ack_notifier_list; @@ -511,6 +516,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, unsigned long *deliver_bitmask); #endif int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, + int irq_source_id, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); @@ -652,17 +659,26 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {} void kvm_eventfd_init(struct kvm *kvm); int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags); void kvm_irqfd_release(struct kvm *kvm); +void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #else static inline void kvm_eventfd_init(struct kvm *kvm) {} + static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) { return -EINVAL; } static inline void kvm_irqfd_release(struct kvm *kvm) {} + +static inline void kvm_irq_routing_update(struct kvm *kvm, + struct kvm_irq_routing_table *irq_rt) +{ + rcu_assign_pointer(kvm->irq_routing, irq_rt); +} + static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { return -ENOSYS; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index c1f1e3c..2ca4535 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -44,14 +44,19 @@ */ struct _irqfd { - struct kvm *kvm; - struct eventfd_ctx *eventfd; - int gsi; - struct list_head list; - poll_tablept; - wait_queue_t wait; - struct work_structinject; - struct work_structshutdown; + /* Used for MSI fast-path */ + struct kvm *kvm; + wait_queue_t wait; + /* Update side is protected by irqfds.lock */ + struct kvm_kernel_irq_routing_entry __rcu *irq_entry; + /* Used for level IRQ fast-path */ + int gsi; + struct work_struct inject; + /* Used for setup/shutdown */ + struct eventfd_ctx *eventfd; + struct list_head list; + poll_table pt; + struct work_struct shutdown; }; static struct workqueue_struct *irqfd_cleanup_wq; @@ -125,14 +130,22 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); unsigned long flags = (unsigned long)key; + struct kvm_kernel_irq_routing_entry *irq; + struct kvm *kvm = irqfd->kvm; - if (flags & POLLIN) + if (flags & POLLIN) { + rcu_read_lock(); + irq = rcu_dereference(irqfd->irq_entry); /* An event has been signaled, inject an interrupt */ - schedule_work(&irqfd->inject); + if (irq) + kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); + else + schedule_work(&irqfd->inject); + rcu_read_unlock(); + } if (flags & POLLHUP) { /* The eventfd is closing, detach from KVM */ - struct kvm *kvm = irqfd->kvm; unsigned long flags;
[COMMIT master] KVM: Add instruction-set-specific exit qualifications to kvm_exit trace
From: Avi Kivity The exit reason alone is insufficient to understand exactly why an exit occured; add ISA-specific trace parameters for additional information. Because fetching these parameters is expensive on vmx, and because these parameters are fetched even if tracing is disabled, we fetch the parameters via a callback instead of as traditional trace arguments. Signed-off-by: Avi Kivity diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b04c0fa..54e42c8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -594,6 +594,7 @@ struct kvm_x86_ops { void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); const struct trace_print_flags *exit_reasons_str; }; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b83954e..2fd2f4d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2974,6 +2974,14 @@ void dump_vmcb(struct kvm_vcpu *vcpu) } +static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) +{ + struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; + + *info1 = control->exit_info_1; + *info2 = control->exit_info_2; +} + static int handle_exit(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3678,7 +3686,9 @@ static struct kvm_x86_ops svm_x86_ops = { .get_tdp_level = get_npt_level, .get_mt_mask = svm_get_mt_mask, + .get_exit_info = svm_get_exit_info, .exit_reasons_str = svm_exit_reasons_str, + .get_lpage_level = svm_get_lpage_level, .cpuid_update = svm_cpuid_update, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 1061022..1357d7c 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -192,18 +192,22 @@ TRACE_EVENT(kvm_exit, __field(unsigned int, exit_reason ) __field(unsigned long, guest_rip ) __field(u32,isa ) + __field(u64,info1 ) + __field(u64,info2 ) ), TP_fast_assign( __entry->exit_reason= exit_reason; __entry->guest_rip = kvm_rip_read(vcpu); __entry->isa= isa; + kvm_x86_ops->get_exit_info(vcpu, &__entry->info1, + &__entry->info2); ), - TP_printk("reason %s rip 0x%lx", + TP_printk("reason %s rip 0x%lx info %llx %llx", ftrace_print_symbols_seq(p, __entry->exit_reason, kvm_x86_ops->exit_reasons_str), -__entry->guest_rip) +__entry->guest_rip, __entry->info1, __entry->info2) ); /* diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4e2b8f3..caa967e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3690,6 +3690,12 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); +static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) +{ + *info1 = vmcs_readl(EXIT_QUALIFICATION); + *info2 = vmcs_read32(VM_EXIT_INTR_INFO); +} + /* * The guest has exited. See if we can fix it or if we need userspace * assistance. @@ -4339,7 +4345,9 @@ static struct kvm_x86_ops vmx_x86_ops = { .get_tdp_level = get_ept_level, .get_mt_mask = vmx_get_mt_mask, + .get_exit_info = vmx_get_exit_info, .exit_reasons_str = vmx_exit_reasons_str, + .get_lpage_level = vmx_get_lpage_level, .cpuid_update = vmx_cpuid_update, -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Record instruction set in kvm_exit tracepoint
From: Avi Kivity exit_reason's meaning depend on the instruction set; record it so a trace taken on one machine can be interpreted on another. Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c6a7798..b83954e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2980,7 +2980,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; - trace_kvm_exit(exit_code, vcpu); + trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) vcpu->arch.cr0 = svm->vmcb->save.cr0; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a6544b8..1061022 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -178,21 +178,26 @@ TRACE_EVENT(kvm_apic, #define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val) #define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val) +#define KVM_ISA_VMX 1 +#define KVM_ISA_SVM 2 + /* * Tracepoint for kvm guest exit: */ TRACE_EVENT(kvm_exit, - TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), - TP_ARGS(exit_reason, vcpu), + TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu, u32 isa), + TP_ARGS(exit_reason, vcpu, isa), TP_STRUCT__entry( __field(unsigned int, exit_reason ) __field(unsigned long, guest_rip ) + __field(u32,isa ) ), TP_fast_assign( __entry->exit_reason= exit_reason; __entry->guest_rip = kvm_rip_read(vcpu); + __entry->isa= isa; ), TP_printk("reason %s rip 0x%lx", diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 58e5913..4e2b8f3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3700,7 +3700,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) u32 exit_reason = vmx->exit_reason; u32 vectoring_info = vmx->idt_vectoring_info; - trace_kvm_exit(exit_reason, vcpu); + trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); /* If guest state is invalid, start emulating */ if (vmx->emulation_required && emulate_invalid_guest_state) -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git/
From: Marcelo Tosatti Conflicts: arch/x86/kvm/svm.c kernel/sched.c Signed-off-by: Marcelo Tosatti -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: VMX: Fold __vmx_vcpu_run() into vmx_vcpu_run()
From: Avi Kivity cea15c2 ("KVM: Move KVM context switch into own function") split vmx_vcpu_run() to prevent multiple copies of the context switch from being generated (causing problems due to a label). This patch folds them back together again and adds the __noclone attribute to prevent the label from being duplicated. Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a9ad174..58e5913 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3904,17 +3904,33 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu) #define Q "l" #endif -/* - * We put this into a separate noinline function to prevent the compiler - * from duplicating the code. This is needed because this code - * uses non local labels that cannot be duplicated. - * Do not put any flow control into this function. - * Better would be to put this whole monstrosity into a .S file. - */ -static void noinline do_vmx_vcpu_run(struct kvm_vcpu *vcpu) +static void vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - asm volatile( + + /* Record the guest's net vcpu time for enforced NMI injections. */ + if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) + vmx->entry_time = ktime_get(); + + /* Don't enter VMX if guest state is invalid, let the exit handler + start emulation until we arrive back to a valid state */ + if (vmx->emulation_required && emulate_invalid_guest_state) + return; + + if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) + vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); + if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) + vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); + + /* When single-stepping over STI and MOV SS, we must clear the +* corresponding interruptibility bits in the guest state. Otherwise +* vmentry fails as it then expects bit 14 (BS) in pending debug +* exceptions being set, but that's not correct for the guest debugging +* case. */ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vmx_set_interrupt_shadow(vcpu, 0); + + asm( /* Store host registers */ "push %%"R"dx; push %%"R"bp;" "push %%"R"cx \n\t" @@ -4009,35 +4025,6 @@ static void noinline do_vmx_vcpu_run(struct kvm_vcpu *vcpu) , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" #endif ); -} - -static void vmx_vcpu_run(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - /* Record the guest's net vcpu time for enforced NMI injections. */ - if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) - vmx->entry_time = ktime_get(); - - /* Don't enter VMX if guest state is invalid, let the exit handler - start emulation until we arrive back to a valid state */ - if (vmx->emulation_required && emulate_invalid_guest_state) - return; - - if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); - if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); - - /* When single-stepping over STI and MOV SS, we must clear the -* corresponding interruptibility bits in the guest state. Otherwise -* vmentry fails as it then expects bit 14 (BS) in pending debug -* exceptions being set, but that's not correct for the guest debugging -* case. */ - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vmx_set_interrupt_shadow(vcpu, 0); - - do_vmx_vcpu_run(vcpu); vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) | (1 << VCPU_EXREG_PDPTR)); -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86 emulator: drop DPRINTF()
From: Avi Kivity Failed emulation is reported via a tracepoint; the cmps printk is pointless. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ffd6e01..3325b47 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -22,7 +22,6 @@ #include #include "kvm_cache_regs.h" -#define DPRINTF(x...) do {} while (0) #include #include @@ -2796,10 +2795,8 @@ done_prefixes: c->execute = opcode.u.execute; /* Unrecognised? */ - if (c->d == 0 || (c->d & Undefined)) { - DPRINTF("Cannot emulate %02x\n", c->b); + if (c->d == 0 || (c->d & Undefined)) return -1; - } if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) c->op_bytes = 8; @@ -3261,7 +3258,6 @@ special_insn: break; case 0xa6 ... 0xa7: /* cmps */ c->dst.type = OP_NONE; /* Disable writeback. */ - DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.addr.mem, c->dst.addr.mem); goto cmp; case 0xa8 ... 0xa9: /* test ax, imm */ goto test; @@ -3778,6 +3774,5 @@ twobyte_insn: goto writeback; cannot_emulate: - DPRINTF("Cannot emulate %02x\n", c->b); return -1; } -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86 emulator: do not perform address calculations on linear addresses
From: Avi Kivity Linear addresses are supposed to already have segment checks performed on them; if we play with these addresses the checks become invalid. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e967055..bdbbb18 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -568,7 +568,8 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, ctxt->vcpu, NULL); if (rc != X86EMUL_CONTINUE) return rc; - rc = ops->read_std(linear(ctxt, addr) + 2, address, op_bytes, + addr.ea += 2; + rc = ops->read_std(linear(ctxt, addr), address, op_bytes, ctxt->vcpu, NULL); return rc; } -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86 emulator: preserve an operand's segment identity
From: Avi Kivity Currently the x86 emulator converts the segment register associated with an operand into a segment base which is added into the operand address. This loss of information results in us not doing segment limit checks properly. Replace struct operand's addr.mem field by a segmented_address structure which holds both the effetive address and segment. This will allow us to do the limit check at the point of access. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b36c6b3..b48c133 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -159,7 +159,10 @@ struct operand { }; union { unsigned long *reg; - unsigned long mem; + struct segmented_address { + ulong ea; + unsigned seg; + } mem; } addr; union { unsigned long val; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3325b47..e967055 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -410,9 +410,9 @@ address_mask(struct decode_cache *c, unsigned long reg) } static inline unsigned long -register_address(struct decode_cache *c, unsigned long base, unsigned long reg) +register_address(struct decode_cache *c, unsigned long reg) { - return base + address_mask(c, reg); + return address_mask(c, reg); } static inline void @@ -444,26 +444,26 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, return ops->get_cached_segment_base(seg, ctxt->vcpu); } -static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - struct decode_cache *c) +static unsigned seg_override(struct x86_emulate_ctxt *ctxt, +struct x86_emulate_ops *ops, +struct decode_cache *c) { if (!c->has_seg_override) return 0; - return seg_base(ctxt, ops, c->seg_override); + return c->seg_override; } -static unsigned long es_base(struct x86_emulate_ctxt *ctxt, -struct x86_emulate_ops *ops) +static ulong linear(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr) { - return seg_base(ctxt, ops, VCPU_SREG_ES); -} + struct decode_cache *c = &ctxt->decode; + ulong la; -static unsigned long ss_base(struct x86_emulate_ctxt *ctxt, -struct x86_emulate_ops *ops) -{ - return seg_base(ctxt, ops, VCPU_SREG_SS); + la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; + if (c->ad_bytes != 8) + la &= (u32)-1; + return la; } static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, @@ -556,7 +556,7 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs, static int read_descriptor(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, - ulong addr, + struct segmented_address addr, u16 *size, unsigned long *address, int op_bytes) { int rc; @@ -564,10 +564,12 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, if (op_bytes == 2) op_bytes = 3; *address = 0; - rc = ops->read_std(addr, (unsigned long *)size, 2, ctxt->vcpu, NULL); + rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, + ctxt->vcpu, NULL); if (rc != X86EMUL_CONTINUE) return rc; - rc = ops->read_std(addr + 2, address, op_bytes, ctxt->vcpu, NULL); + rc = ops->read_std(linear(ctxt, addr) + 2, address, op_bytes, + ctxt->vcpu, NULL); return rc; } @@ -760,7 +762,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, break; } } - op->addr.mem = modrm_ea; + op->addr.mem.ea = modrm_ea; done: return rc; } @@ -775,13 +777,13 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, op->type = OP_MEM; switch (c->ad_bytes) { case 2: - op->addr.mem = insn_fetch(u16, 2, c->eip); + op->addr.mem.ea = insn_fetch(u16, 2, c->eip); break; case 4: - op->addr.mem = insn_fetch(u32, 4, c->eip); + op->addr.mem.ea = insn_fetch(u32, 4, c->eip); break; case 8: - op->addr.mem = insn_fetch(u64, 8, c->eip); + op->addr.mem.ea = insn_fetch(u64, 8, c->eip); break; } done: @@ -800,7 +802,7 @@ static void fetch_bit_operand(struct decode_cache *c) else if (c->src.bytes == 4) sv = (s3
[COMMIT master] KVM: x86 emulator: drop unused #ifndef __KERNEL__
From: Avi Kivity Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 38b6e8d..ffd6e01 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -20,16 +20,9 @@ * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 */ -#ifndef __KERNEL__ -#include -#include -#include -#define DPRINTF(_f, _a ...) printf(_f , ## _a) -#else #include #include "kvm_cache_regs.h" #define DPRINTF(x...) do {} while (0) -#endif #include #include -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: rename hardware_[dis|en]able() to *_nolock() and add locking wrappers
From: Takuya Yoshikawa The naming convension of hardware_[dis|en]able family is little bit confusing because only hardware_[dis|en]able_all are using _nolock suffix. Renaming current hardware_[dis|en]able() to *_nolock() and using hardware_[dis|en]able() as wrapper functions which take kvm_lock for them reduces extra confusion. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0fdd911..fb93ff9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2067,7 +2067,7 @@ static struct miscdevice kvm_dev = { &kvm_chardev_ops, }; -static void hardware_enable(void *junk) +static void hardware_enable_nolock(void *junk) { int cpu = raw_smp_processor_id(); int r; @@ -2087,7 +2087,14 @@ static void hardware_enable(void *junk) } } -static void hardware_disable(void *junk) +static void hardware_enable(void *junk) +{ + spin_lock(&kvm_lock); + hardware_enable_nolock(junk); + spin_unlock(&kvm_lock); +} + +static void hardware_disable_nolock(void *junk) { int cpu = raw_smp_processor_id(); @@ -2097,13 +2104,20 @@ static void hardware_disable(void *junk) kvm_arch_hardware_disable(NULL); } +static void hardware_disable(void *junk) +{ + spin_lock(&kvm_lock); + hardware_disable_nolock(junk); + spin_unlock(&kvm_lock); +} + static void hardware_disable_all_nolock(void) { BUG_ON(!kvm_usage_count); kvm_usage_count--; if (!kvm_usage_count) - on_each_cpu(hardware_disable, NULL, 1); + on_each_cpu(hardware_disable_nolock, NULL, 1); } static void hardware_disable_all(void) @@ -2122,7 +2136,7 @@ static int hardware_enable_all(void) kvm_usage_count++; if (kvm_usage_count == 1) { atomic_set(&hardware_enable_failed, 0); - on_each_cpu(hardware_enable, NULL, 1); + on_each_cpu(hardware_enable_nolock, NULL, 1); if (atomic_read(&hardware_enable_failed)) { hardware_disable_all_nolock(); @@ -2148,16 +2162,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, case CPU_DYING: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", cpu); - spin_lock(&kvm_lock); hardware_disable(NULL); - spin_unlock(&kvm_lock); break; case CPU_STARTING: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); - spin_lock(&kvm_lock); hardware_enable(NULL); - spin_unlock(&kvm_lock); break; } return NOTIFY_OK; @@ -2188,7 +2198,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, */ printk(KERN_INFO "kvm: exiting hardware virtualization\n"); kvm_rebooting = true; - on_each_cpu(hardware_disable, NULL, 1); + on_each_cpu(hardware_disable_nolock, NULL, 1); return NOTIFY_OK; } @@ -2358,7 +2368,7 @@ static void kvm_exit_debug(void) static int kvm_suspend(struct sys_device *dev, pm_message_t state) { if (kvm_usage_count) - hardware_disable(NULL); + hardware_disable_nolock(NULL); return 0; } @@ -2366,7 +2376,7 @@ static int kvm_resume(struct sys_device *dev) { if (kvm_usage_count) { WARN_ON(spin_is_locked(&kvm_lock)); - hardware_enable(NULL); + hardware_enable_nolock(NULL); } return 0; } @@ -2543,7 +2553,7 @@ void kvm_exit(void) sysdev_class_unregister(&kvm_sysdev_class); unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); - on_each_cpu(hardware_disable, NULL, 1); + on_each_cpu(hardware_disable_nolock, NULL, 1); kvm_arch_hardware_unsetup(); kvm_arch_exit(); free_cpumask_var(cpus_hardware_enabled); -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: VMX: Inform user about INTEL_TXT dependency
From: Shane Wang Inform user to either disable TXT in the BIOS or do TXT launch with tboot before enabling KVM since some BIOSes do not set FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX bit when TXT is enabled. Signed-off-by: Shane Wang Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0badeac..a9ad174 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1305,8 +1305,11 @@ static __init int vmx_disabled_by_bios(void) && tboot_enabled()) return 1; if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) - && !tboot_enabled()) + && !tboot_enabled()) { + printk(KERN_WARNING "kvm: disable TXT in the BIOS or " + " activate TXT before enabling KVM\n"); return 1; + } } return 0; -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Document device assigment API
From: Jan Kiszka Adds API documentation for KVM_[DE]ASSIGN_PCI_DEVICE, KVM_[DE]ASSIGN_DEV_IRQ, KVM_SET_GSI_ROUTING, KVM_ASSIGN_SET_MSIX_NR, and KVM_ASSIGN_SET_MSIX_ENTRY. Acked-by: Alex Williamson Acked-by: Michael S. Tsirkin Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index b336266..e1a9297 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -1085,6 +1085,184 @@ of 4 instructions that make up a hypercall. If any additional field gets added to this structure later on, a bit for that additional piece of information will be set in the flags bitmap. +4.47 KVM_ASSIGN_PCI_DEVICE + +Capability: KVM_CAP_DEVICE_ASSIGNMENT +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_pci_dev (in) +Returns: 0 on success, -1 on error + +Assigns a host PCI device to the VM. + +struct kvm_assigned_pci_dev { + __u32 assigned_dev_id; + __u32 busnr; + __u32 devfn; + __u32 flags; + __u32 segnr; + union { + __u32 reserved[11]; + }; +}; + +The PCI device is specified by the triple segnr, busnr, and devfn. +Identification in succeeding service requests is done via assigned_dev_id. The +following flags are specified: + +/* Depends on KVM_CAP_IOMMU */ +#define KVM_DEV_ASSIGN_ENABLE_IOMMU(1 << 0) + +4.48 KVM_DEASSIGN_PCI_DEVICE + +Capability: KVM_CAP_DEVICE_DEASSIGNMENT +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_pci_dev (in) +Returns: 0 on success, -1 on error + +Ends PCI device assignment, releasing all associated resources. + +See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is +used in kvm_assigned_pci_dev to identify the device. + +4.49 KVM_ASSIGN_DEV_IRQ + +Capability: KVM_CAP_ASSIGN_DEV_IRQ +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_irq (in) +Returns: 0 on success, -1 on error + +Assigns an IRQ to a passed-through device. + +struct kvm_assigned_irq { + __u32 assigned_dev_id; + __u32 host_irq; + __u32 guest_irq; + __u32 flags; + union { + struct { + __u32 addr_lo; + __u32 addr_hi; + __u32 data; + } guest_msi; + __u32 reserved[12]; + }; +}; + +The following flags are defined: + +#define KVM_DEV_IRQ_HOST_INTX(1 << 0) +#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +#define KVM_DEV_IRQ_HOST_MSIX(1 << 2) + +#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +#define KVM_DEV_IRQ_GUEST_MSI(1 << 9) +#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) + +It is not valid to specify multiple types per host or guest IRQ. However, the +IRQ type of host and guest can differ or can even be null. + +4.50 KVM_DEASSIGN_DEV_IRQ + +Capability: KVM_CAP_ASSIGN_DEV_IRQ +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_irq (in) +Returns: 0 on success, -1 on error + +Ends an IRQ assignment to a passed-through device. + +See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified +by assigned_dev_id, flags must correspond to the IRQ type specified on +KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. + +4.51 KVM_SET_GSI_ROUTING + +Capability: KVM_CAP_IRQ_ROUTING +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_irq_routing (in) +Returns: 0 on success, -1 on error + +Sets the GSI routing table entries, overwriting any previously set entries. + +struct kvm_irq_routing { + __u32 nr; + __u32 flags; + struct kvm_irq_routing_entry entries[0]; +}; + +No flags are specified so far, the corresponding field must be set to zero. + +struct kvm_irq_routing_entry { + __u32 gsi; + __u32 type; + __u32 flags; + __u32 pad; + union { + struct kvm_irq_routing_irqchip irqchip; + struct kvm_irq_routing_msi msi; + __u32 pad[8]; + } u; +}; + +/* gsi routing entry types */ +#define KVM_IRQ_ROUTING_IRQCHIP 1 +#define KVM_IRQ_ROUTING_MSI 2 + +No flags are specified so far, the corresponding field must be set to zero. + +struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +}; + +struct kvm_irq_routing_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 pad; +}; + +4.52 KVM_ASSIGN_SET_MSIX_NR + +Capability: KVM_CAP_DEVICE_MSIX +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_msix_nr (in) +Returns: 0 on success, -1 on error + +Set the number of MSI-X interrupts for an assigned device. This service can +only be called once in the lifetime of an assigned device. + +struct kvm_assigned_msix_nr { + __u32 assigned_dev_id; + __u16 entry_nr; + __u16 padding; +}; + +#define KVM_MAX_MSIX_PER_DEV 256 + +4.53 KVM_ASSIGN_SET_MSIX_ENTRY + +Capability: KVM_CAP_DEVICE_MSIX +Architectures: x86 ia64 +Type: v
[COMMIT master] KVM: MMU: don't mark spte notrap if reserved bit set
From: Xiao Guangrong If reserved bit is set, we need inject the #PF with PFEC.RSVD=1, but shadow_notrap_nonpresent_pte injects #PF with PFEC.RSVD=0 only Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index ba00eef..590bf12 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -395,8 +395,10 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, gpte = gptep[i]; - if (!is_present_gpte(gpte) || - is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) { + if (is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) + continue; + + if (!is_present_gpte(gpte)) { if (!sp->unsync) __set_spte(spte, shadow_notrap_nonpresent_pte); continue; @@ -760,6 +762,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, pt_element_t gpte; gpa_t pte_gpa; gfn_t gfn; + bool rsvd_bits_set; if (!is_shadow_present_pte(sp->spt[i])) continue; @@ -771,12 +774,14 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return -EINVAL; gfn = gpte_to_gfn(gpte); - if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL) - || gfn != sp->gfns[i] || !is_present_gpte(gpte) - || !(gpte & PT_ACCESSED_MASK)) { + rsvd_bits_set = is_rsvd_bits_set(&vcpu->arch.mmu, gpte, +PT_PAGE_TABLE_LEVEL); + if (rsvd_bits_set || gfn != sp->gfns[i] || + !is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) { u64 nonpresent; - if (is_present_gpte(gpte) || !clear_unsync) + if (rsvd_bits_set || is_present_gpte(gpte) || + !clear_unsync) nonpresent = shadow_trap_nonpresent_pte; else nonpresent = shadow_notrap_nonpresent_pte; -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: take kvm_lock for hardware_disable() during cpu hotplug
From: Takuya Yoshikawa In kvm_cpu_hotplug(), only CPU_STARTING case is protected by kvm_lock. This patch adds missing protection for CPU_DYING case. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 339dd43..0fdd911 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2148,7 +2148,9 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, case CPU_DYING: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", cpu); + spin_lock(&kvm_lock); hardware_disable(NULL); + spin_unlock(&kvm_lock); break; case CPU_STARTING: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Refactor IRQ names of assigned devices
From: Jan Kiszka Cosmetic change, but it helps to correlate IRQs with PCI devices. Acked-by: Alex Williamson Acked-by: Michael S. Tsirkin Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9fe7fef..4bd663d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -489,6 +489,7 @@ struct kvm_assigned_dev_kernel { struct pci_dev *dev; struct kvm *kvm; spinlock_t intx_lock; + char irq_name[32]; }; struct kvm_irq_mask_notifier { diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 1d77ce1..7623408 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -231,8 +231,7 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, * are going to be long delays in accepting, acking, etc. */ if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, -IRQF_ONESHOT, "kvm_assigned_intx_device", -(void *)dev)) +IRQF_ONESHOT, dev->irq_name, (void *)dev)) return -EIO; return 0; } @@ -251,7 +250,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, dev->host_irq = dev->dev->irq; if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, -0, "kvm_assigned_msi_device", (void *)dev)) { +0, dev->irq_name, (void *)dev)) { pci_disable_msi(dev->dev); return -EIO; } @@ -278,8 +277,7 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, for (i = 0; i < dev->entries_nr; i++) { r = request_threaded_irq(dev->host_msix_entries[i].vector, NULL, kvm_assigned_dev_thread, -0, "kvm_assigned_msix_device", -(void *)dev); +0, dev->irq_name, (void *)dev); if (r) goto err; } @@ -336,6 +334,9 @@ static int assign_host_irq(struct kvm *kvm, if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) return r; + snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s", +pci_name(dev->dev)); + switch (host_irq_type) { case KVM_DEV_IRQ_HOST_INTX: r = assigned_device_enable_host_intx(kvm, dev); -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Save/restore state of assigned PCI device
From: Jan Kiszka The guest may change states that pci_reset_function does not touch. So we better save/restore the assigned device across guest usage. Acked-by: Alex Williamson Acked-by: Michael S. Tsirkin Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 7623408..d389207 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -197,7 +197,8 @@ static void kvm_free_assigned_device(struct kvm *kvm, { kvm_free_assigned_irq(kvm, assigned_dev); - pci_reset_function(assigned_dev->dev); + __pci_reset_function(assigned_dev->dev); + pci_restore_state(assigned_dev->dev); pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); @@ -514,6 +515,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, } pci_reset_function(dev); + pci_save_state(dev); match->assigned_dev_id = assigned_dev->assigned_dev_id; match->host_segnr = assigned_dev->segnr; @@ -544,6 +546,7 @@ out: mutex_unlock(&kvm->lock); return r; out_list_del: + pci_restore_state(dev); list_del(&match->list); pci_release_regions(dev); out_disable: -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Clean up kvm_vm_ioctl_assigned_device
From: Jan Kiszka Any arch not supporting device assigment will also not build assigned-dev.c. So testing for KVM_CAP_DEVICE_DEASSIGNMENT is pointless. KVM_CAP_ASSIGN_DEV_IRQ is unconditinally set. Moreover, add a default case for dispatching the ioctl. Acked-by: Alex Williamson Acked-by: Michael S. Tsirkin Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index d389207..ae72ae6 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -674,7 +674,7 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, unsigned long arg) { void __user *argp = (void __user *)arg; - int r = -ENOTTY; + int r; switch (ioctl) { case KVM_ASSIGN_PCI_DEVICE: { @@ -692,7 +692,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, r = -EOPNOTSUPP; break; } -#ifdef KVM_CAP_ASSIGN_DEV_IRQ case KVM_ASSIGN_DEV_IRQ: { struct kvm_assigned_irq assigned_irq; @@ -715,8 +714,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, goto out; break; } -#endif -#ifdef KVM_CAP_DEVICE_DEASSIGNMENT case KVM_DEASSIGN_PCI_DEVICE: { struct kvm_assigned_pci_dev assigned_dev; @@ -728,7 +725,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, goto out; break; } -#endif #ifdef KVM_CAP_IRQ_ROUTING case KVM_SET_GSI_ROUTING: { struct kvm_irq_routing routing; @@ -781,6 +777,9 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, break; } #endif + default: + r = -ENOTTY; + break; } out: return r; -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: VMX: Fix host userspace gsbase corruption
From: Avi Kivity We now use load_gs_index() to load gs safely; unfortunately this also changes MSR_KERNEL_GS_BASE, which we managed separately. This resulted in confusion and breakage running 32-bit host userspace on a 64-bit kernel. Fix by - saving guest MSR_KERNEL_GS_BASE before we we reload the host's gs - doing the host save/load unconditionally, instead of only when in guest long mode Things can be cleaned up further, but this is the minmal fix for now. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9367abc..0badeac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -821,10 +821,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) #endif #ifdef CONFIG_X86_64 - if (is_long_mode(&vmx->vcpu)) { - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); + if (is_long_mode(&vmx->vcpu)) wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); - } #endif for (i = 0; i < vmx->save_nmsrs; ++i) kvm_set_shared_msr(vmx->guest_msrs[i].index, @@ -839,11 +838,14 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) ++vmx->vcpu.stat.host_state_reload; vmx->host_state.loaded = 0; +#ifdef CONFIG_X86_64 + if (is_long_mode(&vmx->vcpu)) + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); +#endif if (vmx->host_state.gs_ldt_reload_needed) { kvm_load_ldt(vmx->host_state.ldt_sel); #ifdef CONFIG_X86_64 load_gs_index(vmx->host_state.gs_sel); - wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); #else loadsegment(gs, vmx->host_state.gs_sel); #endif @@ -852,10 +854,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) loadsegment(fs, vmx->host_state.fs_sel); reload_tss(); #ifdef CONFIG_X86_64 - if (is_long_mode(&vmx->vcpu)) { - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); - } + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); #endif if (current_thread_info()->status & TS_USEDFPU) clts(); -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Clear assigned guest IRQ on release
From: Jan Kiszka When we deassign a guest IRQ, clear the potentially asserted guest line. There might be no chance for the guest to do this, specifically if we switch from INTx to MSI mode. Acked-by: Alex Williamson Acked-by: Michael S. Tsirkin Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 7c98928..ecc4419 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -141,6 +141,9 @@ static void deassign_guest_irq(struct kvm *kvm, kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); assigned_dev->ack_notifier.gsi = -1; + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 0); + if (assigned_dev->irq_source_id != -1) kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); assigned_dev->irq_source_id = -1; -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Switch assigned device IRQ forwarding to threaded handler
From: Jan Kiszka This improves the IRQ forwarding for assigned devices: By using the kernel's threaded IRQ scheme, we can get rid of the latency-prone work queue and simplify the code in the same run. Moreover, we no longer have to hold assigned_dev_lock while raising the guest IRQ, which can be a lenghty operation as we may have to iterate over all VCPUs. The lock is now only used for synchronizing masking vs. unmasking of INTx-type IRQs, thus is renames to intx_lock. Acked-by: Alex Williamson Acked-by: Michael S. Tsirkin Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2d63f2c..9fe7fef 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -470,16 +470,8 @@ struct kvm_irq_ack_notifier { void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; -#define KVM_ASSIGNED_MSIX_PENDING 0x1 -struct kvm_guest_msix_entry { - u32 vector; - u16 entry; - u16 flags; -}; - struct kvm_assigned_dev_kernel { struct kvm_irq_ack_notifier ack_notifier; - struct work_struct interrupt_work; struct list_head list; int assigned_dev_id; int host_segnr; @@ -490,13 +482,13 @@ struct kvm_assigned_dev_kernel { bool host_irq_disabled; struct msix_entry *host_msix_entries; int guest_irq; - struct kvm_guest_msix_entry *guest_msix_entries; + struct msix_entry *guest_msix_entries; unsigned long irq_requested_type; int irq_source_id; int flags; struct pci_dev *dev; struct kvm *kvm; - spinlock_t assigned_dev_lock; + spinlock_t intx_lock; }; struct kvm_irq_mask_notifier { diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index ecc4419..1d77ce1 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -55,58 +55,31 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel return index; } -static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) +static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) { - struct kvm_assigned_dev_kernel *assigned_dev; - int i; + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + u32 vector; + int index; - assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, - interrupt_work); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { + spin_lock(&assigned_dev->intx_lock); + disable_irq_nosync(irq); + assigned_dev->host_irq_disabled = true; + spin_unlock(&assigned_dev->intx_lock); + } - spin_lock_irq(&assigned_dev->assigned_dev_lock); if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - struct kvm_guest_msix_entry *guest_entries = - assigned_dev->guest_msix_entries; - for (i = 0; i < assigned_dev->entries_nr; i++) { - if (!(guest_entries[i].flags & - KVM_ASSIGNED_MSIX_PENDING)) - continue; - guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; + index = find_index_from_host_irq(assigned_dev, irq); + if (index >= 0) { + vector = assigned_dev-> + guest_msix_entries[index].vector; kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, - guest_entries[i].vector, 1); + assigned_dev->irq_source_id, vector, 1); } } else kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, assigned_dev->guest_irq, 1); - spin_unlock_irq(&assigned_dev->assigned_dev_lock); -} - -static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) -{ - unsigned long flags; - struct kvm_assigned_dev_kernel *assigned_dev = - (struct kvm_assigned_dev_kernel *) dev_id; - - spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - int index = find_index_from_host_irq(assigned_dev, irq); - if (index < 0) - goto out; - assigned_dev->guest_msix_entries[index].flags |= - KVM_ASSIGNED_MSIX_PENDING; - } - - schedule_work(&assigned_dev->interrupt_work); - - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { - disable_irq_nosync(irq); - assigned_dev->host_irq_disabled = true; - } - -out: - spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); return IRQ_HANDLED; } @@ -114,7 +8
[COMMIT master] KVM: SVM: Replace svm_has() by standard Linux cpuid accessors
From: Avi Kivity Instead of querying cpuid directly, use the Linux accessors (boot_cpu_has, etc.). This allows the things like the clearcpuid kernel command line to work (when it's fixed wrt scattered cpuid bits). Acked-by: Joerg Roedel Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1a5757a..c6a7798 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -271,11 +271,6 @@ static u32 svm_msrpm_offset(u32 msr) #define MAX_INST_SIZE 15 -static inline u32 svm_has(u32 feat) -{ - return svm_features & feat; -} - static inline void clgi(void) { asm volatile (__ex(SVM_CLGI)); @@ -381,7 +376,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, nested_svm_check_exception(svm, nr, has_error_code, error_code)) return; - if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { + if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); /* @@ -677,7 +672,7 @@ static __init int svm_hardware_setup(void) svm_features = cpuid_edx(SVM_CPUID_FUNC); - if (!svm_has(SVM_FEATURE_NPT)) + if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; if (npt_enabled && !npt) { @@ -876,7 +871,7 @@ static void init_vmcb(struct vcpu_svm *svm) svm->nested.vmcb = 0; svm->vcpu.arch.hflags = 0; - if (svm_has(SVM_FEATURE_PAUSE_FILTER)) { + if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { control->pause_filter_count = 3000; control->intercept |= (1ULL << INTERCEPT_PAUSE); } @@ -2743,7 +2738,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) svm->vmcb->save.sysenter_esp = data; break; case MSR_IA32_DEBUGCTLMSR: - if (!svm_has(SVM_FEATURE_LBRV)) { + if (!boot_cpu_has(X86_FEATURE_LBRV)) { pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", __func__, data); break; @@ -3527,7 +3522,7 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) additional features */ /* Support next_rip if host supports it */ - if (svm_has(SVM_FEATURE_NRIP)) + if (boot_cpu_has(X86_FEATURE_NRIPS)) entry->edx |= SVM_FEATURE_NRIP; /* Support NPT for the guest if enabled */ -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Mask KVM_GET_SUPPORTED_CPUID data with Linux cpuid info
From: Avi Kivity This allows Linux to mask cpuid bits if, for example, nx is enabled on only some cpus. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 003a0ca..410d2d1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2232,6 +2232,11 @@ out: return r; } +static void cpuid_mask(u32 *word, int wordnum) +{ + *word &= boot_cpu_data.x86_capability[wordnum]; +} + static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, u32 index) { @@ -2306,7 +2311,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, break; case 1: entry->edx &= kvm_supported_word0_x86_features; + cpuid_mask(&entry->edx, 0); entry->ecx &= kvm_supported_word4_x86_features; + cpuid_mask(&entry->ecx, 4); /* we support x2apic emulation even if host does not support * it since we emulate x2apic in software */ entry->ecx |= F(X2APIC); @@ -2397,7 +2404,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, break; case 0x8001: entry->edx &= kvm_supported_word1_x86_features; + cpuid_mask(&entry->edx, 1); entry->ecx &= kvm_supported_word6_x86_features; + cpuid_mask(&entry->ecx, 6); break; } -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] device-assignment: Register as un-migratable
From: Alex Williamson Use register_device_unmigratable() to declare ourselves as non-migratable. Signed-off-by: Alex Williamson Signed-off-by: Marcelo Tosatti diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 5f5bde1..c2a7b27 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -1434,6 +1434,10 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) dev->msix_table_page = NULL; } +static const VMStateDescription vmstate_assigned_device = { +.name = "pci-assign" +}; + static int assigned_initfn(struct PCIDevice *pci_dev) { AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev); @@ -1495,6 +1499,12 @@ static int assigned_initfn(struct PCIDevice *pci_dev) assigned_dev_load_option_rom(dev); QLIST_INSERT_HEAD(&devs, dev, next); + +/* Register a vmsd so that we can mark it unmigratable. */ +vmstate_register(&dev->dev.qdev, 0, &vmstate_assigned_device, dev); +register_device_unmigratable(&dev->dev.qdev, + vmstate_assigned_device.name, dev); + return 0; assigned_out: @@ -1508,6 +1518,7 @@ static int assigned_exitfn(struct PCIDevice *pci_dev) { AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev); +vmstate_unregister(&dev->dev.qdev, &vmstate_assigned_device, dev); QLIST_REMOVE(dev, next); deassign_device(dev); free_assigned_device(dev); -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] make-release: fix mtime for a wider range of git versions
From: Bernhard Kohl With the latest git versions, e.g. 1.7.2.3, git still prints out the tag info in addition to the requested format. So let's simply fetch the first line from the output. In addition I use the --pretty option instead of --format which is not recognized in very old git versions, e.g. 1.5.5.6. Tested with git versions 1.5.5.6 and 1.7.2.3. Signed-off-by: Bernhard Kohl Signed-off-by: Marcelo Tosatti diff --git a/kvm/scripts/make-release b/kvm/scripts/make-release index 56302c3..2d050fc 100755 --- a/kvm/scripts/make-release +++ b/kvm/scripts/make-release @@ -51,7 +51,7 @@ cd "$(dirname "$0")"/../.. mkdir -p "$(dirname "$tarball")" git archive --prefix="$name/" --format=tar "$commit" > "$tarball" -mtime=`git show --format=%ct "$commit""^{commit}" --` +mtime=`git show --pretty=format:%ct "$commit""^{commit}" -- | head -n 1` tarargs="--owner=root --group=root" mkdir -p "$tmpdir/$name" -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] device-assignment: register a reset function
From: Bernhard Kohl This is necessary because during reboot of a VM the assigned devices continue DMA transfers which causes memory corruption. Acked-by: Alex Williamson Acked-by: Jan Kiszka Signed-off-by: Thomas Ostler Signed-off-by: Bernhard Kohl Signed-off-by: Marcelo Tosatti diff --git a/hw/device-assignment.c b/hw/device-assignment.c index c2a7b27..369bff9 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -1438,6 +1438,17 @@ static const VMStateDescription vmstate_assigned_device = { .name = "pci-assign" }; +static void reset_assigned_device(DeviceState *dev) +{ +PCIDevice *d = DO_UPCAST(PCIDevice, qdev, dev); + +/* + * When a 0 is written to the command register, the device is logically + * disconnected from the PCI bus. This avoids further DMA transfers. + */ +assigned_dev_pci_write_config(d, PCI_COMMAND, 0, 2); +} + static int assigned_initfn(struct PCIDevice *pci_dev) { AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev); @@ -1555,6 +1566,7 @@ static PCIDeviceInfo assign_info = { .qdev.name= "pci-assign", .qdev.desc= "pass through host pci devices to the guest", .qdev.size= sizeof(AssignedDevice), +.qdev.reset = reset_assigned_device, .init = assigned_initfn, .exit = assigned_exitfn, .config_read = assigned_dev_pci_read_config, -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html