On Sun, Jun 22, 2008 at 08:16:19AM +0300, Avi Kivity wrote:
> Marcelo Tosatti wrote:
>> On Sat, Jun 21, 2008 at 10:04:18AM +0300, Avi Kivity wrote:
>>
>>>> /*
>>>>  * Sync the rsp and rip registers into the vcpu structure. This allows
>>>>  * registers to be accessed by indexing vcpu->arch.regs.
>>>>  */
>>>>
>>>> But I think it just refers to the interface in general, so that nobody
>>>> would try to access RSP or RIP (and RAX in AMD's case) before calling
>>>> ->cache_regs().
>>>>
>>> It refers to the fact that sometimes you don't know which registers
>>> you refer to, e.g. in the emulator.
>>>
>>
>> How's this?
>>
>>
>
> Looks good, but we can aim higher. The cache_regs() API was always
> confusing (I usually swap the two parts). If we replace all ->regs
> access with accessors, we can make it completely transparent.
>
> It will be tricky in the emulator, but worthwhile, no?
OK, in the emulator an interface on top of guest_register_write() is needed to save registers so that the original contents can be restored on failure. Some brave soul can do it later, so I added a TODO in x86.c. Smells better now? --- dev/null 2008-06-24 14:36:42.383774904 -0300 +++ b/arch/x86/kvm/kvm_cache_regs.h 2008-06-24 15:26:02.000000000 -0300 @@ -0,0 +1,21 @@ +#ifndef ASM_KVM_CACHE_REGS_H +#define ASM_KVM_CACHE_REGS_H + +static inline unsigned long guest_register_read(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + if (!__test_and_set_bit(reg, &vcpu->arch.regs_available)) + kvm_x86_ops->cache_regs(vcpu, reg); + + return vcpu->arch.regs[reg]; +} + +static inline void guest_register_write(struct kvm_vcpu *vcpu, + enum kvm_reg reg, + unsigned long val) +{ + vcpu->arch.regs[reg] = val; + __set_bit(reg, &vcpu->arch.regs_dirty); +} + +#endif diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 73f43de..97919b6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -32,6 +32,7 @@ #include <asm/current.h> #include <asm/apicdef.h> #include <asm/atomic.h> +#include "kvm_cache_regs.h" #include "irq.h" #define PRId64 "d" @@ -558,8 +559,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write) struct kvm_run *run = vcpu->run; set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); - kvm_x86_ops->cache_regs(vcpu); - run->tpr_access.rip = vcpu->arch.rip; + run->tpr_access.rip = guest_register_read(vcpu, VCPU_REGS_RIP); run->tpr_access.is_write = write; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 238e8f3..acd96f6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -18,6 +18,7 @@ #include "kvm_svm.h" #include "irq.h" #include "mmu.h" +#include "kvm_cache_regs.h" #include <linux/module.h> #include <linux/kernel.h> @@ -241,7 +242,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) svm->vmcb->save.rip, svm->next_rip); - vcpu->arch.rip = svm->vmcb->save.rip = svm->next_rip; + svm->vmcb->save.rip = 
svm->next_rip; + guest_register_write(vcpu, VCPU_REGS_RIP, svm->vmcb->save.rip); svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; vcpu->arch.interrupt_window_open = 1; @@ -709,21 +711,42 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) rdtscll(vcpu->arch.host_tsc); } -static void svm_cache_regs(struct kvm_vcpu *vcpu) +static void svm_cache_regs(struct kvm_vcpu *vcpu, enum kvm_reg reg) { struct vcpu_svm *svm = to_svm(vcpu); - vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; - vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; - vcpu->arch.rip = svm->vmcb->save.rip; + switch (reg) { + case VCPU_REGS_RAX: + vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; + break; + case VCPU_REGS_RSP: + vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; + break; + case VCPU_REGS_RIP: + vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; + break; + default: + break; + } } -static void svm_decache_regs(struct kvm_vcpu *vcpu) +static void svm_decache_regs(struct kvm_vcpu *vcpu, enum kvm_reg reg) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; - svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; - svm->vmcb->save.rip = vcpu->arch.rip; + + switch (reg) { + case VCPU_REGS_RAX: + svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; + break; + case VCPU_REGS_RSP: + svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; + break; + case VCPU_REGS_RIP: + svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; + break; + default: + break; + } } static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6e4278d..240f16a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -26,6 +26,7 @@ #include <linux/highmem.h> #include <linux/sched.h> #include <linux/moduleparam.h> +#include "kvm_cache_regs.h" #include <asm/io.h> #include <asm/desc.h> @@ -707,9 +708,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) unsigned long rip; u32 
interruptibility; - rip = vmcs_readl(GUEST_RIP); + rip = guest_register_read(vcpu, VCPU_REGS_RIP); rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - vmcs_writel(GUEST_RIP, rip); + guest_register_write(vcpu, VCPU_REGS_RIP, rip); /* * We emulated an instruction, so temporary interrupt blocking @@ -931,24 +932,32 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) return ret; } -/* - * Sync the rsp and rip registers into the vcpu structure. This allows - * registers to be accessed by indexing vcpu->arch.regs. - */ -static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu) +static void vmx_cache_regs(struct kvm_vcpu *vcpu, enum kvm_reg reg) { - vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); - vcpu->arch.rip = vmcs_readl(GUEST_RIP); + switch (reg) { + case VCPU_REGS_RSP: + vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); + break; + case VCPU_REGS_RIP: + vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); + break; + default: + break; + } } -/* - * Syncs rsp and rip back into the vmcs. Should be called after possible - * modification. 
- */ -static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu) +static void vmx_decache_regs(struct kvm_vcpu *vcpu, enum kvm_reg reg) { - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); - vmcs_writel(GUEST_RIP, vcpu->arch.rip); + switch (reg) { + case VCPU_REGS_RSP: + vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); + break; + case VCPU_REGS_RIP: + vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); + break; + default: + break; + } } static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) @@ -2370,22 +2379,18 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) (u32)((u64)vcpu->arch.regs[reg] >> 32), handler); switch (cr) { case 0: - vcpu_load_rsp_rip(vcpu); kvm_set_cr0(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); return 1; case 3: - vcpu_load_rsp_rip(vcpu); kvm_set_cr3(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); return 1; case 4: - vcpu_load_rsp_rip(vcpu); kvm_set_cr4(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); return 1; case 8: - vcpu_load_rsp_rip(vcpu); kvm_set_cr8(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); if (irqchip_in_kernel(vcpu->kvm)) @@ -2395,7 +2400,6 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) }; break; case 2: /* clts */ - vcpu_load_rsp_rip(vcpu); vmx_fpu_deactivate(vcpu); vcpu->arch.cr0 &= ~X86_CR0_TS; vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); @@ -2406,9 +2410,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) case 1: /*mov from cr*/ switch (cr) { case 3: - vcpu_load_rsp_rip(vcpu); vcpu->arch.regs[reg] = vcpu->arch.cr3; - vcpu_put_rsp_rip(vcpu); KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg], (u32)((u64)vcpu->arch.regs[reg] >> 32), @@ -2416,9 +2418,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) skip_emulated_instruction(vcpu); return 1; case 8: - vcpu_load_rsp_rip(vcpu); vcpu->arch.regs[reg] = kvm_get_cr8(vcpu); - 
vcpu_put_rsp_rip(vcpu); KVMTRACE_2D(CR_READ, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg], handler); skip_emulated_instruction(vcpu); @@ -2452,7 +2452,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) exit_qualification = vmcs_readl(EXIT_QUALIFICATION); dr = exit_qualification & 7; reg = (exit_qualification >> 8) & 15; - vcpu_load_rsp_rip(vcpu); if (exit_qualification & 16) { /* mov from dr */ switch (dr) { @@ -2465,12 +2464,11 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) default: val = 0; } - vcpu->arch.regs[reg] = val; + guest_register_write(vcpu, reg, val); KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); } else { /* mov to dr */ } - vcpu_put_rsp_rip(vcpu); skip_emulated_instruction(vcpu); return 1; } @@ -3213,8 +3211,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, - .cache_regs = vcpu_load_rsp_rip, - .decache_regs = vcpu_put_rsp_rip, + .cache_regs = vmx_cache_regs, + .decache_regs = vmx_decache_regs, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 26b051b..9495dd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -19,6 +19,7 @@ #include "mmu.h" #include "i8254.h" #include "tss.h" +#include "kvm_cache_regs.h" #include <linux/clocksource.h> #include <linux/kvm.h> @@ -61,6 +62,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); struct kvm_x86_ops *kvm_x86_ops; +EXPORT_SYMBOL(kvm_x86_ops); struct kvm_stats_debugfs_item debugfs_entries[] = { { "pf_fixed", VCPU_STAT(pf_fixed) }, @@ -1778,6 +1780,16 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, return dev; } +static void flush_regs(struct kvm_vcpu *vcpu) +{ + if (__test_and_clear_bit(VCPU_REGS_RSP, &vcpu->arch.regs_dirty)) + kvm_x86_ops->decache_regs(vcpu, VCPU_REGS_RSP); + if (__test_and_clear_bit(VCPU_REGS_RIP, 
&vcpu->arch.regs_dirty)) + kvm_x86_ops->decache_regs(vcpu, VCPU_REGS_RIP); + if (__test_and_clear_bit(VCPU_REGS_RAX, &vcpu->arch.regs_dirty)) + kvm_x86_ops->decache_regs(vcpu, VCPU_REGS_RAX); +} + int emulator_read_std(unsigned long addr, void *val, unsigned int bytes, @@ -2028,7 +2040,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) { u8 opcodes[4]; - unsigned long rip = vcpu->arch.rip; + unsigned long rip = guest_register_read(vcpu, VCPU_REGS_RIP); unsigned long rip_linear; if (!printk_ratelimit()) @@ -2050,6 +2062,23 @@ static struct x86_emulate_ops emulate_ops = { .cmpxchg_emulated = emulator_cmpxchg_emulated, }; +void cache_all_regs(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops->cache_regs(vcpu, VCPU_REGS_RAX); + kvm_x86_ops->cache_regs(vcpu, VCPU_REGS_RSP); + kvm_x86_ops->cache_regs(vcpu, VCPU_REGS_RIP); +} + +void decache_all_regs(struct kvm_vcpu *vcpu) +{ + guest_register_write(vcpu, VCPU_REGS_RAX, + vcpu->arch.regs[VCPU_REGS_RAX]); + guest_register_write(vcpu, VCPU_REGS_RSP, + vcpu->arch.regs[VCPU_REGS_RSP]); + guest_register_write(vcpu, VCPU_REGS_RIP, + vcpu->arch.regs[VCPU_REGS_RIP]); +} + int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, unsigned long cr2, @@ -2060,7 +2089,13 @@ int emulate_instruction(struct kvm_vcpu *vcpu, struct decode_cache *c; vcpu->arch.mmio_fault_cr2 = cr2; - kvm_x86_ops->cache_regs(vcpu); + /* + * TODO: fix x86_emulate.c to use guest_read/write_register + * instead of direct ->regs accesses, can save hundred cycles + * on Intel for instructions that don't read/change RSP, for + * for example. 
+ */ + cache_all_regs(vcpu); vcpu->mmio_is_write = 0; vcpu->arch.pio.string = 0; @@ -2141,7 +2176,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DO_MMIO; } - kvm_x86_ops->decache_regs(vcpu); + decache_all_regs(vcpu); kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); if (vcpu->mmio_is_write) { @@ -2194,18 +2229,19 @@ int complete_pio(struct kvm_vcpu *vcpu) struct kvm_pio_request *io = &vcpu->arch.pio; long delta; int r; - - kvm_x86_ops->cache_regs(vcpu); + unsigned long val; if (!io->string) { - if (io->in) - memcpy(&vcpu->arch.regs[VCPU_REGS_RAX], vcpu->arch.pio_data, - io->size); + if (io->in) { + val = guest_register_read(vcpu, VCPU_REGS_RAX); + memcpy(&val, vcpu->arch.pio_data, io->size); + guest_register_write(vcpu, VCPU_REGS_RAX, val); + } } else { if (io->in) { r = pio_copy_data(vcpu); if (r) { - kvm_x86_ops->cache_regs(vcpu); + kvm_x86_ops->cache_regs(vcpu, VCPU_REGS_RAX); return r; } } @@ -2217,19 +2253,24 @@ int complete_pio(struct kvm_vcpu *vcpu) * The size of the register should really depend on * current address size. */ - vcpu->arch.regs[VCPU_REGS_RCX] -= delta; + val = guest_register_read(vcpu, VCPU_REGS_RCX); + val -= delta; + guest_register_write(vcpu, VCPU_REGS_RCX, val); } if (io->down) delta = -delta; delta *= io->size; - if (io->in) - vcpu->arch.regs[VCPU_REGS_RDI] += delta; - else - vcpu->arch.regs[VCPU_REGS_RSI] += delta; + if (io->in) { + val = guest_register_read(vcpu, VCPU_REGS_RDI); + val += delta; + guest_register_write(vcpu, VCPU_REGS_RDI, val); + } else { + val = guest_register_read(vcpu, VCPU_REGS_RSI); + val += delta; + guest_register_write(vcpu, VCPU_REGS_RSI, val); + } } - kvm_x86_ops->decache_regs(vcpu); - io->count -= io->cur_count; io->cur_count = 0; @@ -2282,6 +2323,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, int size, unsigned port) { struct kvm_io_device *pio_dev; + unsigned long val; vcpu->run->exit_reason = KVM_EXIT_IO; vcpu->run->io.direction = in ? 
KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; @@ -2302,8 +2344,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, handler); - kvm_x86_ops->cache_regs(vcpu); - memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); + val = guest_register_read(vcpu, VCPU_REGS_RAX); + memcpy(vcpu->arch.pio_data, &val, 4); kvm_x86_ops->skip_emulated_instruction(vcpu); @@ -2488,13 +2530,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) unsigned long nr, a0, a1, a2, a3, ret; int r = 1; - kvm_x86_ops->cache_regs(vcpu); - - nr = vcpu->arch.regs[VCPU_REGS_RAX]; - a0 = vcpu->arch.regs[VCPU_REGS_RBX]; - a1 = vcpu->arch.regs[VCPU_REGS_RCX]; - a2 = vcpu->arch.regs[VCPU_REGS_RDX]; - a3 = vcpu->arch.regs[VCPU_REGS_RSI]; + nr = guest_register_read(vcpu, VCPU_REGS_RAX); + a0 = guest_register_read(vcpu, VCPU_REGS_RBX); + a1 = guest_register_read(vcpu, VCPU_REGS_RCX); + a2 = guest_register_read(vcpu, VCPU_REGS_RDX); + a3 = guest_register_read(vcpu, VCPU_REGS_RSI); KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler); @@ -2517,8 +2557,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) ret = -KVM_ENOSYS; break; } - vcpu->arch.regs[VCPU_REGS_RAX] = ret; - kvm_x86_ops->decache_regs(vcpu); + guest_register_write(vcpu, VCPU_REGS_RAX, ret); ++vcpu->stat.hypercalls; return r; } @@ -2528,6 +2567,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) { char instruction[3]; int ret = 0; + unsigned long rip = guest_register_read(vcpu, VCPU_REGS_RIP); /* @@ -2537,9 +2577,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) */ kvm_mmu_zap_all(vcpu->kvm); - kvm_x86_ops->cache_regs(vcpu); kvm_x86_ops->patch_hypercall(vcpu, instruction); - if (emulator_write_emulated(vcpu->arch.rip, instruction, 3, vcpu) + if (emulator_write_emulated(rip, instruction, 3, vcpu) != X86EMUL_CONTINUE) ret = -EFAULT; @@ -2669,13 +2708,12 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) u32 function, index; struct kvm_cpuid_entry2 *e, *best; - 
kvm_x86_ops->cache_regs(vcpu); - function = vcpu->arch.regs[VCPU_REGS_RAX]; - index = vcpu->arch.regs[VCPU_REGS_RCX]; - vcpu->arch.regs[VCPU_REGS_RAX] = 0; - vcpu->arch.regs[VCPU_REGS_RBX] = 0; - vcpu->arch.regs[VCPU_REGS_RCX] = 0; - vcpu->arch.regs[VCPU_REGS_RDX] = 0; + function = guest_register_read(vcpu, VCPU_REGS_RAX); + index = guest_register_read(vcpu, VCPU_REGS_RCX); + guest_register_write(vcpu, VCPU_REGS_RAX, 0); + guest_register_write(vcpu, VCPU_REGS_RBX, 0); + guest_register_write(vcpu, VCPU_REGS_RCX, 0); + guest_register_write(vcpu, VCPU_REGS_RDX, 0); best = NULL; for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { e = &vcpu->arch.cpuid_entries[i]; @@ -2693,12 +2731,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) best = e; } if (best) { - vcpu->arch.regs[VCPU_REGS_RAX] = best->eax; - vcpu->arch.regs[VCPU_REGS_RBX] = best->ebx; - vcpu->arch.regs[VCPU_REGS_RCX] = best->ecx; - vcpu->arch.regs[VCPU_REGS_RDX] = best->edx; + guest_register_write(vcpu, VCPU_REGS_RAX, best->eax); + guest_register_write(vcpu, VCPU_REGS_RBX, best->ebx); + guest_register_write(vcpu, VCPU_REGS_RCX, best->ecx); + guest_register_write(vcpu, VCPU_REGS_RDX, best->edx); } - kvm_x86_ops->decache_regs(vcpu); kvm_x86_ops->skip_emulated_instruction(vcpu); KVMTRACE_5D(CPUID, vcpu, function, (u32)vcpu->arch.regs[VCPU_REGS_RAX], @@ -2811,6 +2848,8 @@ again: } } + flush_regs(vcpu); + clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); kvm_inject_pending_timer_irqs(vcpu); @@ -2865,6 +2904,8 @@ again: local_irq_enable(); ++vcpu->stat.exits; + vcpu->arch.regs_available = KVM_CACHED_REGS; + vcpu->arch.regs_dirty = 0; /* * We must have an instruction between local_irq_enable() and @@ -2884,8 +2925,8 @@ again: * Profile KVM exit RIPs: */ if (unlikely(prof_on == KVM_PROFILING)) { - kvm_x86_ops->cache_regs(vcpu); - profile_hit(KVM_PROFILING, (void *)vcpu->arch.rip); + unsigned long rip = guest_register_read(vcpu, VCPU_REGS_RIP); + profile_hit(KVM_PROFILING, (void *)rip); } if (vcpu->arch.exception.pending 
&& kvm_x86_ops->exception_injected(vcpu)) @@ -2968,11 +3009,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } #endif - if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { - kvm_x86_ops->cache_regs(vcpu); - vcpu->arch.regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; - kvm_x86_ops->decache_regs(vcpu); - } + if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) + guest_register_write(vcpu, VCPU_REGS_RAX, + kvm_run->hypercall.ret); r = __vcpu_run(vcpu, kvm_run); @@ -2988,28 +3027,26 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { vcpu_load(vcpu); - kvm_x86_ops->cache_regs(vcpu); - - regs->rax = vcpu->arch.regs[VCPU_REGS_RAX]; - regs->rbx = vcpu->arch.regs[VCPU_REGS_RBX]; - regs->rcx = vcpu->arch.regs[VCPU_REGS_RCX]; - regs->rdx = vcpu->arch.regs[VCPU_REGS_RDX]; - regs->rsi = vcpu->arch.regs[VCPU_REGS_RSI]; - regs->rdi = vcpu->arch.regs[VCPU_REGS_RDI]; - regs->rsp = vcpu->arch.regs[VCPU_REGS_RSP]; - regs->rbp = vcpu->arch.regs[VCPU_REGS_RBP]; + regs->rax = guest_register_read(vcpu, VCPU_REGS_RAX); + regs->rbx = guest_register_read(vcpu, VCPU_REGS_RBX); + regs->rcx = guest_register_read(vcpu, VCPU_REGS_RCX); + regs->rdx = guest_register_read(vcpu, VCPU_REGS_RDX); + regs->rsi = guest_register_read(vcpu, VCPU_REGS_RSI); + regs->rdi = guest_register_read(vcpu, VCPU_REGS_RDI); + regs->rsp = guest_register_read(vcpu, VCPU_REGS_RSP); + regs->rbp = guest_register_read(vcpu, VCPU_REGS_RBP); #ifdef CONFIG_X86_64 - regs->r8 = vcpu->arch.regs[VCPU_REGS_R8]; - regs->r9 = vcpu->arch.regs[VCPU_REGS_R9]; - regs->r10 = vcpu->arch.regs[VCPU_REGS_R10]; - regs->r11 = vcpu->arch.regs[VCPU_REGS_R11]; - regs->r12 = vcpu->arch.regs[VCPU_REGS_R12]; - regs->r13 = vcpu->arch.regs[VCPU_REGS_R13]; - regs->r14 = vcpu->arch.regs[VCPU_REGS_R14]; - regs->r15 = vcpu->arch.regs[VCPU_REGS_R15]; + regs->r8 = guest_register_read(vcpu, VCPU_REGS_R8); + regs->r9 = guest_register_read(vcpu, VCPU_REGS_R9); + regs->r10 = guest_register_read(vcpu, 
VCPU_REGS_R10); + regs->r11 = guest_register_read(vcpu, VCPU_REGS_R11); + regs->r12 = guest_register_read(vcpu, VCPU_REGS_R12); + regs->r13 = guest_register_read(vcpu, VCPU_REGS_R13); + regs->r14 = guest_register_read(vcpu, VCPU_REGS_R14); + regs->r15 = guest_register_read(vcpu, VCPU_REGS_R15); #endif - regs->rip = vcpu->arch.rip; + regs->rip = guest_register_read(vcpu, VCPU_REGS_RIP); regs->rflags = kvm_x86_ops->get_rflags(vcpu); /* @@ -3027,29 +3064,29 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { vcpu_load(vcpu); - vcpu->arch.regs[VCPU_REGS_RAX] = regs->rax; - vcpu->arch.regs[VCPU_REGS_RBX] = regs->rbx; - vcpu->arch.regs[VCPU_REGS_RCX] = regs->rcx; - vcpu->arch.regs[VCPU_REGS_RDX] = regs->rdx; - vcpu->arch.regs[VCPU_REGS_RSI] = regs->rsi; - vcpu->arch.regs[VCPU_REGS_RDI] = regs->rdi; - vcpu->arch.regs[VCPU_REGS_RSP] = regs->rsp; - vcpu->arch.regs[VCPU_REGS_RBP] = regs->rbp; + guest_register_write(vcpu, VCPU_REGS_RAX, regs->rax); + guest_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); + guest_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); + guest_register_write(vcpu, VCPU_REGS_RDX, regs->rdx); + guest_register_write(vcpu, VCPU_REGS_RSI, regs->rsi); + guest_register_write(vcpu, VCPU_REGS_RDI, regs->rdi); + guest_register_write(vcpu, VCPU_REGS_RSP, regs->rsp); + guest_register_write(vcpu, VCPU_REGS_RBP, regs->rbp); #ifdef CONFIG_X86_64 - vcpu->arch.regs[VCPU_REGS_R8] = regs->r8; - vcpu->arch.regs[VCPU_REGS_R9] = regs->r9; - vcpu->arch.regs[VCPU_REGS_R10] = regs->r10; - vcpu->arch.regs[VCPU_REGS_R11] = regs->r11; - vcpu->arch.regs[VCPU_REGS_R12] = regs->r12; - vcpu->arch.regs[VCPU_REGS_R13] = regs->r13; - vcpu->arch.regs[VCPU_REGS_R14] = regs->r14; - vcpu->arch.regs[VCPU_REGS_R15] = regs->r15; + guest_register_write(vcpu, VCPU_REGS_R8, regs->r8); + guest_register_write(vcpu, VCPU_REGS_R9, regs->r9); + guest_register_write(vcpu, VCPU_REGS_R10, regs->r10); + guest_register_write(vcpu, VCPU_REGS_R11, regs->r11); + 
guest_register_write(vcpu, VCPU_REGS_R12, regs->r12); + guest_register_write(vcpu, VCPU_REGS_R13, regs->r13); + guest_register_write(vcpu, VCPU_REGS_R14, regs->r14); + guest_register_write(vcpu, VCPU_REGS_R15, regs->r15); + #endif - vcpu->arch.rip = regs->rip; + guest_register_write(vcpu, VCPU_REGS_RIP, regs->rip); kvm_x86_ops->set_rflags(vcpu, regs->rflags); - kvm_x86_ops->decache_regs(vcpu); vcpu->arch.exception.pending = false; @@ -3323,17 +3360,17 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, struct tss_segment_32 *tss) { tss->cr3 = vcpu->arch.cr3; - tss->eip = vcpu->arch.rip; + tss->eip = guest_register_read(vcpu, VCPU_REGS_RIP); tss->eflags = kvm_x86_ops->get_rflags(vcpu); - tss->eax = vcpu->arch.regs[VCPU_REGS_RAX]; - tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX]; - tss->edx = vcpu->arch.regs[VCPU_REGS_RDX]; - tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX]; - tss->esp = vcpu->arch.regs[VCPU_REGS_RSP]; - tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP]; - tss->esi = vcpu->arch.regs[VCPU_REGS_RSI]; - tss->edi = vcpu->arch.regs[VCPU_REGS_RDI]; - + tss->eax = guest_register_read(vcpu, VCPU_REGS_RAX); + tss->eax = guest_register_read(vcpu, VCPU_REGS_RAX); + tss->ecx = guest_register_read(vcpu, VCPU_REGS_RCX); + tss->edx = guest_register_read(vcpu, VCPU_REGS_RDX); + tss->ebx = guest_register_read(vcpu, VCPU_REGS_RBX); + tss->esp = guest_register_read(vcpu, VCPU_REGS_RSP); + tss->ebp = guest_register_read(vcpu, VCPU_REGS_RBP); + tss->esi = guest_register_read(vcpu, VCPU_REGS_RSI); + tss->edi = guest_register_read(vcpu, VCPU_REGS_RDI); tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); @@ -3349,17 +3386,17 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, { kvm_set_cr3(vcpu, tss->cr3); - vcpu->arch.rip = tss->eip; + guest_register_write(vcpu, VCPU_REGS_RIP, tss->eip); kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); - vcpu->arch.regs[VCPU_REGS_RAX] = 
tss->eax; - vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx; - vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx; - vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx; - vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp; - vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp; - vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; - vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; + guest_register_write(vcpu, VCPU_REGS_RAX, tss->eax); + guest_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); + guest_register_write(vcpu, VCPU_REGS_RDX, tss->edx); + guest_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); + guest_register_write(vcpu, VCPU_REGS_RSP, tss->esp); + guest_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); + guest_register_write(vcpu, VCPU_REGS_RSI, tss->esi); + guest_register_write(vcpu, VCPU_REGS_RDI, tss->edi); if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) return 1; @@ -3387,16 +3424,16 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, static void save_state_to_tss16(struct kvm_vcpu *vcpu, struct tss_segment_16 *tss) { - tss->ip = vcpu->arch.rip; + tss->ip = guest_register_read(vcpu, VCPU_REGS_RIP); tss->flag = kvm_x86_ops->get_rflags(vcpu); - tss->ax = vcpu->arch.regs[VCPU_REGS_RAX]; - tss->cx = vcpu->arch.regs[VCPU_REGS_RCX]; - tss->dx = vcpu->arch.regs[VCPU_REGS_RDX]; - tss->bx = vcpu->arch.regs[VCPU_REGS_RBX]; - tss->sp = vcpu->arch.regs[VCPU_REGS_RSP]; - tss->bp = vcpu->arch.regs[VCPU_REGS_RBP]; - tss->si = vcpu->arch.regs[VCPU_REGS_RSI]; - tss->di = vcpu->arch.regs[VCPU_REGS_RDI]; + tss->ax = guest_register_read(vcpu, VCPU_REGS_RAX); + tss->cx = guest_register_read(vcpu, VCPU_REGS_RCX); + tss->dx = guest_register_read(vcpu, VCPU_REGS_RDX); + tss->bx = guest_register_read(vcpu, VCPU_REGS_RBX); + tss->sp = guest_register_read(vcpu, VCPU_REGS_RSP); + tss->bp = guest_register_read(vcpu, VCPU_REGS_RBP); + tss->si = guest_register_read(vcpu, VCPU_REGS_RSI); + tss->di = guest_register_read(vcpu, VCPU_REGS_RDI); tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); tss->cs = 
get_segment_selector(vcpu, VCPU_SREG_CS); @@ -3409,16 +3446,16 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, static int load_state_from_tss16(struct kvm_vcpu *vcpu, struct tss_segment_16 *tss) { - vcpu->arch.rip = tss->ip; + guest_register_write(vcpu, VCPU_REGS_RIP, tss->ip); kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); - vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax; - vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx; - vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx; - vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx; - vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp; - vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp; - vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; - vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; + guest_register_write(vcpu, VCPU_REGS_RAX, tss->ax); + guest_register_write(vcpu, VCPU_REGS_RCX, tss->cx); + guest_register_write(vcpu, VCPU_REGS_RDX, tss->dx); + guest_register_write(vcpu, VCPU_REGS_RBX, tss->bx); + guest_register_write(vcpu, VCPU_REGS_RSP, tss->sp); + guest_register_write(vcpu, VCPU_REGS_RBP, tss->bp); + guest_register_write(vcpu, VCPU_REGS_RSI, tss->si); + guest_register_write(vcpu, VCPU_REGS_RDI, tss->di); if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) return 1; @@ -3526,7 +3563,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) } kvm_x86_ops->skip_emulated_instruction(vcpu); - kvm_x86_ops->cache_regs(vcpu); if (nseg_desc.type & 8) ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc, @@ -3551,7 +3587,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) tr_seg.type = 11; kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); out: - kvm_x86_ops->decache_regs(vcpu); return ret; } EXPORT_SYMBOL_GPL(kvm_task_switch); diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 38926b7..c74b9d9 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -26,6 +26,7 @@ #define DPRINTF(_f, _a ...) 
printf(_f , ## _a) #else #include <linux/kvm_host.h> +#include "kvm_cache_regs.h" #define DPRINTF(x...) do {} while (0) #endif #include <linux/module.h> @@ -806,7 +807,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) /* Shadow copy of register state. Committed on successful emulation. */ memset(c, 0, sizeof(struct decode_cache)); - c->eip = ctxt->vcpu->arch.rip; + c->eip = guest_register_read(ctxt->vcpu, VCPU_REGS_RIP); memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); switch (mode) { @@ -1245,7 +1246,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) if (c->rep_prefix && (c->d & String)) { /* All REP prefixes have the same first termination condition */ if (c->regs[VCPU_REGS_RCX] == 0) { - ctxt->vcpu->arch.rip = c->eip; + guest_register_write(ctxt->vcpu, VCPU_REGS_RIP, c->eip); goto done; } /* The second termination condition only applies for REPE @@ -1259,17 +1260,20 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) (c->b == 0xae) || (c->b == 0xaf)) { if ((c->rep_prefix == REPE_PREFIX) && ((ctxt->eflags & EFLG_ZF) == 0)) { - ctxt->vcpu->arch.rip = c->eip; + guest_register_write(ctxt->vcpu, + VCPU_REGS_RIP, + c->eip); goto done; } if ((c->rep_prefix == REPNE_PREFIX) && ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { - ctxt->vcpu->arch.rip = c->eip; + guest_register_write(ctxt->vcpu, VCPU_REGS_RIP, + c->eip); goto done; } } c->regs[VCPU_REGS_RCX]--; - c->eip = ctxt->vcpu->arch.rip; + c->eip = guest_register_read(ctxt->vcpu, VCPU_REGS_RIP); } if (c->src.type == OP_MEM) { @@ -1750,7 +1754,7 @@ writeback: /* Commit shadow register state. 
*/ memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); - ctxt->vcpu->arch.rip = c->eip; + guest_register_write(ctxt->vcpu, VCPU_REGS_RIP, c->eip); done: if (rc == X86EMUL_UNHANDLEABLE) { @@ -1775,7 +1779,7 @@ twobyte_insn: goto done; /* Let the processor re-execute the fixed hypercall */ - c->eip = ctxt->vcpu->arch.rip; + c->eip = guest_register_read(ctxt->vcpu, VCPU_REGS_RIP); /* Disable writeback. */ c->dst.type = OP_NONE; break; @@ -1871,7 +1875,7 @@ twobyte_insn: rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); if (rc) { kvm_inject_gp(ctxt->vcpu, 0); - c->eip = ctxt->vcpu->arch.rip; + c->eip = guest_register_read(ctxt->vcpu, VCPU_REGS_RIP); } rc = X86EMUL_CONTINUE; c->dst.type = OP_NONE; @@ -1881,7 +1885,7 @@ twobyte_insn: rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); if (rc) { kvm_inject_gp(ctxt->vcpu, 0); - c->eip = ctxt->vcpu->arch.rip; + c->eip = guest_register_read(ctxt->vcpu, VCPU_REGS_RIP); } else { c->regs[VCPU_REGS_RAX] = (u32)msr_data; c->regs[VCPU_REGS_RDX] = msr_data >> 32; diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 851184d..cc5c94b 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -87,7 +87,7 @@ extern struct list_head vm_list; struct kvm_vcpu; struct kvm; -enum { +enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, VCPU_REGS_RDX = 2, @@ -106,9 +106,21 @@ enum { VCPU_REGS_R14 = 14, VCPU_REGS_R15 = 15, #endif + VCPU_REGS_RIP = 16, NR_VCPU_REGS }; +/* + * List of registers already read by kvm_x86_ops->run(). 
+ */ +#define KVM_CACHED_REGS ((1 << VCPU_REGS_RCX) | (1 << VCPU_REGS_RDX) | \ + (1 << VCPU_REGS_RBX) | (1 << VCPU_REGS_RBP) | \ + (1 << VCPU_REGS_RSI) | (1 << VCPU_REGS_RDI) | \ + (1 << VCPU_REGS_R8) | (1 << VCPU_REGS_R9) | \ + (1 << VCPU_REGS_R10) | (1 << VCPU_REGS_R11) | \ + (1 << VCPU_REGS_R12) | (1 << VCPU_REGS_R13) | \ + (1 << VCPU_REGS_R14) | (1 << VCPU_REGS_R15)) + enum { VCPU_SREG_ES, VCPU_SREG_CS, @@ -217,8 +229,9 @@ struct kvm_vcpu_arch { int interrupt_window_open; unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); - unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */ - unsigned long rip; /* needs vcpu_load_rsp_rip() */ + unsigned long regs[NR_VCPU_REGS]; /* needs cache_regs() */ + u32 regs_available; + u32 regs_dirty; unsigned long cr0; unsigned long cr2; @@ -410,8 +423,8 @@ struct kvm_x86_ops { unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, int *exception); - void (*cache_regs)(struct kvm_vcpu *vcpu); - void (*decache_regs)(struct kvm_vcpu *vcpu); + void (*cache_regs)(struct kvm_vcpu *vcpu, enum kvm_reg reg); + void (*decache_regs)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html