On Thu, 24 Apr 2008 11:05:39 -0500 Anthony Liguori <[EMAIL PROTECTED]> wrote:
> The first stage is to detect vmentry failures and run x86_emulate() for > a single instruction. If you look at the mailing list, you'll see > patches from myself and Guillaume. This should be enough to allow most > Ubuntu installer CDs to work under KVM. Howdy, Here is the latest patch I have. It can detect a vmentry failure and emulate one instruction. I added the emulation of several instructions like "ljmp", "mov Sreg, reg", "mov reg, Sreg"... The problem I'm working on is that once I have entered real-mode emulation, I cannot get back to a VMX-friendly state (in my case, cs.rpl == ss.rpl). So I emulate more and more instructions. I added a trace to see which instructions are emulated (emulation of 0xa8 is still in progress, so it currently fails): [60108.040894] emulation at (46e53) rip 6e13: ea 18 6e 18 [60108.072108] emulation at (46e58) rip 6e18: 66 b8 20 00 [60108.103997] emulation at (46e5c) rip 6e1c: 8e d8 8c d0 [60108.148114] emulation at (46e5e) rip 6e1e: 8c d0 81 e4 [60108.180117] emulation at (46e60) rip 6e20: 81 e4 ff ff [60108.212008] emulation at (46e66) rip 6e26: c1 e0 04 01 [60108.244926] emulation at (46e69) rip 6e29: 01 c4 66 b8 [60108.272948] emulation at (46e6b) rip 6e2b: 66 b8 08 00 [60108.304953] emulation at (46e6f) rip 6e2f: 8e d0 8e c0 [60108.348973] emulation at (46e71) rip 6e31: 8e c0 8e e0 [60108.396965] emulation at (46e73) rip 6e33: 8e e0 8e e8 [60108.445002] emulation at (46e75) rip 6e35: 8e e8 58 66 [60108.489021] emulation at (46e77) rip 6e37: 58 66 9d 66 [60108.521028] emulation at (46e78) rip 6e38: 66 9d 66 c3 [60108.552979] emulation at (46e7a) rip 6e3a: 66 c3 66 9c [60108.581048] emulation at (40e2a) rip dea: be 29 0a 00 [60108.613033] emulation at (40e2f) rip def: e8 41 12 00 [60108.644970] emulation at (42075) rip 2035: c6 05 84 07 [60108.673038] emulation at (4207c) rip 203c: e8 18 01 00 [60108.705039] emulation at (42199) rip 2159: 31 c0 80 3d [60108.736998] emulation at (4219b) rip 215b: 80 3d 86 07 [60108.765041] 
emulation at (421a2) rip 2162: 74 01 26 ac [60108.797044] emulation at (421a5) rip 2165: ac c3 80 3d [60108.829033] emulation at (421a6) rip 2166: c3 80 3d 86 [60108.857068] emulation at (42081) rip 2041: 09 c0 0f 84 [60108.889053] emulation at (42083) rip 2043: 0f 84 0f 01 [60108.921054] emulation at (42198) rip 2158: c3 31 c0 80 [60108.949076] emulation at (40e34) rip df4: 26 66 ff 35 [60108.981077] emulation at (40e3c) rip dfc: 66 8f 05 d0 [60109.013011] emulation at (40e43) rip e03: a1 b4 00 00 [60109.041079] emulation at (40e48) rip e08: 26 8a 40 03 [60109.073039] emulation at (40e4c) rip e0c: a8 01 74 4c [60109.101078] emulation failed (vmentry failure) rip e0c a8 01 74 4c As we can see, the first emulated instruction is the ljmp, and after that we emulate gfxboot loader instructions. I suspect a problem with an update of the SS segment, or something like that, in the instructions that I emulate. I have pasted the patch below. Don't worry about the last modification to the two header files; it is not related to real mode emulation. 
Regards, Guillaume --- diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8e5d664..2c4c14d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1183,7 +1183,9 @@ static void enter_pmode(struct kvm_vcpu *vcpu) fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); +#if 0 vmcs_write16(GUEST_SS_SELECTOR, 0); +#endif vmcs_write32(GUEST_SS_AR_BYTES, 0x93); vmcs_write16(GUEST_CS_SELECTOR, @@ -2323,6 +2325,53 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return kvm_task_switch(vcpu, tss_selector, reason); } +static int handle_vmentry_failure(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run, u32 failure_reason) +{ + u16 ss, cs; + u8 opcodes[4]; + unsigned long rip = vcpu->arch.rip; + unsigned long rip_linear; + + ss = vmcs_read16(GUEST_SS_SELECTOR); + cs = vmcs_read16(GUEST_CS_SELECTOR); + + if ((ss & 0x03) != (cs & 0x03)) { + int err; + +#if 0 + printk(KERN_INFO "vmentry failure because ss.cpl != cs.cpl\n"); +#endif + rip_linear = rip + vmx_get_segment_base(vcpu, VCPU_SREG_CS); + emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); + printk(KERN_INFO "emulation at (%lx) rip %lx: %02x %02x %02x %02x\n", + rip_linear, + rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); + err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); + switch (err) { + case EMULATE_DONE: +#if 0 + printk(KERN_INFO "successfully emulated instruction\n"); +#endif + return 1; + case EMULATE_DO_MMIO: + printk(KERN_INFO "mmio?\n"); + return 0; +#if 0 + case EMULATE_FAIL: +#endif + default: + kvm_report_emulation_failure(vcpu, "vmentry failure"); + break; + } + } + + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = failure_reason; + + return 0; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. 
Otherwise they set the kvm_run parameter to indicate what needs @@ -2375,6 +2424,12 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) exit_reason != EXIT_REASON_EXCEPTION_NMI) printk(KERN_WARNING "%s: unexpected, valid vectoring info and " "exit reason is 0x%x\n", __func__, exit_reason); + + if ((exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { + exit_reason &= ~VMX_EXIT_REASONS_FAILED_VMENTRY; + return handle_vmentry_failure(vcpu, kvm_run, exit_reason); + } + if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ce5563..f394efd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3267,8 +3267,8 @@ static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, return 0; } -static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, - int type_bits, int seg) +int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + int type_bits, int seg) { struct kvm_segment kvm_seg; diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 2ca0838..287811c 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -138,7 +138,8 @@ static u16 opcode_table[256] = { /* 0x88 - 0x8F */ ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, - 0, ModRM | DstReg, 0, Group | Group1A, + DstMem | SrcReg | ModRM | Mov, ModRM | DstReg, DstReg |SrcMem |ModRM | Mov, + Group | Group1A, /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, @@ -148,11 +149,16 @@ static u16 opcode_table[256] = { ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, ByteOp | ImplicitOps | String, ImplicitOps | String, /* 0xA8 - 0xAF */ - 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, + 0, 0, ByteOp | 
ImplicitOps | Mov | String, + ImplicitOps | Mov | String, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, ByteOp | ImplicitOps | String, ImplicitOps | String, - /* 0xB0 - 0xBF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xB0 - 0xB7 */ + Mov | SrcImmByte, Mov | SrcImmByte, Mov | SrcImmByte, Mov | SrcImmByte, + Mov | SrcImmByte, Mov | SrcImmByte, Mov | SrcImmByte, Mov | SrcImmByte, + /* 0xB8 - 0xBF */ + Mov | SrcImm, Mov | SrcImm, Mov | SrcImm, Mov | SrcImm, + Mov | SrcImm, Mov | SrcImm, Mov | SrcImm, Mov | SrcImm, /* 0xC0 - 0xC7 */ ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, 0, ImplicitOps | Stack, 0, 0, @@ -168,7 +174,7 @@ static u16 opcode_table[256] = { /* 0xE0 - 0xE7 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE8 - 0xEF */ - ImplicitOps | Stack, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, + ImplicitOps | Stack, SrcImm|ImplicitOps, ImplicitOps, SrcImmByte|ImplicitOps, 0, 0, 0, 0, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, @@ -215,7 +221,7 @@ static u16 twobyte_table[256] = { /* 0xA0 - 0xA7 */ 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, + ImplicitOps, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, /* 0xB0 - 0xB7 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, DstMem | SrcReg | ModRM | BitOp, @@ -680,7 +686,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, c->modrm_val = *(unsigned long *) decode_register(c->modrm_rm, c->regs, c->d & ByteOp); return rc; - } + } if (c->ad_bytes == 2) { unsigned bx = c->regs[VCPU_REGS_RBX]; @@ -1483,6 +1489,9 @@ special_insn: goto cmp; } break; + case 0xa8: /* test imm,%%eax */ + c->dst.ptr = &c->regs[VCPU_REGS_RAX]; + c->dst.val = c->regs[VCPU_REGS_RAX]; case 0x84 ... 0x85: emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); break; @@ -1511,9 +1520,95 @@ special_insn: break; case 0x88 ... 
0x8b: /* mov */ goto mov; + case 0x8c: { /* mov Sreg, r/m */ + struct kvm_save_segment *segreg; + + if (c->modrm_mod == 0x3) + c->src.val = c->modrm_val; + + switch ( c->modrm_reg ) { + case 0: + segreg = &ctxt->vcpu->arch.rmode.es; + break; + case 1: + segreg = &ctxt->vcpu->arch.rmode.cs; + break; + case 2: + segreg = &ctxt->vcpu->arch.rmode.ss; + break; + case 3: + segreg = &ctxt->vcpu->arch.rmode.ds; + break; + case 4: + segreg = &ctxt->vcpu->arch.rmode.fs; + break; + case 5: + segreg = &ctxt->vcpu->arch.rmode.gs; + break; + default: + printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n", + c->modrm); + goto cannot_emulate; + } + c->dst.val = segreg->selector; + if (c->dst.type == OP_MEM) + c->dst.bytes = 2; + + break; + } case 0x8d: /* lea r16/r32, m */ c->dst.val = c->modrm_ea; break; + case 0x8e: { /* mov seg, r/m16 */ + struct kvm_save_segment *segreg; + int ar_type; + + if (c->modrm_mod == 0x3) + c->src.val = c->modrm_val; + + switch ( c->modrm_reg ) { + case 0: + segreg = &ctxt->vcpu->arch.rmode.es; + ar_type = 0x3; + printk(KERN_INFO "es_base = %lx\n", c->src.val * 0x10); + break; + case 1: + segreg = &ctxt->vcpu->arch.rmode.cs; + ar_type = 0xb; + printk(KERN_INFO "cs_base = %lx\n", c->src.val * 0x10); + break; + case 2: + segreg = &ctxt->vcpu->arch.rmode.ss; + ar_type = 0x3; + printk(KERN_INFO "ss_base = %lx\n", c->src.val * 0x10); + break; + case 3: + segreg = &ctxt->vcpu->arch.rmode.ds; + ar_type = 0x3; + printk(KERN_INFO "ds_base = %lx\n", c->src.val * 0x10); + break; + case 4: + segreg = &ctxt->vcpu->arch.rmode.fs; + ar_type = 0x3; + printk(KERN_INFO "fs_base = %lx\n", c->src.val * 0x10); + break; + case 5: + segreg = &ctxt->vcpu->arch.rmode.gs; + ar_type = 0x3; + printk(KERN_INFO "gs_base = %lx\n", c->src.val * 0x10); + break; + default: + printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n", c->modrm); + goto cannot_emulate; + } + segreg->selector = c->src.val; + segreg->base = (u64) c->src.val << 4; + segreg->limit = 0xffff; + segreg->ar = 
ar_type; + + c->dst.type = OP_NONE; /* Disable writeback. */ + break; + } case 0x8f: /* pop (sole member of Grp1a) */ rc = emulate_grp1a(ctxt, ops); if (rc != 0) @@ -1619,6 +1714,23 @@ special_insn: case 0xae ... 0xaf: /* scas */ DPRINTF("Urk! I don't handle SCAS.\n"); goto cannot_emulate; + case 0xb0 ... 0xb3: /* mov rl, imm8 */ + c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX + (c->b & 0x3)]; + c->dst.val = c->src.val; + c->dst.type = OP_REG; + c->dst.bytes = 1; + break; + case 0xb4 ... 0xb7: /* mov rh, imm8 */ + c->dst.ptr = ((void *)&c->regs[VCPU_REGS_RAX + (c->b & 0x3)] + 1); + c->dst.val = c->src.val; + c->dst.type = OP_REG; + c->dst.bytes = 1; + break; + case 0xb8 ... 0xbf: /* mov r, imm */ + c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX + (c->b & 0x7)]; + c->dst.val = c->src.val; + c->dst.type = OP_REG; + break; case 0xc0 ... 0xc1: emulate_grp2(ctxt); break; @@ -1661,6 +1773,31 @@ special_insn: jmp_rel(c, c->src.val); c->dst.type = OP_NONE; /* Disable writeback. */ break; + case 0xea: /* jmp (far, absolute) */ { + uint32_t eip; + uint16_t sel; + + switch (c->op_bytes) { + case 2: + eip = insn_fetch(u16, 2, c->eip); + eip = eip & 0x0000FFFF; /* clear upper 16 bits */ + break; + case 4: + eip = insn_fetch(u32, 4, c->eip); + break; + default: + DPRINTF("Jmp far: Invalid op_bytes\n"); + goto cannot_emulate; + } + sel = insn_fetch(u16, 2, c->eip); + if (load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) { + DPRINTF("Jmp far: Cannot load CS segment descriptor\n"); + goto cannot_emulate; + } + + c->eip = eip; + break; + } case 0xf4: /* hlt */ ctxt->vcpu->arch.halt_request = 1; goto done; diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 9d963cd..24c8bf9 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -271,7 +271,7 @@ struct kvm_vcpu_arch { unsigned long base; u32 limit; u32 ar; - } tr, es, ds, fs, gs; + } tr, cs, es, ds, fs, gs, ss; } rmode; int halt_request; /* real mode on Intel only 
*/ @@ -488,6 +488,8 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value); +int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + int type_bits, int seg); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); diff --git a/include/asm-x86/posix_types.h b/include/asm-x86/posix_types.h index fe312a5..0c8329e 100644 --- a/include/asm-x86/posix_types.h +++ b/include/asm-x86/posix_types.h @@ -1,7 +1,13 @@ #ifdef __KERNEL__ -# if defined(CONFIG_X86_32) || defined(__i386__) +# ifdef CONFIG_X86_32 # include "posix_types_32.h" # else # include "posix_types_64.h" -# endif +# endif +#else +# ifdef __i386__ +# include "posix_types_32.h" +# else +# include "posix_types_64.h" +# endif #endif diff --git a/include/asm-x86/unistd.h b/include/asm-x86/unistd.h index effc7ad..1dea24a 100644 --- a/include/asm-x86/unistd.h +++ b/include/asm-x86/unistd.h @@ -1,7 +1,13 @@ #ifdef __KERNEL__ -# if defined(CONFIG_X86_32) || defined(__i386__) +# ifdef CONFIG_X86_32 # include "unistd_32.h" # else # include "unistd_64.h" -# endif +# endif +#else +# ifdef __i386__ +# include "unistd_32.h" +# else +# include "unistd_64.h" +# endif #endif ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel