Hi,

This issue has been discussed before. Gfxboot on VMX is broken because
it reads SS right after switching from real to protected mode, at which
point SS still holds its real-mode value. That value is invalid in
protected mode, and VMX does not allow it.
As far as I know, gfxboot is the only application that suffers from this
issue.
The current "fix" is to make gfxboot use a previously stored SS value,
which works fine for new releases. Already-shipped versions of the
software cannot be changed, though, so there needs to be another way to
make KVM work with older versions of gfxboot.

As everything except gfxboot works, we can simply change gfxboot at
runtime to use a different value. Unfortunately, the mov instruction
used to read the SS register is only 2 bytes long, so there is no way to
binary-patch the mov into something that would contain an address. So the
only way I could think of was an invalid instruction. The UD exception
is intercepted in KVM and is already emulated for VMCALLs. This can be
extended to an opcode that is officially unused (0f 0c), for which the
emulator then performs a mov realmode_ss, %eax.

This patch implements exactly this idea and fixes openSUSE < 11.0 and
Ubuntu CD booting on VMX for me. Comments are, as always, welcome.

Signed-off-by: Alexander Graf <[EMAIL PROTECTED]>


diff -ur kernel/include/asm-x86/kvm_host.h kernel.patched/include/asm-x86/kvm_host.h
--- kernel/include/asm-x86/kvm_host.h	2008-02-15 07:59:28.000000000 +0100
+++ kernel.patched/include/asm-x86/kvm_host.h	2008-02-15 07:42:41.000000000 +0100
@@ -195,6 +195,7 @@
 	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
 	unsigned long rip;      /* needs vcpu_load_rsp_rip() */
 
+	u16 backup_ss;
 	unsigned long cr0;
 	unsigned long cr2;
 	unsigned long cr3;
diff -ur kernel/vmx.c kernel.patched/vmx.c
--- kernel/vmx.c	2008-02-15 07:59:28.000000000 +0100
+++ kernel.patched/vmx.c	2008-02-15 07:51:27.000000000 +0100
@@ -1112,6 +1112,8 @@
 static void enter_pmode(struct kvm_vcpu *vcpu)
 {
 	unsigned long flags;
+	unsigned long rip;
+	u8 opcodes[2];
 
 	vcpu->arch.rmode.active = 0;
 
@@ -1134,12 +1136,39 @@
 	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
 	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
 
+	/* Save real mode SS */
+	vcpu->arch.backup_ss = vmcs_read16(GUEST_SS_SELECTOR);
+
 	vmcs_write16(GUEST_SS_SELECTOR, 0);
 	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
 
 	vmcs_write16(GUEST_CS_SELECTOR,
 		     vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
 	vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+
+	/* VMX checks for SS.CPL = CS.CPL on VM entry, if we are in
+	 * protected mode. This fails on the transition from real mode
+	 * to protected mode, as just after that, SS still contains the
+	 * real mode segment, which does not know anything about CPLs.
+	 * 
+	 * As far as I know only gfxboot exploits this feature, by using
+	 * the old real mode SS value to find a new SS selector in protected
+	 * mode. This happens using a mov %ss, %eax instruction, which we
+	 * can patch to an invalid opcode and emulate later on, giving eax
+	 * the real SS value, that existed before the protected mode
+	 * switch. */
+	rip = vcpu->arch.rip + vmcs_readl(GUEST_CS_BASE) + 14;
+	emulator_read_std(rip, (void *)opcodes, 2, vcpu);
+
+	if ( opcodes[0] ==  0x8c && opcodes[1] == 0xd0 ) {
+		vcpu_printf(vcpu, "%s: patching mov SS\n", __FUNCTION__);
+		opcodes[0] = 0x0f;
+		opcodes[1] = 0x0c;
+		if (emulator_write_emulated(rip, opcodes,
+		    2, vcpu) != X86EMUL_CONTINUE)
+			vcpu_printf(vcpu, "%s: unable to patch mov SS\n",
+				__FUNCTION__);
+	}
 }
 
 static gva_t rmode_tss_base(struct kvm *kvm)
diff -ur kernel/x86.c kernel.patched/x86.c
--- kernel/x86.c	2008-02-15 07:59:28.000000000 +0100
+++ kernel.patched/x86.c	2008-02-14 16:47:27.000000000 +0100
@@ -1886,13 +1886,14 @@
 
 		r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
 
-		/* Reject the instructions other than VMCALL/VMMCALL when
+		/* Reject the instructions other than VMCALL/VMMCALL/HACKS when
 		 * try to emulate invalid opcode */
 		c = &vcpu->arch.emulate_ctxt.decode;
 		if ((emulation_type & EMULTYPE_TRAP_UD) &&
-		    (!(c->twobyte && c->b == 0x01 &&
+		    ((!(c->twobyte && c->b == 0x01 &&
 		      (c->modrm_reg == 0 || c->modrm_reg == 3) &&
-		       c->modrm_mod == 3 && c->modrm_rm == 1)))
+		       c->modrm_mod == 3 && c->modrm_rm == 1)) &&
+		       c->b != 0x0c))
 			return EMULATE_FAIL;
 
 		++vcpu->stat.insn_emulation;
diff -ur kernel/x86_emulate.c kernel.patched/x86_emulate.c
--- kernel/x86_emulate.c	2008-02-15 07:59:28.000000000 +0100
+++ kernel.patched/x86_emulate.c	2008-02-15 07:53:39.000000000 +0100
@@ -174,7 +175,7 @@
 static u16 twobyte_table[256] = {
 	/* 0x00 - 0x0F */
 	0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0,
-	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
+	ImplicitOps, ImplicitOps, 0, 0, ImplicitOps, ImplicitOps | ModRM, 0, 0,
 	/* 0x10 - 0x1F */
 	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
 	/* 0x20 - 0x2F */
@@ -1744,6 +1745,16 @@
 	case 0x18:		/* Grp16 (prefetch/nop) */
 		c->dst.type = OP_NONE;
 		break;
+	case 0x0c: /* Invalid (used to patch mov %ss, %eax) */
+		/* This opcode is declared invalid, according to the Intel
+		 * specification. As it is only used on VMX, we do not have
+		 * to take AMD instructions into account. For more
+		 * information, why this is needed, please see
+		 * vmx.c:enter_pmode.
+		 */
+		c->dst.type = OP_NONE;
+		c->regs[VCPU_REGS_RAX] = ctxt->vcpu->arch.backup_ss;
+		break;
 	case 0x20: /* mov cr, reg */
 		if (c->modrm_mod != 3)
 			goto cannot_emulate;
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to