Mohammed Gamal wrote:
This patch allows emulation whenever the guest state is not valid for VMX operation. This usually happens during mode switches with guests such as older versions of gfxboot and FreeDOS with HIMEM.

To address this issue, the patch introduces the following:

- A function that invokes the x86 emulator when the guest state is not valid (borrowed from Guillaume Thouvenin's real mode patches)
- A function that checks that guest register state is VMX compliant
- A module parameter that enables these operations. It is disabled by default, so as not to interfere with KVM's normal operation

This version adds the following:

- An "emulation required" flag, which is set on mode switches
- Improved guest state checking functions
- Emulation is done on guest entry rather than directly on mode switches, using the emulation flag

@@ -1294,7 +1298,9 @@ static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
 static void enter_pmode(struct kvm_vcpu *vcpu)
 {
        unsigned long flags;
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       vmx->emulation_required = 1;
        vcpu->arch.rmode.active = 0;
        vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
@@ -1311,17 +1317,19 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
        update_exception_bitmap(vcpu);

-       fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
-       fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
-       fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
-       fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+       if(!emulate_invalid_guest_state) {
+               fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
+               fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
+               fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
+               fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);

Instead of all this indentation, you can do an 'if (emulate_invalid_guest_state) return;'.
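Something like this (just a sketch, and assuming nothing after the fixups in enter_pmode() still needs to run when we're emulating):

	if (emulate_invalid_guest_state)
		return;

	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);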

@@ -1351,7 +1359,9 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
 static void enter_rmode(struct kvm_vcpu *vcpu)
 {
        unsigned long flags;
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       vmx->emulation_required = 1;
        vcpu->arch.rmode.active = 1;
        vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
@@ -1373,20 +1383,22 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
        vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
        update_exception_bitmap(vcpu);
-       vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
-       vmcs_write32(GUEST_SS_LIMIT, 0xffff);
-       vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
+       if(!emulate_invalid_guest_state) {
+               vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
+               vmcs_write32(GUEST_SS_LIMIT, 0xffff);
+               vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);

Again, except you'll have to use a goto. We can later refactor the state mangling into a separate function.
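For example (sketch only; 'continue_rmode' is a label name I just made up):

	if (emulate_invalid_guest_state)
		goto continue_rmode;

	vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
	vmcs_write32(GUEST_SS_LIMIT, 0xffff);
	vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
	/* ... remaining segment mangling ... */

continue_rmode:
	kvm_mmu_reset_context(vcpu);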

It's important to keep a complex change like this easy to review.

        kvm_mmu_reset_context(vcpu);
        init_rmode(vcpu->kvm);
@@ -1721,6 +1733,206 @@ static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
        vmcs_writel(GUEST_GDTR_BASE, dt->base);
 }
+static bool rmode_segment_invalid(struct kvm_vcpu *vcpu, int seg)
+{
+       struct kvm_segment var;
+       u32 ar;
+
+       vmx_get_segment(vcpu, &var, seg);
+       ar = vmx_segment_access_rights(&var);
+
+       if (var.limit != 0xffff)
+               return true;
+       if (ar != 0xf3)
+               return true;

Need additional check for base == selector << 4.
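That is, something like:

	if (var.base != (var.selector << 4))
		return true;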

+
+static bool code_segment_invalid(struct kvm_vcpu *vcpu)
+{
+       struct kvm_segment cs;
+
+       vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+
+       if (!(cs.type & (AR_TYPE_ACCESSES_MASK|AR_TYPE_CODE_MASK)))
+               return true;

That doesn't mean what you think it means. It will fail only if both bits are unset. To check if both bits are set, use (~cs.type & (MASK1|MASK2)).
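In other words, invalid unless both bits are set:

	if (~cs.type & (AR_TYPE_ACCESSES_MASK | AR_TYPE_CODE_MASK))
		return true;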

+       if ((cs.type <= 0xb) && (cs.type >= 0x8)) {
+               if (cs.dpl != (cs.selector & 3))
+                       return true;
+       }
+       if ((cs.type <= 0xf) && (cs.type >= 0xc)) {
+               if (cs.dpl > (cs.selector & 3))
+                       return true;
+       }

You're looking at the conforming bit, right? If so please use an explicit bitmask. Also a temporary for the RPL will make the code clearer.
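Roughly (AR_TYPE_CODE_CONFORMING_MASK is a name I'm inventing here for bit 2 of the type field):

	unsigned int rpl = cs.selector & 3;	/* better yet, a named RPL mask */

	if (cs.type & AR_TYPE_CODE_CONFORMING_MASK) {
		/* conforming code segment: DPL may not exceed RPL */
		if (cs.dpl > rpl)
			return true;
	} else {
		/* nonconforming: DPL must equal RPL */
		if (cs.dpl != rpl)
			return true;
	}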

+       if((cs.limit & 0x00000fff) != 0x00000fff) {
+               if(cs.g)
+                       return true;
+       }
+       else if(cs.limit & 0xfff00000) {
+               if(!cs.g)
+                       return true;
+       }

This bit is generic, not cs specific. On the other hand there is no way the guest can generate an invalid combination, so this can be removed.

+static bool tr_segment_valid(struct kvm_vcpu *vcpu)
+{

Should be tr_invalid().

I recommend changing the meaning of all the checks to return true on valid segments, since that's how the specs are written. It will be easier to validate the code against the docs, and avoids double negatives.
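For instance, the real mode check would then read something like (sketch, with the base check from above folded in):

	static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
	{
		struct kvm_segment var;
		u32 ar;

		vmx_get_segment(vcpu, &var, seg);
		ar = vmx_segment_access_rights(&var);

		return var.limit == 0xffff && ar == 0xf3 &&
			var.base == (var.selector << 4);
	}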

+static bool ldtr_segment_valid(struct kvm_vcpu *vcpu)

invalid

+{
+       struct kvm_segment ldtr;
+
+       vmx_get_segment(vcpu, &ldtr, VCPU_SREG_TR);

VCPU_SREG_LDTR

+/*
+ * Check if guest state is invalid. Returns true if invalid, false if
+ * valid.
+ * We assume that registers are always usable
+ */
+static bool guest_state_invalid(struct kvm_vcpu *vcpu)
+{
+       /* real mode guest state checks */
+       if (!(vcpu->arch.cr0 & X86_CR0_PE)) {
+               if(rmode_segment_invalid(vcpu, VCPU_SREG_CS))
+                       return true;
+               if(rmode_segment_invalid(vcpu, VCPU_SREG_SS))
+                       return true;
+               if(rmode_segment_invalid(vcpu, VCPU_SREG_DS))
+                       return true;
+               if(rmode_segment_invalid(vcpu, VCPU_SREG_ES))
+                       return true;
+               if(rmode_segment_invalid(vcpu, VCPU_SREG_FS))
+                       return true;
+               if(rmode_segment_invalid(vcpu, VCPU_SREG_GS))
+                       return true;
+       }
+       else { /* protected mode guest state checks */
+               if(code_segment_invalid(vcpu))
+                       return true;
+               if(stack_segment_invalid(vcpu))
+                       return true;
+               if(data_segment_invalid(vcpu, VCPU_SREG_DS))
+                       return true;
+               if(data_segment_invalid(vcpu, VCPU_SREG_ES))
+                       return true;
+               if(data_segment_invalid(vcpu, VCPU_SREG_FS))
+                       return true;
+               if(data_segment_invalid(vcpu, VCPU_SREG_GS))
+                       return true;
+       }
+
+       if (tr_segment_valid(vcpu))
+               return true;
+       if (ldtr_segment_valid(vcpu))
+               return true;

In real mode we aren't interested in tr and ldtr. We provide fake ones for v8086 mode, but they aren't related to the guest.
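So guard them with a protected mode check, e.g. (sketch, using the _invalid naming suggested above; adjust if you flip the polarity):

	if (vcpu->arch.cr0 & X86_CR0_PE) {
		if (tr_invalid(vcpu))
			return true;
		if (ldtr_invalid(vcpu))
			return true;
	}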

+static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
+                               struct kvm_run *kvm_run)
+{
+       u8 opcodes[4];
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned long rip = kvm_rip_read(vcpu);
+       unsigned long rip_linear;
+       int err;
+
+       while (guest_state_invalid(vcpu)) {
+               rip_linear = rip + vmx_get_segment_base(vcpu, VCPU_SREG_CS);
+               emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);

unused.

+               err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
+               
+               switch (err) {
+                       case EMULATE_DONE:
+                               kvm_report_emulation_failure(vcpu, "emulation success");

looks redundant.

+                               break;
+                       case EMULATE_DO_MMIO:
+                               kvm_report_emulation_failure(vcpu, "mmio");
+                               /* TODO: Handle MMIO */
+                               return;
+                       default:
+                               kvm_report_emulation_failure(vcpu, "emulation failure");
+                               return;
+               }
+       }
+       

The loop is dangerous, if the guest is in an infinite loop we'll never exit the kernel.

Either check for signals and need_resched(), or avoid the loop completely.
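For the former, something like:

	while (guest_state_invalid(vcpu)) {
		err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
		/* ... error handling as above ... */

		if (signal_pending(current))
			break;
		if (need_resched())
			schedule();
	}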

+       /* Guest state should be valid now, no more emulation should be needed */
+       vmx->emulation_required = 0;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -2969,131 +3214,137 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 intr_info;
-       if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
-               vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
-       if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
-               vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+       /* Handle invalid guest state instead of entering VMX */
+       if (vmx->emulation_required && emulate_invalid_guest_state)
+               handle_invalid_guest_state(vcpu, kvm_run);
+       
+       else {
+               if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
+                       vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
+               if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
+                       vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);

I'm very attached to vmx_vcpu_run(). Please don't indent it like that.

Also, mind your coding style. 'if' is not a function, there should be a space after it.

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
