Dave Voutila <[email protected]> writes:

> This diff removes instability from VMX-based hosts by either removing
> the possibility of the process sleeping while the VMCS is active or
> reloading it if we had no choice.
>
> A mutex is added to help guard the VMCS state so testing with witness
> has helped verify the diff.
>

Removed the mutex as it has served its purpose in ferreting out some
sleep points.

> The rwlock on the cpu originally used in the remote vmclear routine is
> changed to a mutex accordingly.
>

Reverted this. This update doesn't change the rwlock to a mutex...it's
fine if we sleep while we wait for a remote clear as it doesn't matter
which CPU we wake up on as we're about to reload the VMCS anyways.

> This diff does not remove possible calls to printf(9) via the DPRINTF
> macro as that's part of the next diff.
>

Moot at this point.

> One area of note: in vmx_load_pdptes() there's a XXX to call out that
> because of the printf(9) call on failure to km_alloc that the VMCS is
> potentially no longer valid. The upcoming diff to swap out printf(9) for
> log(9) will remove that.
>

Revisited the above now that we're holding off on this printf -> log
changeover.

It was in the previous diff as well, but just to point out this removes
the KERNEL_LOCK dance around uvm_fault. We were only doing this on Intel
hosts as it wasn't understood (at that time) what was causing the VMCS
corruption. AMD hosts haven't done this during nested page fault exit
handling since my work to unlock vmm(4) at k2k21.

ok?


blob - 8e588f7dcbd1cec2e61e7b7292ee32ff4eb9a2e1
blob + ac91b74fd4d5da774808ad1c78d75469ff89b458
--- sys/arch/amd64/amd64/vmm.c
+++ sys/arch/amd64/amd64/vmm.c
@@ -3028,12 +3028,22 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg
            IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
                if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
                    IA32_VMX_ENABLE_VPID, 1)) {
-                       if (vmm_alloc_vpid(&vpid)) {
+
+                       /* We may sleep during allocation, so reload VMCS. */
+                       vcpu->vc_last_pcpu = curcpu();
+                       ret = vmm_alloc_vpid(&vpid);
+                       if (vcpu_reload_vmcs_vmx(vcpu)) {
+                               printf("%s: failed to reload vmcs\n", __func__);
+                               ret = EINVAL;
+                               goto exit;
+                       }
+                       if (ret) {
                                DPRINTF("%s: could not allocate VPID\n",
                                    __func__);
                                ret = EINVAL;
                                goto exit;
                        }
+
                        if (vmwrite(VMCS_GUEST_VPID, vpid)) {
                                DPRINTF("%s: error setting guest VPID\n",
                                    __func__);
@@ -5549,7 +5559,7 @@ svm_handle_np_fault(struct vcpu *vcpu)
  *
  * Return Values:
  *  0: if successful
- *  EINVAL: if fault type could not be determined
+ *  EINVAL: if fault type could not be determined or VMCS reload fails
  *  EAGAIN: if a protection fault occurred, ie writing to a read-only page
  *  errno: if uvm_fault(9) fails to wire in the page
  */
@@ -5569,10 +5579,14 @@ vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
                return (EAGAIN);
        }

-       KERNEL_LOCK();
+       /* We may sleep during uvm_fault(9), so reload VMCS. */
+       vcpu->vc_last_pcpu = curcpu();
        ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE,
            PROT_READ | PROT_WRITE | PROT_EXEC);
-       KERNEL_UNLOCK();
+       if (vcpu_reload_vmcs_vmx(vcpu)) {
+               printf("%s: failed to reload vmcs\n", __func__);
+               return (EINVAL);
+       }

        if (ret)
                printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
@@ -5962,7 +5976,16 @@ vmx_load_pdptes(struct vcpu *vcpu)

        ret = 0;

-       cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_waitok);
+       /* We may sleep during km_alloc(9), so reload VMCS. */
+       vcpu->vc_last_pcpu = curcpu();
+       cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none,
+           &kd_waitok);
+       if (vcpu_reload_vmcs_vmx(vcpu)) {
+               printf("%s: failed to reload vmcs\n", __func__);
+               ret = EINVAL;
+               goto exit;
+       }
+
        if (!cr3_host_virt) {
                printf("%s: can't allocate address for guest CR3 mapping\n",
                    __func__);
@@ -5998,7 +6021,15 @@ vmx_load_pdptes(struct vcpu *vcpu)

 exit:
        pmap_kremove(cr3_host_virt, PAGE_SIZE);
+
+       /* km_free(9) might sleep, so we need to reload VMCS. */
+       vcpu->vc_last_pcpu = curcpu();
        km_free((void *)cr3_host_virt, PAGE_SIZE, &kv_any, &kp_none);
+       if (vcpu_reload_vmcs_vmx(vcpu)) {
+               printf("%s: failed to reload vmcs after km_free\n", __func__);
+               ret = EINVAL;
+       }
+
        return (ret);
 }

Reply via email to