On Mon, Apr 05, 2021 at 09:54:14AM -0400, Dave Voutila wrote:
>
> Dave Voutila writes:
>
> > The following diff cleans up and improves MSR-related event handling in
> > vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
> > mentioned in a previous email to tech@ about fixing support for 9front
> > guests [1], we found some discrepancies between vmm(4)'s handling on
> > Intel hosts and AMD hosts.
> >
> > While the diff has already been tested by abieber@ and brynet@, with
> > additional review by mlarkin@, I'm looking for additional testers
> > willing to look for regressions.
>
> Last call for additional testers. The plan is to commit the diff later
> today, as I have an OK from mlarkin@.
This is ok brynet@ too!

> > This diff specifically improves and standardizes MSR-based exit handling
> > between Intel and AMD hosts as follows:
> >
> > 1. All RDMSR instructions that cause a vm-exit must be explicitly
> > handled (e.g. via emulation) or they result in injecting a #GP
> > exception into the guest.
> >
> > 2. All WRMSR instructions that cause a vm-exit and are not explicitly
> > handled are ignored (i.e. the %rax and %rdx values are not inspected or
> > used).
> >
> > As a consequence of change (1) above, the diff adds explicit handling
> > for the following MSRs:
> >
> > 1. MSR_CR_PAT: for now, reads and writes are shadowed for the guest
> > vcpu and host state is not touched. The shadow state is initialized on
> > vcpu reset to the same value as the host's.
> >
> > 2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all
> > ignored. This means reads result in vmm(4) setting the guest vcpu's
> > %rax and %rdx to 0. (These MSRs are ignored in the same manner by
> > other hypervisors like kvm and nvmm.)
> >
> > The average user should not see a change in the behavior of vmm(4) or
> > vmd(8). The biggest change is for *Intel* users, as this diff changes
> > the current vmx logic, which was not injecting #GP for unsupported
> > MSRs. (Your guests were potentially getting garbage results from rdmsr
> > instructions.)
>
> If you do test the diff and have issues, I forgot to mention: please
> build the kernel with VMM_DEBUG. The output to the kernel buffer will
> help diagnose any problematic MSR access.
>
> > The folks attempting to host the latest release of 9front as a guest on
> > AMD hosts should see their guest boot successfully with this diff :-)
> >
> > -dv
> >
> > [1] https://marc.info/?l=openbsd-tech&m=161693517121814&w=2
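A side note for anyone following the diff below: the handlers pass 64-bit
MSR values split across the guest's %edx:%eax pair, per the rdmsr/wrmsr
convention, so the shadowed state has to be packed and unpacked on every
exit. Here is a minimal standalone sketch of that split; the helper names
are illustrative only, not the actual vmm(4) functions:

    #include <stdint.h>

    /* Combine the guest's %rdx:%rax into the 64-bit value wrmsr wrote. */
    static uint64_t
    msr_value(uint64_t rax, uint64_t rdx)
    {
            return (rdx << 32) | (rax & 0xFFFFFFFFULL);
    }

    /* Split a 64-bit value back into %rax/%rdx for a rdmsr result. */
    static void
    msr_result(uint64_t val, uint64_t *rax, uint64_t *rdx)
    {
            *rax = val & 0xFFFFFFFFULL;     /* low 32 bits */
            *rdx = val >> 32;               /* high 32 bits */
    }

The same two expressions appear verbatim in the vmx and svm handlers in
the diff.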
> > Index: sys/arch/amd64/include/vmmvar.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
> > retrieving revision 1.70
> > diff -u -p -r1.70 vmmvar.h
> > --- sys/arch/amd64/include/vmmvar.h	8 Apr 2020 07:39:48 -0000	1.70
> > +++ sys/arch/amd64/include/vmmvar.h	31 Mar 2021 00:15:43 -0000
> > @@ -936,6 +936,9 @@ struct vcpu {
> >  	paddr_t vc_pvclock_system_gpa;
> >  	uint32_t vc_pvclock_system_tsc_mul;
> >  
> > +	/* Shadowed MSRs */
> > +	uint64_t vc_shadow_pat;
> > +
> >  	/* VMX only */
> >  	uint64_t vc_vmx_basic;
> >  	uint64_t vc_vmx_entry_ctls;
> > Index: sys/arch/amd64/amd64/vmm.c
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> > retrieving revision 1.278
> > diff -u -p -r1.278 vmm.c
> > --- sys/arch/amd64/amd64/vmm.c	11 Mar 2021 11:16:55 -0000	1.278
> > +++ sys/arch/amd64/amd64/vmm.c	31 Mar 2021 00:15:43 -0000
> > @@ -207,6 +207,7 @@ void svm_set_dirty(struct vcpu *, uint32
> >  int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
> >  void vmm_init_pvclock(struct vcpu *, paddr_t);
> >  int vmm_update_pvclock(struct vcpu *);
> > +int vmm_pat_is_valid(uint64_t);
> >  
> >  #ifdef VMM_DEBUG
> >  void dump_vcpu(struct vcpu *);
> > @@ -3193,6 +3194,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
> >  	/* xcr0 power on default sets bit 0 (x87 state) */
> >  	vcpu->vc_gueststate.vg_xcr0 = XCR0_X87 & xsave_mask;
> >  
> > +	/* XXX PAT shadow */
> > +	vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
> > +
> >  exit:
> >  	/* Flush the VMCS */
> >  	if (vmclear(&vcpu->vc_control_pa)) {
> > @@ -3584,6 +3588,10 @@ vcpu_init(struct vcpu *vcpu)
> >  	vcpu->vc_state = VCPU_STATE_STOPPED;
> >  	vcpu->vc_vpid = 0;
> >  	vcpu->vc_pvclock_system_gpa = 0;
> > +
> > +	/* Shadow PAT MSR, starting with host's value. */
> > +	vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
> > +
> >  	if (vmm_softc->mode == VMM_MODE_VMX ||
> >  	    vmm_softc->mode == VMM_MODE_EPT)
> >  		ret = vcpu_init_vmx(vcpu);
> > @@ -6257,26 +6265,24 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
> >  	rdx = &vcpu->vc_gueststate.vg_rdx;
> >  
> >  	switch (*rcx) {
> > -	case MSR_SMBASE:
> > -		/*
> > -		 * 34.15.6.3 - Saving Guest State (SMM)
> > -		 *
> > -		 * Unsupported, so inject #GP and return without
> > -		 * advancing %rip.
> > -		 */
> > +	case MSR_BIOS_SIGN:
> > +	case MSR_PLATFORM_ID:
> > +		/* Ignored */
> > +		*rax = 0;
> > +		*rdx = 0;
> > +		break;
> > +	case MSR_CR_PAT:
> > +		*rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
> > +		*rdx = (vcpu->vc_shadow_pat >> 32);
> > +		break;
> > +	default:
> > +		/* Unsupported MSRs causes #GP exception, don't advance %rip */
> > +		DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
> > +		    __func__, *rcx);
> >  		ret = vmm_inject_gp(vcpu);
> >  		return (ret);
> >  	}
> >  
> > -	*rax = 0;
> > -	*rdx = 0;
> > -
> > -#ifdef VMM_DEBUG
> > -	/* Log the access, to be able to identify unknown MSRs */
> > -	DPRINTF("%s: rdmsr exit, msr=0x%llx, data returned to "
> > -	    "guest=0x%llx:0x%llx\n", __func__, *rcx, *rdx, *rax);
> > -#endif /* VMM_DEBUG */
> > -
> >  	vcpu->vc_gueststate.vg_rip += insn_length;
> >  
> >  	return (0);
> > @@ -6442,7 +6448,7 @@ vmx_handle_misc_enable_msr(struct vcpu *
> >  int
> >  vmx_handle_wrmsr(struct vcpu *vcpu)
> >  {
> > -	uint64_t insn_length;
> > +	uint64_t insn_length, val;
> >  	uint64_t *rax, *rdx, *rcx;
> >  	int ret;
> >  
> > @@ -6460,8 +6466,16 @@ vmx_handle_wrmsr(struct vcpu *vcpu)
> >  	rax = &vcpu->vc_gueststate.vg_rax;
> >  	rcx = &vcpu->vc_gueststate.vg_rcx;
> >  	rdx = &vcpu->vc_gueststate.vg_rdx;
> > +	val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
> >  
> >  	switch (*rcx) {
> > +	case MSR_CR_PAT:
> > +		if (!vmm_pat_is_valid(val)) {
> > +			ret = vmm_inject_gp(vcpu);
> > +			return (ret);
> > +		}
> > +		vcpu->vc_shadow_pat = val;
> > +		break;
> >  	case MSR_MISC_ENABLE:
> >  		vmx_handle_misc_enable_msr(vcpu);
> >  		break;
> > @@ -6508,7 +6522,7 @@
> >  int
> >  svm_handle_msr(struct vcpu *vcpu)
> >  {
> > -	uint64_t insn_length;
> > +	uint64_t insn_length, val;
> >  	uint64_t *rax, *rcx, *rdx;
> >  	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
> >  	int ret;
> > @@ -6521,35 +6535,58 @@
> >  	rdx = &vcpu->vc_gueststate.vg_rdx;
> >  
> >  	if (vmcb->v_exitinfo1 == 1) {
> > -		if (*rcx == MSR_EFER) {
> > +		/* WRMSR */
> > +		val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
> > +
> > +		switch (*rcx) {
> > +		case MSR_CR_PAT:
> > +			if (!vmm_pat_is_valid(val)) {
> > +				ret = vmm_inject_gp(vcpu);
> > +				return (ret);
> > +			}
> > +			vcpu->vc_shadow_pat = val;
> > +			break;
> > +		case MSR_EFER:
> >  			vmcb->v_efer = *rax | EFER_SVME;
> > -		} else if (*rcx == KVM_MSR_SYSTEM_TIME) {
> > +			break;
> > +		case KVM_MSR_SYSTEM_TIME:
> >  			vmm_init_pvclock(vcpu,
> >  			    (*rax & 0xFFFFFFFFULL) | (*rdx << 32));
> > -		} else {
> > -#ifdef VMM_DEBUG
> > +			break;
> > +		default:
> >  			/* Log the access, to be able to identify unknown MSRs
> >  			 */
> >  			DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
> >  			    "written from guest=0x%llx:0x%llx\n", __func__,
> >  			    *rcx, *rdx, *rax);
> > -#endif /* VMM_DEBUG */
> >  		}
> >  	} else {
> > +		/* RDMSR */
> >  		switch (*rcx) {
> > -		case MSR_DE_CFG:
> > -			/* LFENCE seralizing bit is set by host */
> > -			*rax = DE_CFG_SERIALIZE_LFENCE;
> > -			*rdx = 0;
> > -			break;
> > -		case MSR_INT_PEN_MSG:
> > -			*rax = 0;
> > -			*rdx = 0;
> > -			break;
> > -		default:
> > -			DPRINTF("%s: guest read msr 0x%llx, injecting "
> > -			    "#GP\n", __func__, *rcx);
> > -			ret = vmm_inject_gp(vcpu);
> > -			return (ret);
> > +		case MSR_BIOS_SIGN:
> > +		case MSR_INT_PEN_MSG:
> > +		case MSR_PLATFORM_ID:
> > +			/* Ignored */
> > +			*rax = 0;
> > +			*rdx = 0;
> > +			break;
> > +		case MSR_CR_PAT:
> > +			*rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
> > +			*rdx = (vcpu->vc_shadow_pat >> 32);
> > +			break;
> > +		case MSR_DE_CFG:
> > +			/* LFENCE seralizing bit is set by host */
> > +			*rax = DE_CFG_SERIALIZE_LFENCE;
> > +			*rdx = 0;
> > +			break;
> > +		default:
> > +			/*
> > +			 * Unsupported MSRs causes #GP exception, don't advance
> > +			 * %rip
> > +			 */
> > +			DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
> > +			    "injecting #GP\n", __func__, *rcx);
> > +			ret = vmm_inject_gp(vcpu);
> > +			return (ret);
> >  		}
> >  	}
> >  
> > @@ -7271,6 +7308,23 @@ vmm_update_pvclock(struct vcpu *vcpu)
> >  		pvclock_ti->ti_version &= ~0x1;
> >  	}
> >  	return (0);
> > +}
> > +
> > +int
> > +vmm_pat_is_valid(uint64_t pat)
> > +{
> > +	int i;
> > +	uint8_t *byte = (uint8_t *)&pat;
> > +
> > +	/* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
> > +	for (i = 0; i < 8; i++) {
> > +		if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) {
> > +			DPRINTF("%s: invalid pat %llx\n", __func__, pat);
> > +			return 0;
> > +		}
> > +	}
> > +
> > +	return 1;
> >  }
> >  
> >  /*
>
> --
> -Dave Voutila
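One note for anyone eyeballing the new PAT check: the PAT MSR packs eight
one-byte memory-type entries, and per the Intel SDM the type encodings
0x02, 0x03, and 0x08-0xFF are reserved, so a wrmsr containing any of them
must raise #GP. A small userland sketch of the same rule, handy as a quick
sanity test (illustrative code only, not part of the diff):

    #include <stdint.h>
    #include <stdio.h>

    /* Same validity rule as vmm_pat_is_valid() in the diff above. */
    static int
    pat_is_valid(uint64_t pat)
    {
            uint8_t *byte = (uint8_t *)&pat;
            int i;

            for (i = 0; i < 8; i++)
                    if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07)
                            return 0;
            return 1;
    }

    int
    main(void)
    {
            /* Power-on default PAT (WB/WT/UC-/UC repeated): valid. */
            printf("%d\n", pat_is_valid(0x0007040600070406ULL));   /* 1 */
            /* Reserved type 0x02 in the low entry: must be rejected. */
            printf("%d\n", pat_is_valid(0x0007040600070402ULL));   /* 0 */
            return 0;
    }

With the shadow in place, the guest-visible behavior matches bare metal:
writing a reserved PAT encoding gets a #GP instead of touching host state.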