The same bug was fixed in FreeBSD here: "amd64: Store full 64bit of FIP/FDP for 64bit processes when using XSAVE." https://github.com/freebsd/freebsd/commit/62a9018a8878533432500e5cb89f9bd07fd9ef14
Kamil Rytarowski CTO, Moritz Systems www.moritz.systems pt., 16 paź 2020 o 15:26 Michał Górny <mgo...@gentoo.org> napisał(a): > > When calling FXSAVE, XSAVE, FXRSTOR, ... for 64-bit programs on amd64 > use the 64-suffixed variant in order to include the complete FIP/FDP > registers in the x87 area. > > The difference between the two variants is that the FXSAVE64 (new) > variant represents FIP/FDP as 64-bit fields (union fp_addr.fa_64), > while the legacy FXSAVE variant uses split fields: 32-bit offset, > 16-bit segment and 16-bit reserved field (union fp_addr.fa_32). > The latter implies that the actual addresses are truncated to 32 bits > which is insufficient in modern programs. > > The change is applied only to 64-bit programs on amd64. Plain i386 > and compat32 continue using plain FXSAVE. Similarly, NVMM is not > changed as I am not familiar with that code. > > This is a potentially breaking change. However, I don't think it likely > to actually break anything because the data provided by the old variant > were not meaningful (because of the truncated pointer). > --- > sys/arch/x86/include/cpufunc.h | 76 ++++++++++++++++++++++++++ > sys/arch/x86/include/fpu.h | 4 +- > sys/arch/x86/x86/fpu.c | 30 ++++++---- > sys/dev/nvmm/x86/nvmm_x86_svm.c | 6 +- > sys/dev/nvmm/x86/nvmm_x86_vmx.c | 6 +- > tests/lib/libc/sys/t_ptrace_x86_wait.h | 2 - > 6 files changed, 105 insertions(+), 19 deletions(-) > > diff --git a/sys/arch/x86/include/cpufunc.h b/sys/arch/x86/include/cpufunc.h > index 38534c305544..dd8b0c7dc375 100644 > --- a/sys/arch/x86/include/cpufunc.h > +++ b/sys/arch/x86/include/cpufunc.h > @@ -485,6 +485,82 @@ xrstor(const void *addr, uint64_t mask) > ); > } > > +#ifdef __x86_64__ > +static inline void > +fxsave64(void *addr) > +{ > + uint8_t *area = addr; > + > + __asm volatile ( > + "fxsave64 %[area]" > + : [area] "=m" (*area) > + : > + : "memory" > + ); > +} > + > +static inline void > +fxrstor64(const void *addr) > +{ > + const uint8_t *area = addr; > + > + __asm volatile ( > + "fxrstor64 %[area]" > + : > + : [area] "m" (*area) > + : "memory" > + ); > +} > + > +static inline void > +xsave64(void *addr, uint64_t mask) > +{ > + uint8_t *area = addr; > + uint32_t low, high; > + > + low = mask; > + high = mask >> 32; > + __asm volatile ( > + "xsave64 %[area]" > + : [area] "=m" (*area) > + : "a" (low), "d" (high) > + : "memory" > + ); > +} > + > +static inline void > +xsaveopt64(void *addr, uint64_t mask) > +{ > + uint8_t *area = addr; > + uint32_t low, high; > + > + low = mask; > + high = mask >> 32; > + __asm volatile ( > + "xsaveopt64 %[area]" > + : [area] "=m" (*area) > + : "a" (low), "d" (high) > + : "memory" > + ); > +} > + > +static inline void > +xrstor64(const void *addr, uint64_t mask) > +{ > + const uint8_t *area = addr; > + uint32_t low, high; > + > + low = mask; > + high = mask >> 32; > + __asm volatile ( > + "xrstor64 %[area]" > + : > + : [area] "m" (*area), "a" (low), "d" (high) > + : "memory" > + ); > +} > +#endif > + > /* > -------------------------------------------------------------------------- */ > > #ifdef XENPV > diff --git a/sys/arch/x86/include/fpu.h b/sys/arch/x86/include/fpu.h > index 3255a8ca39e0..bdd86abfdd39 100644 > --- a/sys/arch/x86/include/fpu.h > +++ b/sys/arch/x86/include/fpu.h > @@ -14,8 +14,8 @@ struct trapframe; > void fpuinit(struct cpu_info *); > void fpuinit_mxcsr_mask(void); > > -void fpu_area_save(void *, uint64_t); > -void fpu_area_restore(const void *, uint64_t); > +void fpu_area_save(void *, uint64_t, bool); > +void fpu_area_restore(const void *, uint64_t, bool); > > void fpu_save(void); > > diff --git a/sys/arch/x86/x86/fpu.c b/sys/arch/x86/x86/fpu.c > index baff8d008299..e16a64f9ff8a 100644 > --- a/sys/arch/x86/x86/fpu.c > +++ b/sys/arch/x86/x86/fpu.c > @@ -156,7 +156,7 @@ fpu_save_lwp(struct lwp *l) > s = splvm(); > if (l->l_md.md_flags & MDL_FPU_IN_CPU) { > KASSERT((l->l_flag & LW_SYSTEM) == 0); > - fpu_area_save(area, x86_xsave_features); > + fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & > PK_32)); > l->l_md.md_flags &= ~MDL_FPU_IN_CPU; > } > splx(s); > @@ -246,21 +246,27 @@ fpu_errata_amd(void) > fldummy(); > } > > +#ifdef __x86_64__ > +#define XS64(x) (is_64bit ? x##64 : x) > +#else > +#define XS64(x) x > +#endif > + > void > -fpu_area_save(void *area, uint64_t xsave_features) > +fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit) > { > switch (x86_fpu_save) { > case FPU_SAVE_FSAVE: > fnsave(area); > break; > case FPU_SAVE_FXSAVE: > - fxsave(area); > + XS64(fxsave)(area); > break; > case FPU_SAVE_XSAVE: > - xsave(area, xsave_features); > + XS64(xsave)(area, xsave_features); > break; > case FPU_SAVE_XSAVEOPT: > - xsaveopt(area, xsave_features); > + XS64(xsaveopt)(area, xsave_features); > break; > } > > @@ -268,7 +274,7 @@ fpu_area_save(void *area, uint64_t xsave_features) > } > > void > -fpu_area_restore(const void *area, uint64_t xsave_features) > +fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit) > { > clts(); > > @@ -279,13 +285,13 @@ fpu_area_restore(const void *area, uint64_t > xsave_features) > case FPU_SAVE_FXSAVE: > if (cpu_vendor == CPUVENDOR_AMD) > fpu_errata_amd(); > - fxrstor(area); > + XS64(fxrstor)(area); > break; > case FPU_SAVE_XSAVE: > case FPU_SAVE_XSAVEOPT: > if (cpu_vendor == CPUVENDOR_AMD) > fpu_errata_amd(); > - xrstor(area, xsave_features); > + XS64(xrstor)(area, xsave_features); > break; > } > } > @@ -294,7 +300,8 @@ void > fpu_handle_deferred(void) > { > struct pcb *pcb = lwp_getpcb(curlwp); > - fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features); > + fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features, > + !(curlwp->l_proc->p_flag & PK_32)); > } > > void > @@ -309,7 +316,8 @@ fpu_switch(struct lwp *oldlwp, struct lwp *newlwp) > if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) { > KASSERT(!(oldlwp->l_flag & LW_SYSTEM)); > pcb = lwp_getpcb(oldlwp); > - fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features); > + fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features, > + !(oldlwp->l_proc->p_flag & PK_32)); > oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU; > } > KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU)); > @@ -413,7 +421,7 @@ fpu_kern_leave(void) > * through Spectre-class attacks to userland, even if there are > * no bugs in fpu state management. > */ > - fpu_area_restore(&zero_fpu, x86_xsave_features); > + fpu_area_restore(&zero_fpu, x86_xsave_features, false); > > /* > * Set CR0_TS again so that the kernel can't accidentally use > diff --git a/sys/dev/nvmm/x86/nvmm_x86_svm.c b/sys/dev/nvmm/x86/nvmm_x86_svm.c > index 1dab0b27c7b3..18c82520adfa 100644 > --- a/sys/dev/nvmm/x86/nvmm_x86_svm.c > +++ b/sys/dev/nvmm/x86/nvmm_x86_svm.c > @@ -1351,7 +1351,8 @@ svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu) > struct svm_cpudata *cpudata = vcpu->cpudata; > > fpu_kern_enter(); > - fpu_area_restore(&cpudata->gfpu, svm_xcr0_mask); > + /* TODO: should we use *XSAVE64 here? */ > + fpu_area_restore(&cpudata->gfpu, svm_xcr0_mask, false); > > if (svm_xcr0_mask != 0) { > cpudata->hxcr0 = rdxcr(0); > @@ -1369,7 +1370,8 @@ svm_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu) > wrxcr(0, cpudata->hxcr0); > } > > - fpu_area_save(&cpudata->gfpu, svm_xcr0_mask); > + /* TODO: should we use *XSAVE64 here? */ > + fpu_area_save(&cpudata->gfpu, svm_xcr0_mask, false); > fpu_kern_leave(); > } > > diff --git a/sys/dev/nvmm/x86/nvmm_x86_vmx.c b/sys/dev/nvmm/x86/nvmm_x86_vmx.c > index 852aadfd7626..471e56df0507 100644 > --- a/sys/dev/nvmm/x86/nvmm_x86_vmx.c > +++ b/sys/dev/nvmm/x86/nvmm_x86_vmx.c > @@ -2014,7 +2014,8 @@ vmx_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu) > struct vmx_cpudata *cpudata = vcpu->cpudata; > > fpu_kern_enter(); > - fpu_area_restore(&cpudata->gfpu, vmx_xcr0_mask); > + /* TODO: should we use *XSAVE64 here? */ > + fpu_area_restore(&cpudata->gfpu, vmx_xcr0_mask, false); > > if (vmx_xcr0_mask != 0) { > cpudata->hxcr0 = rdxcr(0); > @@ -2032,7 +2033,8 @@ vmx_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu) > wrxcr(0, cpudata->hxcr0); > } > > - fpu_area_save(&cpudata->gfpu, vmx_xcr0_mask); > + /* TODO: should we use *XSAVE64 here? */ > + fpu_area_save(&cpudata->gfpu, vmx_xcr0_mask, false); > fpu_kern_leave(); > } > > diff --git a/tests/lib/libc/sys/t_ptrace_x86_wait.h > b/tests/lib/libc/sys/t_ptrace_x86_wait.h > index 7410a0fc5500..8d3d0000b9ae 100644 > --- a/tests/lib/libc/sys/t_ptrace_x86_wait.h > +++ b/tests/lib/libc/sys/t_ptrace_x86_wait.h > @@ -3387,12 +3387,10 @@ x86_register_test(enum x86_test_regset regset, enum > x86_test_registers regs, > ATF_CHECK_EQ(fxs->fx_opcode, > expected_fpu.opcode); > #if defined(__x86_64__) > -#if 0 /* TODO: kernel needs patching to call *XSAVE64 */ > ATF_CHECK_EQ(fxs->fx_ip.fa_64, > ((uint64_t)gpr.regs[_REG_RIP]) - 3); > ATF_CHECK_EQ(fxs->fx_dp.fa_64, > (uint64_t)&x86_test_zero); > -#endif > #else > ATF_CHECK_EQ(fxs->fx_ip.fa_32.fa_off, > (uint32_t)gpr.r_eip - 3); > -- > 2.28.0 >