[Xen-devel] [PATCH 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl
The existing vIOPL interface is hard to use, and need not be. Introduce a VMASSIST with which a guest can opt-in to having vIOPL behaviour consistently with native hardware. Specifically: - virtual iopl updated from do_iret() hypercalls. - virtual iopl reported in bounce frames. - guest kernels assumed to be level 0 for the purpose of iopl checks. Signed-off-by: Andrew Cooper --- CC: Jan Beulich --- xen/arch/x86/domain.c | 10 +++--- xen/arch/x86/physdev.c | 2 +- xen/arch/x86/traps.c | 8 ++-- xen/arch/x86/x86_64/asm-offsets.c | 3 +++ xen/arch/x86/x86_64/compat/entry.S | 7 ++- xen/arch/x86/x86_64/compat/traps.c | 4 xen/arch/x86/x86_64/entry.S| 7 ++- xen/arch/x86/x86_64/traps.c| 3 +++ xen/include/asm-x86/config.h | 1 + xen/include/asm-x86/domain.h | 3 ++- xen/include/public/xen.h | 8 11 files changed, 47 insertions(+), 9 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index a6d721b..36b9aaa 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1001,7 +1001,7 @@ int arch_set_info_guest( init_int80_direct_trap(v); /* IOPL privileges are virtualised. */ -v->arch.pv_vcpu.iopl = (v->arch.user_regs.eflags >> 12) & 3; +v->arch.pv_vcpu.iopl = v->arch.user_regs.eflags & X86_EFLAGS_IOPL; v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL; /* Ensure real hardware interrupts are enabled. */ @@ -1742,8 +1742,10 @@ static void load_segments(struct vcpu *n) cs_and_mask = (unsigned short)regs->cs | ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16); /* Fold upcall mask into RFLAGS.IF. */ -eflags = regs->_eflags & ~X86_EFLAGS_IF; +eflags = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL); eflags |= !vcpu_info(n, evtchn_upcall_mask) << 9; +if ( VM_ASSIST(n->domain, architectural_iopl) ) +eflags |= n->arch.pv_vcpu.iopl; if ( !ring_1(regs) ) { @@ -1788,8 +1790,10 @@ static void load_segments(struct vcpu *n) ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32); /* Fold upcall mask into RFLAGS.IF. 
*/ -rflags = regs->rflags & ~X86_EFLAGS_IF; +rflags = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL); rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9; +if ( VM_ASSIST(n->domain, architectural_iopl) ) +rflags |= n->arch.pv_vcpu.iopl; if ( put_user(regs->ss,rsp- 1) | put_user(regs->rsp, rsp- 2) | diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c index 1cb9b58..5a49796 100644 --- a/xen/arch/x86/physdev.c +++ b/xen/arch/x86/physdev.c @@ -529,7 +529,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) if ( set_iopl.iopl > 3 ) break; ret = 0; -curr->arch.pv_vcpu.iopl = set_iopl.iopl; +curr->arch.pv_vcpu.iopl = MASK_INSR(set_iopl.iopl, X86_EFLAGS_IOPL); break; } diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 564a107..9754a2f 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -1806,7 +1806,9 @@ static int guest_io_okay( #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) if ( !vm86_mode(regs) && - (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) ) + (MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) >= + (guest_kernel_mode(v, regs) ? + (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3)) ) return 1; if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) ) @@ -2367,7 +2369,9 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) case 0xfa: /* CLI */ case 0xfb: /* STI */ -if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) ) +if ( MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) < + (guest_kernel_mode(v, regs) ? + (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3) ) goto fail; /* * This is just too dangerous to allow, in my opinion. 
Consider if the diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c index 447c650..fa37ee0 100644 --- a/xen/arch/x86/x86_64/asm-offsets.c +++ b/xen/arch/x86/x86_64/asm-offsets.c @@ -86,6 +86,7 @@ void __dummy__(void) OFFSET(VCPU_trap_ctxt, struct vcpu, arch.pv_vcpu.trap_ctxt); OFFSET(VCPU_kernel_sp, struct vcpu, arch.pv_vcpu.kernel_sp); OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss); +OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl); OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags); OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending); OFFSET(VCPU_mce_pending, struct vcpu, mce_pending); @@ -166,4 +167,6 @@ void __dummy__(void) OFFSET(MB_flags, multiboot_info_t, f
Re: [Xen-devel] [PATCH 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl
On 17/03/16 10:25, Jan Beulich wrote: On 16.03.16 at 21:05, wrote: >> @@ -1742,8 +1742,10 @@ static void load_segments(struct vcpu *n) >> cs_and_mask = (unsigned short)regs->cs | >> ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16); >> /* Fold upcall mask into RFLAGS.IF. */ >> -eflags = regs->_eflags & ~X86_EFLAGS_IF; >> +eflags = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL); > This and ... > >> @@ -1788,8 +1790,10 @@ static void load_segments(struct vcpu *n) >> ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32); >> >> /* Fold upcall mask into RFLAGS.IF. */ >> -rflags = regs->rflags & ~X86_EFLAGS_IF; >> +rflags = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL); > ... this is not really necessary (but also not wrong) - the actual > EFLAGS.IOPL is always zero (and assumed to be so by code > further down from the respective adjustments you make). For > consistency's sake it might be better to either drop the changes > here, or also adjust the two places masking regs->eflags. I will adjust the others. I would prefer not to rely on the assumption that it is actually 0. > >> --- a/xen/arch/x86/traps.c >> +++ b/xen/arch/x86/traps.c >> @@ -1806,7 +1806,9 @@ static int guest_io_okay( >> #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) >> >> if ( !vm86_mode(regs) && >> - (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) ) >> + (MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) >= >> + (guest_kernel_mode(v, regs) ? >> + (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3)) ) >> return 1; >> >> if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) ) >> @@ -2367,7 +2369,9 @@ static int emulate_privileged_op(struct cpu_user_regs >> *regs) >> >> case 0xfa: /* CLI */ >> case 0xfb: /* STI */ >> -if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) ) >> +if ( MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) < >> + (guest_kernel_mode(v, regs) ? >> + (VM_ASSIST(v->domain, architectural_iopl) ? 
0 : 1) : 3) ) >> goto fail; > The similarity of the two together with the growing complexity > suggests to make this a macro or inline function. Additionally > resulting binary code would likely be better if you compared > v->arch.pv_vcpu.iopl with MASK_INSR(, > X86_EFLAGS_IOPL), even if that means having three > MASK_INSR() (albeit those perhaps again would be hidden in > a macro, e.g. > > #define IOPL(n) MASK_INSR(n, X86_EFLAGS_IOPL) I will see what I can do. > > ). > >> --- a/xen/arch/x86/x86_64/compat/entry.S >> +++ b/xen/arch/x86/x86_64/compat/entry.S >> @@ -277,9 +277,14 @@ compat_create_bounce_frame: >> testb %al,%al # Bits 0-7: saved_upcall_mask >> setz %ch # %ch == !saved_upcall_mask >> movl UREGS_eflags+8(%rsp),%eax >> -andl $~X86_EFLAGS_IF,%eax >> +andl $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax > See earlier comment. I hope I have suitably answered why this is staying. > >> addb %ch,%ch # Bit 9 (EFLAGS.IF) >> orb %ch,%ah # Fold EFLAGS.IF into %eax >> +movq VCPU_domain(%rbx),%rcx# if ( VM_ASSIST(v->domain, >> architectural_iopl) ) > If you used another register, this could be pulled up quite a bit, > to hide the latency of the load before the loaded value gets used. Can do, but given all pipeline stalls which currently exist in this code, I doubt it will make any observable difference. > >> +testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%rcx) >> +jz.Lft6 >> +movzwl VCPU_iopl(%rbx),%ecx # Bits 13:12 (EFLAGS.IOPL) > Why not just MOVL? Ah yes - this is leftover from the first iteration. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl
>>> On 16.03.16 at 21:05, wrote: > @@ -1742,8 +1742,10 @@ static void load_segments(struct vcpu *n) > cs_and_mask = (unsigned short)regs->cs | > ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16); > /* Fold upcall mask into RFLAGS.IF. */ > -eflags = regs->_eflags & ~X86_EFLAGS_IF; > +eflags = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL); This and ... > @@ -1788,8 +1790,10 @@ static void load_segments(struct vcpu *n) > ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32); > > /* Fold upcall mask into RFLAGS.IF. */ > -rflags = regs->rflags & ~X86_EFLAGS_IF; > +rflags = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL); ... this is not really necessary (but also not wrong) - the actual EFLAGS.IOPL is always zero (and assumed to be so by code further down from the respective adjustments you make). For consistency's sake it might be better to either drop the changes here, or also adjust the two places masking regs->eflags. > --- a/xen/arch/x86/traps.c > +++ b/xen/arch/x86/traps.c > @@ -1806,7 +1806,9 @@ static int guest_io_okay( > #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) > > if ( !vm86_mode(regs) && > - (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) ) > + (MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) >= > + (guest_kernel_mode(v, regs) ? > + (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3)) ) > return 1; > > if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) ) > @@ -2367,7 +2369,9 @@ static int emulate_privileged_op(struct cpu_user_regs > *regs) > > case 0xfa: /* CLI */ > case 0xfb: /* STI */ > -if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) ) > +if ( MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) < > + (guest_kernel_mode(v, regs) ? > + (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3) ) > goto fail; The similarity of the two together with the growing complexity suggests to make this a macro or inline function. 
Additionally resulting binary code would likely be better if you compared v->arch.pv_vcpu.iopl with MASK_INSR(, X86_EFLAGS_IOPL), even if that means having three MASK_INSR() (albeit those perhaps again would be hidden in a macro, e.g. #define IOPL(n) MASK_INSR(n, X86_EFLAGS_IOPL) ). > --- a/xen/arch/x86/x86_64/compat/entry.S > +++ b/xen/arch/x86/x86_64/compat/entry.S > @@ -277,9 +277,14 @@ compat_create_bounce_frame: > testb %al,%al # Bits 0-7: saved_upcall_mask > setz %ch # %ch == !saved_upcall_mask > movl UREGS_eflags+8(%rsp),%eax > -andl $~X86_EFLAGS_IF,%eax > +andl $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax See earlier comment. > addb %ch,%ch # Bit 9 (EFLAGS.IF) > orb %ch,%ah # Fold EFLAGS.IF into %eax > +movq VCPU_domain(%rbx),%rcx# if ( VM_ASSIST(v->domain, > architectural_iopl) ) If you used another register, this could be pulled up quite a bit, to hide the latency of the load before the loaded value gets used. > +testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%rcx) > +jz.Lft6 > +movzwl VCPU_iopl(%rbx),%ecx # Bits 13:12 (EFLAGS.IOPL) Why not just MOVL? > +orl %ecx,%eax # Fold EFLAGS.IOPL into %eax Also I continue to think this would better be done with CMOVcc, avoiding yet another conditional branch here. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl
>>> On 17.03.16 at 11:45, wrote: > On 17/03/16 10:25, Jan Beulich wrote: > On 16.03.16 at 21:05, wrote: >>> @@ -1742,8 +1742,10 @@ static void load_segments(struct vcpu *n) >>> cs_and_mask = (unsigned short)regs->cs | >>> ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16); >>> /* Fold upcall mask into RFLAGS.IF. */ >>> -eflags = regs->_eflags & ~X86_EFLAGS_IF; >>> +eflags = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL); >> This and ... >> >>> @@ -1788,8 +1790,10 @@ static void load_segments(struct vcpu *n) >>> ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32); >>> >>> /* Fold upcall mask into RFLAGS.IF. */ >>> -rflags = regs->rflags & ~X86_EFLAGS_IF; >>> +rflags = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL); >> ... this is not really necessary (but also not wrong) - the actual >> EFLAGS.IOPL is always zero (and assumed to be so by code >> further down from the respective adjustments you make). For >> consistency's sake it might be better to either drop the changes >> here, or also adjust the two places masking regs->eflags. > > I will adjust the others. I would prefer not to rely on the assumption > that it is actually 0. But you realize that if it wasn't zero, we'd have a security issue? (This notwithstanding I'm fine with both directions, as indicated before.) Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl
On 17/03/16 11:00, Jan Beulich wrote: On 17.03.16 at 11:45, wrote: >> On 17/03/16 10:25, Jan Beulich wrote: >> On 16.03.16 at 21:05, wrote: @@ -1742,8 +1742,10 @@ static void load_segments(struct vcpu *n) cs_and_mask = (unsigned short)regs->cs | ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16); /* Fold upcall mask into RFLAGS.IF. */ -eflags = regs->_eflags & ~X86_EFLAGS_IF; +eflags = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL); >>> This and ... >>> @@ -1788,8 +1790,10 @@ static void load_segments(struct vcpu *n) ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32); /* Fold upcall mask into RFLAGS.IF. */ -rflags = regs->rflags & ~X86_EFLAGS_IF; +rflags = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL); >>> ... this is not really necessary (but also not wrong) - the actual >>> EFLAGS.IOPL is always zero (and assumed to be so by code >>> further down from the respective adjustments you make). For >>> consistency's sake it might be better to either drop the changes >>> here, or also adjust the two places masking regs->eflags. >> I will adjust the others. I would prefer not to rely on the assumption >> that it is actually 0. > But you realize that if it wasn't zero, we'd have a security issue? Indeed. But as this adjustment is literally free for us to use, making Xen a little more robust in the (hopefully never) case where IOPL ends up not being 0. ~Andrew > (This notwithstanding I'm fine with both directions, as indicated > before.) > > Jan > ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel