[Xen-devel] [PATCH 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl

2016-03-19 Thread Andrew Cooper
The existing vIOPL interface is hard to use, and need not be.

Introduce a VMASSIST with which a guest can opt-in to having vIOPL behaviour
consistent with native hardware.

Specifically:
 - virtual iopl updated from do_iret() hypercalls.
 - virtual iopl reported in bounce frames.
 - guest kernels assumed to be level 0 for the purpose of iopl checks.

Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
---
 xen/arch/x86/domain.c  | 10 +++---
 xen/arch/x86/physdev.c |  2 +-
 xen/arch/x86/traps.c   |  8 ++--
 xen/arch/x86/x86_64/asm-offsets.c  |  3 +++
 xen/arch/x86/x86_64/compat/entry.S |  7 ++-
 xen/arch/x86/x86_64/compat/traps.c |  4 
 xen/arch/x86/x86_64/entry.S|  7 ++-
 xen/arch/x86/x86_64/traps.c|  3 +++
 xen/include/asm-x86/config.h   |  1 +
 xen/include/asm-x86/domain.h   |  3 ++-
 xen/include/public/xen.h   |  8 
 11 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index a6d721b..36b9aaa 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1001,7 +1001,7 @@ int arch_set_info_guest(
 init_int80_direct_trap(v);
 
 /* IOPL privileges are virtualised. */
-v->arch.pv_vcpu.iopl = (v->arch.user_regs.eflags >> 12) & 3;
+v->arch.pv_vcpu.iopl = v->arch.user_regs.eflags & X86_EFLAGS_IOPL;
 v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL;
 
 /* Ensure real hardware interrupts are enabled. */
@@ -1742,8 +1742,10 @@ static void load_segments(struct vcpu *n)
 cs_and_mask = (unsigned short)regs->cs |
 ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
 /* Fold upcall mask into RFLAGS.IF. */
-eflags  = regs->_eflags & ~X86_EFLAGS_IF;
+eflags  = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
 eflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+if ( VM_ASSIST(n->domain, architectural_iopl) )
+eflags |= n->arch.pv_vcpu.iopl;
 
 if ( !ring_1(regs) )
 {
@@ -1788,8 +1790,10 @@ static void load_segments(struct vcpu *n)
 ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);
 
 /* Fold upcall mask into RFLAGS.IF. */
-rflags  = regs->rflags & ~X86_EFLAGS_IF;
+rflags  = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
 rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+if ( VM_ASSIST(n->domain, architectural_iopl) )
+rflags |= n->arch.pv_vcpu.iopl;
 
 if ( put_user(regs->ss,rsp- 1) |
  put_user(regs->rsp,   rsp- 2) |
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index 1cb9b58..5a49796 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -529,7 +529,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) 
arg)
 if ( set_iopl.iopl > 3 )
 break;
 ret = 0;
-curr->arch.pv_vcpu.iopl = set_iopl.iopl;
+curr->arch.pv_vcpu.iopl = MASK_INSR(set_iopl.iopl, X86_EFLAGS_IOPL);
 break;
 }
 
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 564a107..9754a2f 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1806,7 +1806,9 @@ static int guest_io_okay(
 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
 
 if ( !vm86_mode(regs) &&
- (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
+ (MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) >=
+  (guest_kernel_mode(v, regs) ?
+   (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3)) )
 return 1;
 
 if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
@@ -2367,7 +2369,9 @@ static int emulate_privileged_op(struct cpu_user_regs 
*regs)
 
 case 0xfa: /* CLI */
 case 0xfb: /* STI */
-if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
+if ( MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) <
+ (guest_kernel_mode(v, regs) ?
+  (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3) )
 goto fail;
 /*
  * This is just too dangerous to allow, in my opinion. Consider if the
diff --git a/xen/arch/x86/x86_64/asm-offsets.c 
b/xen/arch/x86/x86_64/asm-offsets.c
index 447c650..fa37ee0 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -86,6 +86,7 @@ void __dummy__(void)
 OFFSET(VCPU_trap_ctxt, struct vcpu, arch.pv_vcpu.trap_ctxt);
 OFFSET(VCPU_kernel_sp, struct vcpu, arch.pv_vcpu.kernel_sp);
 OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss);
+OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl);
 OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags);
 OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
 OFFSET(VCPU_mce_pending, struct vcpu, mce_pending);
@@ -166,4 +167,6 @@ void __dummy__(void)
 OFFSET(MB_flags, multiboot_info_t, f

Re: [Xen-devel] [PATCH 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl

2016-03-19 Thread Andrew Cooper
On 17/03/16 10:25, Jan Beulich wrote:
 On 16.03.16 at 21:05,  wrote:
>> @@ -1742,8 +1742,10 @@ static void load_segments(struct vcpu *n)
>>  cs_and_mask = (unsigned short)regs->cs |
>>  ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
>>  /* Fold upcall mask into RFLAGS.IF. */
>> -eflags  = regs->_eflags & ~X86_EFLAGS_IF;
>> +eflags  = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
> This and ...
>
>> @@ -1788,8 +1790,10 @@ static void load_segments(struct vcpu *n)
>>  ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);
>>  
>>  /* Fold upcall mask into RFLAGS.IF. */
>> -rflags  = regs->rflags & ~X86_EFLAGS_IF;
>> +rflags  = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
> ... this is not really necessary (but also not wrong) - the actual
> EFLAGS.IOPL is always zero (and assumed to be so by code
> further down from the respective adjustments you make). For
> consistency's sake it might be better to either drop the changes
> here, or also adjust the two places masking regs->eflags.

I will adjust the others.  I would prefer not to rely on the assumption
that it is actually 0.

>
>> --- a/xen/arch/x86/traps.c
>> +++ b/xen/arch/x86/traps.c
>> @@ -1806,7 +1806,9 @@ static int guest_io_okay(
>>  #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
>>  
>>  if ( !vm86_mode(regs) &&
>> - (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
>> + (MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) >=
>> +  (guest_kernel_mode(v, regs) ?
>> +   (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3)) )
>>  return 1;
>>  
>>  if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
>> @@ -2367,7 +2369,9 @@ static int emulate_privileged_op(struct cpu_user_regs 
>> *regs)
>>  
>>  case 0xfa: /* CLI */
>>  case 0xfb: /* STI */
>> -if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
>> +if ( MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) <
>> + (guest_kernel_mode(v, regs) ?
>> +  (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3) )
>>  goto fail;
> The similarity of the two together with the growing complexity
> suggests to make this a macro or inline function. Additionally
> resulting binary code would likely be better if you compared
> v->arch.pv_vcpu.iopl with MASK_INSR(,
> X86_EFLAGS_IOPL), even if that means having three
> MASK_INSR() (albeit those perhaps again would be hidden in
> a macro, e.g.
>
> #define IOPL(n) MASK_INSR(n, X86_EFLAGS_IOPL)

I will see what I can do.

>
> ).
>
>> --- a/xen/arch/x86/x86_64/compat/entry.S
>> +++ b/xen/arch/x86/x86_64/compat/entry.S
>> @@ -277,9 +277,14 @@ compat_create_bounce_frame:
>>  testb %al,%al   # Bits 0-7: saved_upcall_mask
>>  setz  %ch   # %ch == !saved_upcall_mask
>>  movl  UREGS_eflags+8(%rsp),%eax
>> -andl  $~X86_EFLAGS_IF,%eax
>> +andl  $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax
> See earlier comment.

I hope I have suitably answered why this is staying.

>
>>  addb  %ch,%ch   # Bit 9 (EFLAGS.IF)
>>  orb   %ch,%ah   # Fold EFLAGS.IF into %eax
>> +movq  VCPU_domain(%rbx),%rcx# if ( VM_ASSIST(v->domain, 
>> architectural_iopl) )
> If you used another register, this could be pulled up quite a bit,
> to hide the latency of the load before the loaded value gets used.

Can do, but given all pipeline stalls which currently exist in this
code, I doubt it will make any observable difference.

>
>> +testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%rcx)
>> +jz.Lft6
>> +movzwl VCPU_iopl(%rbx),%ecx # Bits 13:12 (EFLAGS.IOPL)
> Why not just MOVL?

Ah yes - this is leftover from the first iteration.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl

2016-03-19 Thread Jan Beulich
>>> On 16.03.16 at 21:05,  wrote:
> @@ -1742,8 +1742,10 @@ static void load_segments(struct vcpu *n)
>  cs_and_mask = (unsigned short)regs->cs |
>  ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
>  /* Fold upcall mask into RFLAGS.IF. */
> -eflags  = regs->_eflags & ~X86_EFLAGS_IF;
> +eflags  = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);

This and ...

> @@ -1788,8 +1790,10 @@ static void load_segments(struct vcpu *n)
>  ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);
>  
>  /* Fold upcall mask into RFLAGS.IF. */
> -rflags  = regs->rflags & ~X86_EFLAGS_IF;
> +rflags  = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);

... this is not really necessary (but also not wrong) - the actual
EFLAGS.IOPL is always zero (and assumed to be so by code
further down from the respective adjustments you make). For
consistency's sake it might be better to either drop the changes
here, or also adjust the two places masking regs->eflags.

> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -1806,7 +1806,9 @@ static int guest_io_okay(
>  #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
>  
>  if ( !vm86_mode(regs) &&
> - (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
> + (MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) >=
> +  (guest_kernel_mode(v, regs) ?
> +   (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3)) )
>  return 1;
>  
>  if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
> @@ -2367,7 +2369,9 @@ static int emulate_privileged_op(struct cpu_user_regs 
> *regs)
>  
>  case 0xfa: /* CLI */
>  case 0xfb: /* STI */
> -if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
> +if ( MASK_EXTR(v->arch.pv_vcpu.iopl, X86_EFLAGS_IOPL) <
> + (guest_kernel_mode(v, regs) ?
> +  (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3) )
>  goto fail;

The similarity of the two together with the growing complexity
suggests to make this a macro or inline function. Additionally
resulting binary code would likely be better if you compared
v->arch.pv_vcpu.iopl with MASK_INSR(,
X86_EFLAGS_IOPL), even if that means having three
MASK_INSR() (albeit those perhaps again would be hidden in
a macro, e.g.

#define IOPL(n) MASK_INSR(n, X86_EFLAGS_IOPL)

).

> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -277,9 +277,14 @@ compat_create_bounce_frame:
>  testb %al,%al   # Bits 0-7: saved_upcall_mask
>  setz  %ch   # %ch == !saved_upcall_mask
>  movl  UREGS_eflags+8(%rsp),%eax
> -andl  $~X86_EFLAGS_IF,%eax
> +andl  $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax

See earlier comment.

>  addb  %ch,%ch   # Bit 9 (EFLAGS.IF)
>  orb   %ch,%ah   # Fold EFLAGS.IF into %eax
> +movq  VCPU_domain(%rbx),%rcx# if ( VM_ASSIST(v->domain, 
> architectural_iopl) )

If you used another register, this could be pulled up quite a bit,
to hide the latency of the load before the loaded value gets used.

> +testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%rcx)
> +jz.Lft6
> +movzwl VCPU_iopl(%rbx),%ecx # Bits 13:12 (EFLAGS.IOPL)

Why not just MOVL?

> +orl   %ecx,%eax # Fold EFLAGS.IOPL into %eax

Also I continue to think this would better be done with CMOVcc,
avoiding yet another conditional branch here.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl

2016-03-19 Thread Jan Beulich
>>> On 17.03.16 at 11:45,  wrote:
> On 17/03/16 10:25, Jan Beulich wrote:
> On 16.03.16 at 21:05,  wrote:
>>> @@ -1742,8 +1742,10 @@ static void load_segments(struct vcpu *n)
>>>  cs_and_mask = (unsigned short)regs->cs |
>>>  ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
>>>  /* Fold upcall mask into RFLAGS.IF. */
>>> -eflags  = regs->_eflags & ~X86_EFLAGS_IF;
>>> +eflags  = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
>> This and ...
>>
>>> @@ -1788,8 +1790,10 @@ static void load_segments(struct vcpu *n)
>>>  ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);
>>>  
>>>  /* Fold upcall mask into RFLAGS.IF. */
>>> -rflags  = regs->rflags & ~X86_EFLAGS_IF;
>>> +rflags  = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
>> ... this is not really necessary (but also not wrong) - the actual
>> EFLAGS.IOPL is always zero (and assumed to be so by code
>> further down from the respective adjustments you make). For
>> consistency's sake it might be better to either drop the changes
>> here, or also adjust the two places masking regs->eflags.
> 
> I will adjust the others.  I would prefer not to rely on the assumption
> that it is actually 0.

But you realize that if it wasn't zero, we'd have a security issue?
(This notwithstanding I'm fine with both directions, as indicated
before.)

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl

2016-03-19 Thread Andrew Cooper
On 17/03/16 11:00, Jan Beulich wrote:
 On 17.03.16 at 11:45,  wrote:
>> On 17/03/16 10:25, Jan Beulich wrote:
>> On 16.03.16 at 21:05,  wrote:
 @@ -1742,8 +1742,10 @@ static void load_segments(struct vcpu *n)
  cs_and_mask = (unsigned short)regs->cs |
  ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
  /* Fold upcall mask into RFLAGS.IF. */
 -eflags  = regs->_eflags & ~X86_EFLAGS_IF;
 +eflags  = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
>>> This and ...
>>>
 @@ -1788,8 +1790,10 @@ static void load_segments(struct vcpu *n)
  ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);
  
  /* Fold upcall mask into RFLAGS.IF. */
 -rflags  = regs->rflags & ~X86_EFLAGS_IF;
 +rflags  = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
>>> ... this is not really necessary (but also not wrong) - the actual
>>> EFLAGS.IOPL is always zero (and assumed to be so by code
>>> further down from the respective adjustments you make). For
>>> consistency's sake it might be better to either drop the changes
>>> here, or also adjust the two places masking regs->eflags.
>> I will adjust the others.  I would prefer not to rely on the assumption
>> that it is actually 0.
> But you realize that if it wasn't zero, we'd have a security issue?

Indeed.  But as this adjustment is literally free for us to use, it makes
Xen a little more robust in the (hopefully never) case where IOPL ends up
not being 0.

~Andrew

> (This notwithstanding I'm fine with both directions, as indicated
> before.)
>
> Jan
>


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel