The existing vIOPL interface is hard to use, and need not be. Introduce a
VMASSIST with which a guest can opt in to having vIOPL behaviour consistent
with native hardware.
Specifically:
 - virtual iopl updated from do_iret() hypercalls.
 - virtual iopl reported in bounce frames.
 - guest kernels assumed to be level 0 for the purpose of iopl checks.

v->arch.pv_vcpu.iopl is altered to store IOPL shifted as it would exist in
eflags, for the benefit of the assembly code.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
---
CC: Jan Beulich <jbeul...@suse.com>

v2:
 * Shift vcpu.iopl to match eflags.
 * Factor out iopl_ok().
 * Use CMOVcc in assembly code.

Along with this, I have functional tests for both vIOPL interfaces in PV
guests. (A few illustrative sketches of the guest-side opt-in and of the new
vIOPL handling are appended after the patch.)
---
 xen/arch/x86/domain.c              | 15 ++++++++++-----
 xen/arch/x86/physdev.c             |  2 +-
 xen/arch/x86/traps.c               | 15 +++++++++++++--
 xen/arch/x86/x86_64/asm-offsets.c  |  3 +++
 xen/arch/x86/x86_64/compat/entry.S |  7 ++++++-
 xen/arch/x86/x86_64/compat/traps.c |  4 ++++
 xen/arch/x86/x86_64/entry.S        |  7 ++++++-
 xen/arch/x86/x86_64/traps.c        |  3 +++
 xen/include/asm-x86/config.h       |  1 +
 xen/include/asm-x86/domain.h       |  4 +++-
 xen/include/public/xen.h           |  8 ++++++++
 11 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index a4f6db2..8c590f3 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -994,7 +994,7 @@ int arch_set_info_guest(
     init_int80_direct_trap(v);
 
     /* IOPL privileges are virtualised. */
-    v->arch.pv_vcpu.iopl = (v->arch.user_regs.eflags >> 12) & 3;
+    v->arch.pv_vcpu.iopl = v->arch.user_regs.eflags & X86_EFLAGS_IOPL;
     v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL;
 
     /* Ensure real hardware interrupts are enabled. */
@@ -1735,8 +1735,10 @@ static void load_segments(struct vcpu *n)
             cs_and_mask = (unsigned short)regs->cs |
                 ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
             /* Fold upcall mask into RFLAGS.IF. */
-            eflags  = regs->_eflags & ~X86_EFLAGS_IF;
+            eflags  = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
             eflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+            if ( VM_ASSIST(n->domain, architectural_iopl) )
+                eflags |= n->arch.pv_vcpu.iopl;
 
             if ( !ring_1(regs) )
             {
@@ -1763,7 +1765,8 @@ static void load_segments(struct vcpu *n)
             vcpu_info(n, evtchn_upcall_mask) = 1;
 
             regs->entry_vector |= TRAP_syscall;
-            regs->_eflags      &= 0xFFFCBEFFUL;
+            regs->_eflags      &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
+                                    X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);
             regs->ss            = FLAT_COMPAT_KERNEL_SS;
             regs->_esp          = (unsigned long)(esp-7);
             regs->cs            = FLAT_COMPAT_KERNEL_CS;
@@ -1781,8 +1784,10 @@ static void load_segments(struct vcpu *n)
                 ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);
             /* Fold upcall mask into RFLAGS.IF. */
-            rflags  = regs->rflags & ~X86_EFLAGS_IF;
+            rflags  = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
             rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+            if ( VM_ASSIST(n->domain, architectural_iopl) )
+                rflags |= n->arch.pv_vcpu.iopl;
 
             if ( put_user(regs->ss,  rsp- 1) |
                  put_user(regs->rsp, rsp- 2) |
@@ -1806,7 +1811,7 @@ static void load_segments(struct vcpu *n)
 
             regs->entry_vector |= TRAP_syscall;
             regs->rflags       &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
-                                    X86_EFLAGS_NT|X86_EFLAGS_TF);
+                                    X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);
             regs->ss            = FLAT_KERNEL_SS;
             regs->rsp           = (unsigned long)(rsp-11);
             regs->cs            = FLAT_KERNEL_CS;
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index 1cb9b58..5a49796 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -529,7 +529,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         if ( set_iopl.iopl > 3 )
             break;
         ret = 0;
-        curr->arch.pv_vcpu.iopl = set_iopl.iopl;
+        curr->arch.pv_vcpu.iopl = MASK_INSR(set_iopl.iopl, X86_EFLAGS_IOPL);
         break;
     }
 
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 8125c53..7861a3c 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1779,6 +1779,17 @@ static int read_gate_descriptor(unsigned int gate_sel,
     return 1;
 }
 
+/* Perform IOPL check between the vcpu's shadowed IOPL, and the assumed cpl. */
+static bool_t iopl_ok(const struct vcpu *v, const struct cpu_user_regs *regs)
+{
+    unsigned int cpl = guest_kernel_mode(v, regs) ?
+        (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3;
+
+    ASSERT((v->arch.pv_vcpu.iopl & ~X86_EFLAGS_IOPL) == 0);
+
+    return IOPL(cpl) <= v->arch.pv_vcpu.iopl;
+}
+
 /* Has the guest requested sufficient permission for this I/O access? */
 static int guest_io_okay(
     unsigned int port, unsigned int bytes,
@@ -1788,7 +1799,7 @@ static int guest_io_okay(
     int user_mode = !(v->arch.flags & TF_kernel_mode);
 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
 
-    if ( v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3) )
+    if ( iopl_ok(v, regs) )
         return 1;
 
     if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
@@ -2346,7 +2357,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
 
     case 0xfa: /* CLI */
     case 0xfb: /* STI */
-        if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
+        if ( !iopl_ok(v, regs) )
             goto fail;
         /*
          * This is just too dangerous to allow, in my opinion. Consider if the
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 447c650..fa37ee0 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -86,6 +86,7 @@ void __dummy__(void)
     OFFSET(VCPU_trap_ctxt, struct vcpu, arch.pv_vcpu.trap_ctxt);
     OFFSET(VCPU_kernel_sp, struct vcpu, arch.pv_vcpu.kernel_sp);
     OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss);
+    OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl);
     OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags);
     OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
     OFFSET(VCPU_mce_pending, struct vcpu, mce_pending);
@@ -166,4 +167,6 @@ void __dummy__(void)
     OFFSET(MB_flags, multiboot_info_t, flags);
     OFFSET(MB_cmdline, multiboot_info_t, cmdline);
     OFFSET(MB_mem_lower, multiboot_info_t, mem_lower);
+
+    OFFSET(DOMAIN_vm_assist, struct domain, vm_assist);
 }
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index fd25e84..0ff6818 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -263,6 +263,7 @@ compat_create_bounce_frame:
         movl  UREGS_rsp+8(%rsp),%esi
 .Lft4:  mov   UREGS_ss+8(%rsp),%fs
 2:
+        movq  VCPU_domain(%rbx),%r8
         subl  $3*4,%esi
         movq  VCPU_vcpu_info(%rbx),%rax
         pushq COMPAT_VCPUINFO_upcall_mask(%rax)
@@ -277,9 +278,13 @@ compat_create_bounce_frame:
         testb %al,%al                   # Bits 0-7: saved_upcall_mask
         setz  %ch                       # %ch == !saved_upcall_mask
         movl  UREGS_eflags+8(%rsp),%eax
-        andl  $~X86_EFLAGS_IF,%eax
+        andl  $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax
         addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
         orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
+        xorl  %ecx,%ecx                 # if ( VM_ASSIST(v->domain, architectural_iopl) )
+        testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%r8)
+        cmovnzl VCPU_iopl(%rbx),%ecx    # Bits 13:12 (EFLAGS.IOPL)
+        orl   %ecx,%eax                 # Fold EFLAGS.IOPL into %eax
 .Lft6:  movl  %eax,%fs:2*4(%rsi)        # EFLAGS
         movl  UREGS_rip+8(%rsp),%eax
 .Lft7:  movl  %eax,%fs:(%rsi)           # EIP
diff --git a/xen/arch/x86/x86_64/compat/traps.c b/xen/arch/x86/x86_64/compat/traps.c
index bbf18e4..a6afb26 100644
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -99,6 +99,10 @@ unsigned int compat_iret(void)
         domain_crash(v->domain);
         return 0;
     }
+
+    if ( VM_ASSIST(v->domain, architectural_iopl) )
+        v->arch.pv_vcpu.iopl = eflags & X86_EFLAGS_IOPL;
+
     regs->_eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
 
     if ( unlikely(eflags & X86_EFLAGS_VM) )
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index b0e7257..6866e8f 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -346,6 +346,7 @@ __UNLIKELY_END(create_bounce_frame_bad_sp)
         subq  $40,%rsi
         movq  UREGS_ss+8(%rsp),%rax
         ASM_STAC
+        movq  VCPU_domain(%rbx),%rdi
 .Lft2:  movq  %rax,32(%rsi)             # SS
         movq  UREGS_rsp+8(%rsp),%rax
 .Lft3:  movq  %rax,24(%rsi)             # RSP
@@ -362,9 +363,13 @@ __UNLIKELY_END(create_bounce_frame_bad_sp)
         testb $0xFF,%al                 # Bits 0-7: saved_upcall_mask
         setz  %ch                       # %ch == !saved_upcall_mask
         movl  UREGS_eflags+8(%rsp),%eax
-        andl  $~X86_EFLAGS_IF,%eax
+        andl  $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax
         addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
         orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
+        xorl  %ecx,%ecx                 # if ( VM_ASSIST(v->domain, architectural_iopl) )
+        testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%rdi)
+        cmovnzl VCPU_iopl(%rbx),%ecx    # Bits 13:12 (EFLAGS.IOPL)
+        orl   %ecx,%eax                 # Fold EFLAGS.IOPL into %eax
 .Lft5:  movq  %rax,16(%rsi)             # RFLAGS
         movq  UREGS_rip+8(%rsp),%rax
 .Lft6:  movq  %rax,(%rsi)               # RIP
diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
index 26e2dd7..19f58a1 100644
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -310,6 +310,9 @@ unsigned long do_iret(void)
         toggle_guest_mode(v);
     }
 
+    if ( VM_ASSIST(v->domain, architectural_iopl) )
+        v->arch.pv_vcpu.iopl = iret_saved.rflags & X86_EFLAGS_IOPL;
+
     regs->rip    = iret_saved.rip;
     regs->cs     = iret_saved.cs | 3; /* force guest privilege */
     regs->rflags = ((iret_saved.rflags & ~(X86_EFLAGS_IOPL|X86_EFLAGS_VM))
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index f6f5a10..c10129d 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -331,6 +331,7 @@ extern unsigned long xen_phys_start;
      (1UL << VMASST_TYPE_4gb_segments_notify) | \
      (1UL << VMASST_TYPE_writable_pagetables) | \
      (1UL << VMASST_TYPE_pae_extended_cr3) | \
+     (1UL << VMASST_TYPE_architectural_iopl) | \
      (1UL << VMASST_TYPE_m2p_strict))
 #define VM_ASSIST_VALID NATIVE_VM_ASSIST_VALID
 #define COMPAT_VM_ASSIST_VALID (NATIVE_VM_ASSIST_VALID & \
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index de60def..5f2f27f 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -470,7 +470,9 @@ struct pv_vcpu
     /* I/O-port access bitmap. */
     XEN_GUEST_HANDLE(uint8) iobmp; /* Guest kernel vaddr of the bitmap. */
     unsigned int iobmp_limit; /* Number of ports represented in the bitmap. */
-    unsigned int iopl;        /* Current IOPL for this VCPU. */
+#define IOPL(val) MASK_INSR(val, X86_EFLAGS_IOPL)
+    unsigned int iopl;        /* Current IOPL for this VCPU, shifted left by
+                               * 12 to match the eflags register. */
 
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 435d789..97a5ae5 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -503,6 +503,14 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
 #define VMASST_TYPE_pae_extended_cr3     3
 
 /*
+ * x86 guests: Sane behaviour for virtual iopl
+ *  - virtual iopl updated from do_iret() hypercalls.
+ *  - virtual iopl reported in bounce frames.
+ *  - guest kernels assumed to be level 0 for the purpose of iopl checks.
+ */
+#define VMASST_TYPE_architectural_iopl 4
+
+/*
  * x86/64 guests: strictly hide M2P from user mode.
  * This allows the guest to control respective hypervisor behavior:
  * - when not set, L4 tables get created with the respective slot blank,
-- 
2.1.4
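
Sketch 1: guest-side opt-in. A PV kernel enables the assist with a single
vm_assist hypercall before relying on the architectural semantics. This is a
minimal sketch assuming a Linux-style HYPERVISOR_vm_assist() wrapper; the
wrapper name is a guest implementation detail, not something defined by this
patch:

#include <xen/xen.h>   /* VMASST_CMD_enable, VMASST_TYPE_architectural_iopl */

/*
 * Sketch: opt this domain in to architectural vIOPL handling.  Returns 0 on
 * success, or a negative error if the hypervisor does not offer the assist
 * (e.g. an older Xen).
 */
static int enable_architectural_iopl(void)
{
    return HYPERVISOR_vm_assist(VMASST_CMD_enable,
                                VMASST_TYPE_architectural_iopl);
}

A guest which cannot assume a new enough hypervisor should check the return
value and fall back to the existing behaviour (kernel treated as ring 1 for
iopl checks).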
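
Sketch 2: the shifted-IOPL representation. The change of representation is
easy to check in isolation; here is a self-contained demonstration, with
MASK_INSR written out locally for illustration (matching how Xen defines it):

#include <assert.h>

#define X86_EFLAGS_IOPL 0x00003000           /* eflags bits 13:12 */

/* Insert a value into the field described by a mask (as Xen's MASK_INSR). */
#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))

#define IOPL(val) MASK_INSR(val, X86_EFLAGS_IOPL)

int main(void)
{
    /* An iopl of 3, as passed to PHYSDEVOP_set_iopl, is stored as 0x3000. */
    assert(MASK_INSR(3, X86_EFLAGS_IOPL) == 0x3000);

    /* iopl_ok() performs the architectural cpl <= iopl comparison with
     * both sides pre-shifted into their eflags position. */
    unsigned int iopl = IOPL(1);      /* vcpu granted iopl 1 */
    assert(IOPL(0) <= iopl);          /* cpl 0 (kernel): access permitted */
    assert(!(IOPL(3) <= iopl));       /* cpl 3 (user): access refused */
    return 0;
}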
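
Sketch 3: the bounce frame fixup. The new assembly in the two
create_bounce_frame implementations performs the same eflags adjustment as
the C added to load_segments(). Expressed in C (parameter names here merely
stand in for the actual state reads):

#define X86_EFLAGS_IF   0x00000200
#define X86_EFLAGS_IOPL 0x00003000

/*
 * eflags as reported in a bounce frame: clear IF and IOPL, fold the
 * inverted event-channel upcall mask into IF, and, iff the domain has
 * enabled the assist, fold the shadowed (pre-shifted) vIOPL into IOPL.
 */
static unsigned long bounce_eflags(unsigned long eflags,
                                   unsigned int upcall_mask,
                                   int assist_enabled,
                                   unsigned long viopl)
{
    eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_IOPL);
    eflags |= (unsigned long)!upcall_mask << 9;   /* EFLAGS.IF */
    if ( assist_enabled )
        eflags |= viopl;                          /* EFLAGS.IOPL */
    return eflags;
}

The CMOVcc requested in review implements the conditional OR without a
branch: %ecx is zeroed, conditionally loaded with VCPU_iopl when the
vm_assist bit tests non-zero, and unconditionally OR'd into the eflags value.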