Instead of flushing global pages from the TLB when switching address spaces with XPTI active, just disable global pages via %cr4 completely while a domain subject to XPTI is active. This avoids the need for extra TLB flushes, as loading %cr3 will then remove all TLB entries.
In order to avoid states with cr3/cr4 having inconsistent values (e.g. global pages being activated while cr3 already specifies an XPTI address space), move loading of the new cr4 value to write_ptbase() (actually to switch_cr3_cr4(), called by write_ptbase()). This requires using switch_cr3_cr4() instead of write_ptbase() when building dom0, in order to avoid setting cr4 with cr4.smap set. Signed-off-by: Juergen Gross <jgr...@suse.com> Reviewed-by: Jan Beulich <jbeul...@suse.com> --- V7: - use switch_cr3_cr4() in dom0_build.c V6: - don't call read_cr4() multiple times in switch_cr3_cr4() (Andrew Cooper) V4: - don't use mmu_cr4_features for setting new cr4 value (Jan Beulich) - use simpler scheme for setting X86_CR4_PGE in pv_guest_cr4_to_real_cr4() (Jan Beulich) V3: - move cr4 loading for all domains from *_ctxt_switch_to() to write_cr3_cr4() called by write_ptbase() (Jan Beulich) - rebase --- xen/arch/x86/domain.c | 5 ----- xen/arch/x86/flushtlb.c | 17 ++++++++++++----- xen/arch/x86/mm.c | 14 +++++++++++--- xen/arch/x86/pv/dom0_build.c | 6 +++--- xen/arch/x86/x86_64/entry.S | 10 ---------- xen/common/efi/runtime.c | 4 ++-- xen/include/asm-x86/domain.h | 3 ++- xen/include/asm-x86/flushtlb.h | 2 +- 8 files changed, 31 insertions(+), 30 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 3d9c19d055..9b001a03ec 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1523,17 +1523,12 @@ void paravirt_ctxt_switch_from(struct vcpu *v) void paravirt_ctxt_switch_to(struct vcpu *v) { root_pgentry_t *root_pgt = this_cpu(root_pgt); - unsigned long cr4; if ( root_pgt ) root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] = l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW); - cr4 = pv_guest_cr4_to_real_cr4(v); - if ( unlikely(cr4 != read_cr4()) ) - write_cr4(cr4); - if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) ) activate_debugregs(v); diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c index 
717f3bd19b..d48b900100 100644 --- a/xen/arch/x86/flushtlb.c +++ b/xen/arch/x86/flushtlb.c @@ -90,20 +90,27 @@ static void do_tlb_flush(void) post_flush(t); } -void switch_cr3(unsigned long cr3) +void switch_cr3_cr4(unsigned long cr3, unsigned long cr4) { - unsigned long flags, cr4; + unsigned long flags, old_cr4; u32 t; /* This non-reentrant function is sometimes called in interrupt context. */ local_irq_save(flags); t = pre_flush(); - cr4 = read_cr4(); - write_cr4(cr4 & ~X86_CR4_PGE); + old_cr4 = read_cr4(); + if ( old_cr4 & X86_CR4_PGE ) + { + old_cr4 = cr4 & ~X86_CR4_PGE; + write_cr4(old_cr4); + } + write_cr3(cr3); - write_cr4(cr4); + + if ( old_cr4 != cr4 ) + write_cr4(cr4); post_flush(t); diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 9c36614099..73a38e8715 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -503,20 +503,28 @@ void make_cr3(struct vcpu *v, mfn_t mfn) void write_ptbase(struct vcpu *v) { struct cpu_info *cpu_info = get_cpu_info(); + unsigned long new_cr4; + + new_cr4 = (is_pv_vcpu(v) && !is_idle_vcpu(v)) + ? pv_guest_cr4_to_real_cr4(v) + : ((read_cr4() & ~X86_CR4_TSD) | X86_CR4_PGE); if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.xpti ) { cpu_info->root_pgt_changed = true; cpu_info->pv_cr3 = __pa(this_cpu(root_pgt)); - switch_cr3(v->arch.cr3); + switch_cr3_cr4(v->arch.cr3, new_cr4); } else { - /* Make sure to clear xen_cr3 before pv_cr3; switch_cr3() serializes. */ + /* Make sure to clear xen_cr3 before pv_cr3. */ cpu_info->xen_cr3 = 0; - switch_cr3(v->arch.cr3); + /* switch_cr3_cr4() serializes. 
*/ + switch_cr3_cr4(v->arch.cr3, new_cr4); cpu_info->pv_cr3 = 0; } + + ASSERT(is_pv_vcpu(v) || read_cr4() == mmu_cr4_features); } /* diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c index d148395919..4465a059a8 100644 --- a/xen/arch/x86/pv/dom0_build.c +++ b/xen/arch/x86/pv/dom0_build.c @@ -717,7 +717,7 @@ int __init dom0_construct_pv(struct domain *d, update_cr3(v); /* We run on dom0's page tables for the final part of the build process. */ - write_ptbase(v); + switch_cr3_cr4(v->arch.cr3, read_cr4()); mapcache_override_current(v); /* Copy the OS image and free temporary buffer. */ @@ -738,7 +738,7 @@ int __init dom0_construct_pv(struct domain *d, (parms.virt_hypercall >= v_end) ) { mapcache_override_current(NULL); - write_ptbase(current); + switch_cr3_cr4(current->arch.cr3, read_cr4()); printk("Invalid HYPERCALL_PAGE field in ELF notes.\n"); rc = -1; goto out; @@ -871,7 +871,7 @@ int __init dom0_construct_pv(struct domain *d, /* Return to idle domain's page tables. */ mapcache_override_current(NULL); - write_ptbase(current); + switch_cr3_cr4(current->arch.cr3, read_cr4()); update_domain_wallclock_time(d); diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index dd42223b20..5f0758d64f 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -153,13 +153,8 @@ restore_all_guest: ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi rep movsq .Lrag_copy_done: - mov STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx) - mov %rdi, %rsi - and $~X86_CR4_PGE, %rdi - mov %rdi, %cr4 mov %rax, %cr3 - mov %rsi, %cr4 .Lrag_keep_cr3: /* Restore stashed SPEC_CTRL value. */ @@ -215,12 +210,7 @@ restore_all_xen: * so "g" will have to do. */ UNLIKELY_START(g, exit_cr3) - mov %cr4, %rdi - mov %rdi, %rsi - and $~X86_CR4_PGE, %rdi - mov %rdi, %cr4 mov %rax, %cr3 - mov %rsi, %cr4 UNLIKELY_END(exit_cr3) /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. 
*/ diff --git a/xen/common/efi/runtime.c b/xen/common/efi/runtime.c index 4e5ddfef4f..070a70d784 100644 --- a/xen/common/efi/runtime.c +++ b/xen/common/efi/runtime.c @@ -111,7 +111,7 @@ struct efi_rs_state efi_rs_enter(void) lgdt(&gdt_desc); } - switch_cr3(virt_to_maddr(efi_l4_pgtable)); + switch_cr3_cr4(virt_to_maddr(efi_l4_pgtable), read_cr4()); return state; } @@ -120,7 +120,7 @@ void efi_rs_leave(struct efi_rs_state *state) { if ( !state->cr3 ) return; - switch_cr3(state->cr3); + switch_cr3_cr4(state->cr3, read_cr4()); if ( is_pv_vcpu(current) && !is_idle_vcpu(current) ) { struct desc_ptr gdt_desc = { diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h index ed4199931a..b7894dc8c8 100644 --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -618,9 +618,10 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4); #define pv_guest_cr4_to_real_cr4(v) \ (((v)->arch.pv_vcpu.ctrlreg[4] \ | (mmu_cr4_features \ - & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP | \ + & (X86_CR4_PSE | X86_CR4_SMEP | \ X86_CR4_SMAP | X86_CR4_OSXSAVE | \ X86_CR4_FSGSBASE)) \ + | ((v)->domain->arch.pv_domain.xpti ? 0 : X86_CR4_PGE) \ | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)) \ & ~X86_CR4_DE) #define real_cr4_to_pv_guest_cr4(c) \ diff --git a/xen/include/asm-x86/flushtlb.h b/xen/include/asm-x86/flushtlb.h index c150f82ca2..6356e85101 100644 --- a/xen/include/asm-x86/flushtlb.h +++ b/xen/include/asm-x86/flushtlb.h @@ -84,7 +84,7 @@ static inline unsigned long read_cr3(void) } /* Write pagetable base and implicitly tick the tlbflush clock. */ -void switch_cr3(unsigned long cr3); +void switch_cr3_cr4(unsigned long cr3, unsigned long cr4); /* flush_* flag fields: */ /* -- 2.13.6 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel