Re: [Xen-devel] [PATCH 4/4] x86: switch default mapping attributes to non-executable
On 19.05.15 at 20:53, andrew.coop...@citrix.com wrote: On 18/05/15 13:47, Jan Beulich wrote: --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -895,6 +895,33 @@ void __init subarch_init_memory(void) share_xen_page_with_privileged_guests(page, XENSHARE_readonly); } } + +/* Mark low 16Mb of direct map NX if hardware supports it. */ +if ( !cpu_has_nx ) +return; + +v = DIRECTMAP_VIRT_START + (1UL << 20); What about 0-1MB ? I'd like to avoid fiddling with that range, as the trampoline living there will want to continue to be RWX anyway. +l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[l3_table_offset(v)]; +ASSERT(l3e_get_flags(l3e) & _PAGE_PRESENT); +do { +l2e = l3e_to_l2e(l3e)[l2_table_offset(v)]; +ASSERT(l2e_get_flags(l2e) & _PAGE_PRESENT); +if ( l2e_get_flags(l2e) & _PAGE_PSE ) +{ +l2e_add_flags(l2e, _PAGE_NX_BIT); +l3e_to_l2e(l3e)[l2_table_offset(v)] = l2e; +v += 1 << L2_PAGETABLE_SHIFT; +} +else +{ +l1_pgentry_t l1e = l2e_to_l1e(l2e)[l1_table_offset(v)]; + +ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT); +l1e_add_flags(l1e, _PAGE_NX_BIT); +l2e_to_l1e(l2e)[l1_table_offset(v)] = l1e; +v += 1 << L1_PAGETABLE_SHIFT; +} +} while ( v < DIRECTMAP_VIRT_START + (16UL << 20) ); How about just setting l3e.nx and leaving all lower tables alone? Apart from the trampoline aspect (see above) I don't think it's a good idea to restrict permissions in non-leaf entries - that just calls for more intrusive patches if later an individual page needs them lifted. 
--- a/xen/include/asm-x86/x86_64/page.h +++ b/xen/include/asm-x86/x86_64/page.h @@ -147,9 +147,20 @@ typedef l4_pgentry_t root_pgentry_t; */ #define _PAGE_GUEST_KERNEL (1U<<12) -#define PAGE_HYPERVISOR (__PAGE_HYPERVISOR | _PAGE_GLOBAL) +#define PAGE_HYPERVISOR_RO (__PAGE_HYPERVISOR_RO | _PAGE_GLOBAL) +#define PAGE_HYPERVISOR_RW (__PAGE_HYPERVISOR_RW | _PAGE_GLOBAL) #define PAGE_HYPERVISOR_RX (__PAGE_HYPERVISOR_RX | _PAGE_GLOBAL) -#define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL) +#define PAGE_HYPERVISOR_RWX (__PAGE_HYPERVISOR | _PAGE_GLOBAL) + +#ifdef __ASSEMBLY__ +/* Dependency on NX being available can't be expressed. */ +# define PAGE_HYPERVISOR PAGE_HYPERVISOR_RWX +# define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL) +#else +# define PAGE_HYPERVISOR PAGE_HYPERVISOR_RW +# define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | \ + _PAGE_GLOBAL | _PAGE_NX) +#endif This patch is clearly an improvement, so my comments can possibly be deferred to subsequent patches. After boot, I can't think of a legitimate reason for Xen to have a RWX mapping. As such, leaving __PAGE_HYPERVISOR around constitutes a risk that RWX mappings will be used. It would be nice if we could remove all constants (which aren't BOOT_*) which could lead to accidental use of RWX mappings on NX-enabled hardware. But you realize that __PAGE_HYPERVISOR is particularly used for intermediate page table entry permissions? I think we also want a warning on boot if we find NX unavailable. The only 64bit capable CPUs which do not support NX are now 11 years old, and I don't expect many of those to be running Xen these days. However, given that disabling NX is a common BIOS option for compatibility reasons, we should press people to alter their settings if possible. Sure, easily done. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 4/4] x86: switch default mapping attributes to non-executable
On 18/05/15 13:47, Jan Beulich wrote: Only a very limited subset of mappings need to be done as executable ones; in particular the direct mapping should not be executable to limit the damage attackers can cause by exploiting security relevant bugs. The EFI change at once includes an adjustment to set NX only when supported by the hardware. Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -293,7 +293,7 @@ struct vcpu_guest_context *alloc_vcpu_gu free_vcpu_guest_context(NULL); return NULL; } -__set_fixmap(idx - i, page_to_mfn(pg), __PAGE_HYPERVISOR); +__set_fixmap(idx - i, page_to_mfn(pg), __PAGE_HYPERVISOR_RW); per_cpu(vgc_pages[i], cpu) = pg; } return (void *)fix_to_virt(idx); --- a/xen/arch/x86/domain_page.c +++ b/xen/arch/x86/domain_page.c @@ -160,7 +160,7 @@ void *map_domain_page(unsigned long mfn) spin_unlock(dcache-lock); -l1e_write(MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR)); +l1e_write(MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR_RW)); out: local_irq_restore(flags); --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -4416,7 +4416,7 @@ long set_gdt(struct vcpu *v, for ( i = 0; i nr_pages; i++ ) { v-arch.pv_vcpu.gdt_frames[i] = frames[i]; -l1e_write(pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR)); +l1e_write(pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR_RW)); } xfree(pfns); @@ -6004,7 +6004,7 @@ int create_perdomain_mapping(struct doma if ( !IS_NIL(ppg) ) *ppg++ = pg; l1tab[l1_table_offset(va)] = -l1e_from_page(pg, __PAGE_HYPERVISOR | _PAGE_AVAIL0); +l1e_from_page(pg, __PAGE_HYPERVISOR_RW | _PAGE_AVAIL0); l2e_add_flags(*pl2e, _PAGE_AVAIL0); } else @@ -6133,7 +6133,7 @@ void memguard_init(void) (unsigned long)__va(start), start PAGE_SHIFT, (__pa(_end) + PAGE_SIZE - 1 - start) PAGE_SHIFT, -__PAGE_HYPERVISOR|MAP_SMALL_PAGES); +__PAGE_HYPERVISOR_RW|MAP_SMALL_PAGES); BUG_ON(start != xen_phys_start); map_pages_to_xen( XEN_VIRT_START, @@ -6146,7 +6146,7 @@ static void 
__memguard_change_range(void { unsigned long _p = (unsigned long)p; unsigned long _l = (unsigned long)l; -unsigned int flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES; +unsigned int flags = __PAGE_HYPERVISOR_RW | MAP_SMALL_PAGES; /* Ensure we are dealing with a page-aligned whole number of pages. */ ASSERT((_p~PAGE_MASK) == 0); --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -900,7 +900,7 @@ void __init noreturn __start_xen(unsigne /* The only data mappings to be relocated are in the Xen area. */ pl2e = __va(__pa(l2_xenmap)); *pl2e++ = l2e_from_pfn(xen_phys_start PAGE_SHIFT, - PAGE_HYPERVISOR | _PAGE_PSE); + PAGE_HYPERVISOR_RWX | _PAGE_PSE); for ( i = 1; i L2_PAGETABLE_ENTRIES; i++, pl2e++ ) { if ( !(l2e_get_flags(*pl2e) _PAGE_PRESENT) ) @@ -1087,7 +1087,7 @@ void __init noreturn __start_xen(unsigne /* This range must not be passed to the boot allocator and * must also not be mapped with _PAGE_GLOBAL. */ map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e), - PFN_DOWN(e - map_e), __PAGE_HYPERVISOR); + PFN_DOWN(e - map_e), __PAGE_HYPERVISOR_RW); } if ( s map_s ) { --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -895,6 +895,33 @@ void __init subarch_init_memory(void) share_xen_page_with_privileged_guests(page, XENSHARE_readonly); } } + +/* Mark low 16Mb of direct map NX if hardware supports it. */ +if ( !cpu_has_nx ) +return; + +v = DIRECTMAP_VIRT_START + (1UL 20); What about 0-1MB ? +l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[l3_table_offset(v)]; +ASSERT(l3e_get_flags(l3e) _PAGE_PRESENT); +do { +l2e = l3e_to_l2e(l3e)[l2_table_offset(v)]; +ASSERT(l2e_get_flags(l2e) _PAGE_PRESENT); +if ( l2e_get_flags(l2e) _PAGE_PSE ) +{ +l2e_add_flags(l2e, _PAGE_NX_BIT); +l3e_to_l2e(l3e)[l2_table_offset(v)] = l2e; +v += 1 L2_PAGETABLE_SHIFT; +} +else +{ +l1_pgentry_t l1e = l2e_to_l1e(l2e)[l1_table_offset(v)]; + +ASSERT(l1e_get_flags(l1e) _PAGE_PRESENT); +l1e_add_flags(l1e, _PAGE_NX_BIT); +l2e_to_l1e(l2e)[l1_table_offset(v)] = l1e; +v += 1
[Xen-devel] [PATCH 4/4] x86: switch default mapping attributes to non-executable
Only a very limited subset of mappings need to be done as executable ones; in particular the direct mapping should not be executable to limit the damage attackers can cause by exploiting security relevant bugs. The EFI change at once includes an adjustment to set NX only when supported by the hardware. Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -293,7 +293,7 @@ struct vcpu_guest_context *alloc_vcpu_gu free_vcpu_guest_context(NULL); return NULL; } -__set_fixmap(idx - i, page_to_mfn(pg), __PAGE_HYPERVISOR); +__set_fixmap(idx - i, page_to_mfn(pg), __PAGE_HYPERVISOR_RW); per_cpu(vgc_pages[i], cpu) = pg; } return (void *)fix_to_virt(idx); --- a/xen/arch/x86/domain_page.c +++ b/xen/arch/x86/domain_page.c @@ -160,7 +160,7 @@ void *map_domain_page(unsigned long mfn) spin_unlock(dcache-lock); -l1e_write(MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR)); +l1e_write(MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR_RW)); out: local_irq_restore(flags); --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -4416,7 +4416,7 @@ long set_gdt(struct vcpu *v, for ( i = 0; i nr_pages; i++ ) { v-arch.pv_vcpu.gdt_frames[i] = frames[i]; -l1e_write(pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR)); +l1e_write(pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR_RW)); } xfree(pfns); @@ -6004,7 +6004,7 @@ int create_perdomain_mapping(struct doma if ( !IS_NIL(ppg) ) *ppg++ = pg; l1tab[l1_table_offset(va)] = -l1e_from_page(pg, __PAGE_HYPERVISOR | _PAGE_AVAIL0); +l1e_from_page(pg, __PAGE_HYPERVISOR_RW | _PAGE_AVAIL0); l2e_add_flags(*pl2e, _PAGE_AVAIL0); } else @@ -6133,7 +6133,7 @@ void memguard_init(void) (unsigned long)__va(start), start PAGE_SHIFT, (__pa(_end) + PAGE_SIZE - 1 - start) PAGE_SHIFT, -__PAGE_HYPERVISOR|MAP_SMALL_PAGES); +__PAGE_HYPERVISOR_RW|MAP_SMALL_PAGES); BUG_ON(start != xen_phys_start); map_pages_to_xen( XEN_VIRT_START, @@ -6146,7 +6146,7 @@ static void __memguard_change_range(void { unsigned long 
_p = (unsigned long)p; unsigned long _l = (unsigned long)l; -unsigned int flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES; +unsigned int flags = __PAGE_HYPERVISOR_RW | MAP_SMALL_PAGES; /* Ensure we are dealing with a page-aligned whole number of pages. */ ASSERT((_p~PAGE_MASK) == 0); --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -900,7 +900,7 @@ void __init noreturn __start_xen(unsigne /* The only data mappings to be relocated are in the Xen area. */ pl2e = __va(__pa(l2_xenmap)); *pl2e++ = l2e_from_pfn(xen_phys_start PAGE_SHIFT, - PAGE_HYPERVISOR | _PAGE_PSE); + PAGE_HYPERVISOR_RWX | _PAGE_PSE); for ( i = 1; i L2_PAGETABLE_ENTRIES; i++, pl2e++ ) { if ( !(l2e_get_flags(*pl2e) _PAGE_PRESENT) ) @@ -1087,7 +1087,7 @@ void __init noreturn __start_xen(unsigne /* This range must not be passed to the boot allocator and * must also not be mapped with _PAGE_GLOBAL. */ map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e), - PFN_DOWN(e - map_e), __PAGE_HYPERVISOR); + PFN_DOWN(e - map_e), __PAGE_HYPERVISOR_RW); } if ( s map_s ) { --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -895,6 +895,33 @@ void __init subarch_init_memory(void) share_xen_page_with_privileged_guests(page, XENSHARE_readonly); } } + +/* Mark low 16Mb of direct map NX if hardware supports it. 
*/ +if ( !cpu_has_nx ) +return; + +v = DIRECTMAP_VIRT_START + (1UL << 20); +l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[l3_table_offset(v)]; +ASSERT(l3e_get_flags(l3e) & _PAGE_PRESENT); +do { +l2e = l3e_to_l2e(l3e)[l2_table_offset(v)]; +ASSERT(l2e_get_flags(l2e) & _PAGE_PRESENT); +if ( l2e_get_flags(l2e) & _PAGE_PSE ) +{ +l2e_add_flags(l2e, _PAGE_NX_BIT); +l3e_to_l2e(l3e)[l2_table_offset(v)] = l2e; +v += 1 << L2_PAGETABLE_SHIFT; +} +else +{ +l1_pgentry_t l1e = l2e_to_l1e(l2e)[l1_table_offset(v)]; + +ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT); +l1e_add_flags(l1e, _PAGE_NX_BIT); +l2e_to_l1e(l2e)[l1_table_offset(v)] = l1e; +v += 1 << L1_PAGETABLE_SHIFT; +} +} while ( v < DIRECTMAP_VIRT_START + (16UL << 20) ); } long subarch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) @@ -1359,7 +1386,7 @@ int