Re: [Xen-devel] [PATCH 4/4] x86: switch default mapping attributes to non-executable

2015-05-20 Thread Jan Beulich
 On 19.05.15 at 20:53, andrew.coop...@citrix.com wrote:
 On 18/05/15 13:47, Jan Beulich wrote:
 --- a/xen/arch/x86/x86_64/mm.c
 +++ b/xen/arch/x86/x86_64/mm.c
 @@ -895,6 +895,33 @@ void __init subarch_init_memory(void)
  share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
  }
  }
 +
 +    /* Mark low 16Mb of direct map NX if hardware supports it. */
 +    if ( !cpu_has_nx )
 +        return;
 +
 +    v = DIRECTMAP_VIRT_START + (1UL << 20);
 
 What about 0-1MB?

I'd like to avoid fiddling with that range, as the trampoline living there
will want to continue to be RWX anyway.

 +    l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[l3_table_offset(v)];
 +    ASSERT(l3e_get_flags(l3e) & _PAGE_PRESENT);
 +    do {
 +        l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
 +        ASSERT(l2e_get_flags(l2e) & _PAGE_PRESENT);
 +        if ( l2e_get_flags(l2e) & _PAGE_PSE )
 +        {
 +            l2e_add_flags(l2e, _PAGE_NX_BIT);
 +            l3e_to_l2e(l3e)[l2_table_offset(v)] = l2e;
 +            v += 1 << L2_PAGETABLE_SHIFT;
 +        }
 +        else
 +        {
 +            l1_pgentry_t l1e = l2e_to_l1e(l2e)[l1_table_offset(v)];
 +
 +            ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT);
 +            l1e_add_flags(l1e, _PAGE_NX_BIT);
 +            l2e_to_l1e(l2e)[l1_table_offset(v)] = l1e;
 +            v += 1 << L1_PAGETABLE_SHIFT;
 +        }
 +    } while ( v < DIRECTMAP_VIRT_START + (16UL << 20) );
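
For reference, the loop strides by superpage where the direct map uses
2MiB mappings and by page where it has already been shattered. A quick
sanity sketch, assuming the standard x86-64 4-level paging constants:

    /* Assumed: L1_PAGETABLE_SHIFT == 12, L2_PAGETABLE_SHIFT == 21. */
    BUILD_BUG_ON((1 << L1_PAGETABLE_SHIFT) != (4 << 10)); /* 4KiB step */
    BUILD_BUG_ON((1 << L2_PAGETABLE_SHIFT) != (2 << 20)); /* 2MiB step */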
 
 How about just setting l3e.nx and leaving all lower tables alone?

Apart from the trampoline aspect (see above) I don't think it's a
good idea to restrict permissions in non-leaf entries - that just
calls for more intrusive patches if later an individual page needs
them lifted.
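
To illustrate the difference: with NX kept in leaf entries, re-granting
execute on a single page later is a one-entry change. A minimal sketch,
assuming addr holds the page's virtual address and using the
mm.c-internal walker virt_to_xen_l1e():

    /* Lift NX from a single 4KiB page mapped via a leaf (L1) entry. */
    l1_pgentry_t *pl1e = virt_to_xen_l1e(addr);
    l1e_remove_flags(*pl1e, _PAGE_NX_BIT);
    flush_tlb_one_local(addr);

With NX instead set in the L3 entry, it would first have to be cleared
there and then re-applied to every other L2/L1 entry beneath it.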

 --- a/xen/include/asm-x86/x86_64/page.h
 +++ b/xen/include/asm-x86/x86_64/page.h
 @@ -147,9 +147,20 @@ typedef l4_pgentry_t root_pgentry_t;
   */
  #define _PAGE_GUEST_KERNEL (1U<<12)
  
 -#define PAGE_HYPERVISOR (__PAGE_HYPERVISOR | _PAGE_GLOBAL)
 +#define PAGE_HYPERVISOR_RO  (__PAGE_HYPERVISOR_RO  | _PAGE_GLOBAL)
 +#define PAGE_HYPERVISOR_RW  (__PAGE_HYPERVISOR_RW  | _PAGE_GLOBAL)
  #define PAGE_HYPERVISOR_RX  (__PAGE_HYPERVISOR_RX  | _PAGE_GLOBAL)
 -#define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL)
 +#define PAGE_HYPERVISOR_RWX (__PAGE_HYPERVISOR | _PAGE_GLOBAL)
 +
 +#ifdef __ASSEMBLY__
 +/* Dependency on NX being available can't be expressed. */
 +# define PAGE_HYPERVISOR PAGE_HYPERVISOR_RWX
 +# define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL)
 +#else
 +# define PAGE_HYPERVISOR PAGE_HYPERVISOR_RW
 +# define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | \
 +  _PAGE_GLOBAL | _PAGE_NX)
 +#endif
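
The split exists because the assembler can't evaluate a runtime
predicate, while on the C side NX degrades gracefully on hardware
lacking it. Roughly, per the existing definitions in asm-x86/page.h
(a sketch; the exact spelling may differ):

    /* Bit 23 of the software flags representation maps to PTE bit 63. */
    #define _PAGE_NX_BIT (1U << 23)
    #define _PAGE_NX     (cpu_has_nx ? _PAGE_NX_BIT : 0)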
 
 This patch is clearly an improvement, so my comments can possibly be
 deferred to subsequent patches.
 
 After boot, I can't think of a legitimate reason for Xen to have a RWX
 mapping.  As such, leaving __PAGE_HYPERVISOR around constitutes a risk
 that RWX mappings will be used.  It would be nice if we could remove all
 constants (other than BOOT_*) that could lead to accidental use of
 RWX mappings on NX-enabled hardware.

But you realize that __PAGE_HYPERVISOR is particularly used for
intermediate page table entry permissions?
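
For context: x86 combines access rights across paging levels, taking
the most restrictive at each, so non-leaf entries are conventionally
installed fully permissive and the leaf entry alone encodes the
intended rights. A sketch, with pl2e and l1_pg as assumed locals:

    /* Hook a new L1 table into its (non-leaf) L2 slot with full rights;
     * effective permissions then come from the individual L1 entries. */
    l2e_write(pl2e, l2e_from_page(l1_pg, __PAGE_HYPERVISOR));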

 I think we also want a warning on boot if we find NX unavailable.  The
 only 64-bit capable CPUs which do not support NX are now 11 years old,
 and I don't expect many of those to be running Xen these days.  However,
 given that "disable NX" is a common BIOS option for compatibility
 reasons, we should press people to alter their settings if possible.

Sure, easily done.
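
A minimal sketch of such a warning (exact wording and call site are
assumptions):

    if ( !cpu_has_nx )
        printk(XENLOG_WARNING
               "Warning: NX (Execute Disable) not enabled. "
               "Check whether it has been disabled in the BIOS/firmware.\n");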

Jan




Re: [Xen-devel] [PATCH 4/4] x86: switch default mapping attributes to non-executable

2015-05-19 Thread Andrew Cooper
On 18/05/15 13:47, Jan Beulich wrote:
 Only a very limited subset of mappings need to be done as executable
 ones; in particular the direct mapping should not be executable to
 limit the damage attackers can cause by exploiting security relevant
 bugs.

 The EFI change at once includes an adjustment to set NX only when
 supported by the hardware.

 Signed-off-by: Jan Beulich jbeul...@suse.com

 --- a/xen/arch/x86/domain.c
 +++ b/xen/arch/x86/domain.c
 @@ -293,7 +293,7 @@ struct vcpu_guest_context *alloc_vcpu_gu
  free_vcpu_guest_context(NULL);
  return NULL;
  }
 -__set_fixmap(idx - i, page_to_mfn(pg), __PAGE_HYPERVISOR);
 +__set_fixmap(idx - i, page_to_mfn(pg), __PAGE_HYPERVISOR_RW);
  per_cpu(vgc_pages[i], cpu) = pg;
  }
  return (void *)fix_to_virt(idx);
 --- a/xen/arch/x86/domain_page.c
 +++ b/xen/arch/x86/domain_page.c
 @@ -160,7 +160,7 @@ void *map_domain_page(unsigned long mfn)
  
  spin_unlock(&dcache->lock);
  
 -l1e_write(MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
 +l1e_write(MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR_RW));
  
   out:
  local_irq_restore(flags);
 --- a/xen/arch/x86/mm.c
 +++ b/xen/arch/x86/mm.c
 @@ -4416,7 +4416,7 @@ long set_gdt(struct vcpu *v, 
  for ( i = 0; i < nr_pages; i++ )
  {
  v->arch.pv_vcpu.gdt_frames[i] = frames[i];
 -l1e_write(pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR));
 +l1e_write(pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR_RW));
  }
  
  xfree(pfns);
 @@ -6004,7 +6004,7 @@ int create_perdomain_mapping(struct doma
  if ( !IS_NIL(ppg) )
  *ppg++ = pg;
  l1tab[l1_table_offset(va)] =
 -l1e_from_page(pg, __PAGE_HYPERVISOR | _PAGE_AVAIL0);
 +l1e_from_page(pg, __PAGE_HYPERVISOR_RW | _PAGE_AVAIL0);
  l2e_add_flags(*pl2e, _PAGE_AVAIL0);
  }
  else
 @@ -6133,7 +6133,7 @@ void memguard_init(void)
  (unsigned long)__va(start),
  start >> PAGE_SHIFT,
  (__pa(_end) + PAGE_SIZE - 1 - start) >> PAGE_SHIFT,
 -__PAGE_HYPERVISOR|MAP_SMALL_PAGES);
 +__PAGE_HYPERVISOR_RW|MAP_SMALL_PAGES);
  BUG_ON(start != xen_phys_start);
  map_pages_to_xen(
  XEN_VIRT_START,
 @@ -6146,7 +6146,7 @@ static void __memguard_change_range(void
  {
  unsigned long _p = (unsigned long)p;
  unsigned long _l = (unsigned long)l;
 -unsigned int flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;
 +unsigned int flags = __PAGE_HYPERVISOR_RW | MAP_SMALL_PAGES;
  
  /* Ensure we are dealing with a page-aligned whole number of pages. */
  ASSERT((_p & ~PAGE_MASK) == 0);
 --- a/xen/arch/x86/setup.c
 +++ b/xen/arch/x86/setup.c
 @@ -900,7 +900,7 @@ void __init noreturn __start_xen(unsigne
  /* The only data mappings to be relocated are in the Xen area. */
  pl2e = __va(__pa(l2_xenmap));
  *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT,
 -   PAGE_HYPERVISOR | _PAGE_PSE);
 +   PAGE_HYPERVISOR_RWX | _PAGE_PSE);
  for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
  {
  if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
 @@ -1087,7 +1087,7 @@ void __init noreturn __start_xen(unsigne
  /* This range must not be passed to the boot allocator and
   * must also not be mapped with _PAGE_GLOBAL. */
  map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e),
 - PFN_DOWN(e - map_e), __PAGE_HYPERVISOR);
 + PFN_DOWN(e - map_e), __PAGE_HYPERVISOR_RW);
  }
  if ( s < map_s )
  {
 --- a/xen/arch/x86/x86_64/mm.c
 +++ b/xen/arch/x86/x86_64/mm.c
 @@ -895,6 +895,33 @@ void __init subarch_init_memory(void)
  share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
  }
  }
 +
 +    /* Mark low 16Mb of direct map NX if hardware supports it. */
 +    if ( !cpu_has_nx )
 +        return;
 +
 +    v = DIRECTMAP_VIRT_START + (1UL << 20);

What about 0-1MB?

 +    l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[l3_table_offset(v)];
 +    ASSERT(l3e_get_flags(l3e) & _PAGE_PRESENT);
 +    do {
 +        l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
 +        ASSERT(l2e_get_flags(l2e) & _PAGE_PRESENT);
 +        if ( l2e_get_flags(l2e) & _PAGE_PSE )
 +        {
 +            l2e_add_flags(l2e, _PAGE_NX_BIT);
 +            l3e_to_l2e(l3e)[l2_table_offset(v)] = l2e;
 +            v += 1 << L2_PAGETABLE_SHIFT;
 +        }
 +        else
 +        {
 +            l1_pgentry_t l1e = l2e_to_l1e(l2e)[l1_table_offset(v)];
 +
 +            ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT);
 +            l1e_add_flags(l1e, _PAGE_NX_BIT);
 +            l2e_to_l1e(l2e)[l1_table_offset(v)] = l1e;
 +            v += 1 << L1_PAGETABLE_SHIFT;

[Xen-devel] [PATCH 4/4] x86: switch default mapping attributes to non-executable

2015-05-18 Thread Jan Beulich
Only a very limited subset of mappings need to be done as executable
ones; in particular the direct mapping should not be executable to
limit the damage attackers can cause by exploiting security relevant
bugs.

The EFI change at once includes an adjustment to set NX only when
supported by the hardware.

Signed-off-by: Jan Beulich jbeul...@suse.com
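
Regarding the EFI adjustment: on CPUs without NX support, PTE bit 63 is
reserved, and setting it would trigger reserved-bit page faults. A
sketch of the general pattern (not the patch's exact code; the names
are assumptions):

    unsigned int prot = __PAGE_HYPERVISOR;  /* base attributes */
    if ( cpu_has_nx )
        prot |= _PAGE_NX_BIT;               /* data mappings become NX */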

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -293,7 +293,7 @@ struct vcpu_guest_context *alloc_vcpu_gu
 free_vcpu_guest_context(NULL);
 return NULL;
 }
-__set_fixmap(idx - i, page_to_mfn(pg), __PAGE_HYPERVISOR);
+__set_fixmap(idx - i, page_to_mfn(pg), __PAGE_HYPERVISOR_RW);
 per_cpu(vgc_pages[i], cpu) = pg;
 }
 return (void *)fix_to_virt(idx);
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -160,7 +160,7 @@ void *map_domain_page(unsigned long mfn)
 
 spin_unlock(&dcache->lock);
 
-l1e_write(MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+l1e_write(MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR_RW));
 
  out:
 local_irq_restore(flags);
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4416,7 +4416,7 @@ long set_gdt(struct vcpu *v, 
 for ( i = 0; i < nr_pages; i++ )
 {
 v->arch.pv_vcpu.gdt_frames[i] = frames[i];
-l1e_write(pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR));
+l1e_write(pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR_RW));
 }
 
 xfree(pfns);
@@ -6004,7 +6004,7 @@ int create_perdomain_mapping(struct doma
 if ( !IS_NIL(ppg) )
 *ppg++ = pg;
 l1tab[l1_table_offset(va)] =
-l1e_from_page(pg, __PAGE_HYPERVISOR | _PAGE_AVAIL0);
+l1e_from_page(pg, __PAGE_HYPERVISOR_RW | _PAGE_AVAIL0);
 l2e_add_flags(*pl2e, _PAGE_AVAIL0);
 }
 else
@@ -6133,7 +6133,7 @@ void memguard_init(void)
 (unsigned long)__va(start),
 start >> PAGE_SHIFT,
 (__pa(_end) + PAGE_SIZE - 1 - start) >> PAGE_SHIFT,
-__PAGE_HYPERVISOR|MAP_SMALL_PAGES);
+__PAGE_HYPERVISOR_RW|MAP_SMALL_PAGES);
 BUG_ON(start != xen_phys_start);
 map_pages_to_xen(
 XEN_VIRT_START,
@@ -6146,7 +6146,7 @@ static void __memguard_change_range(void
 {
 unsigned long _p = (unsigned long)p;
 unsigned long _l = (unsigned long)l;
-unsigned int flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;
+unsigned int flags = __PAGE_HYPERVISOR_RW | MAP_SMALL_PAGES;
 
 /* Ensure we are dealing with a page-aligned whole number of pages. */
 ASSERT((_p & ~PAGE_MASK) == 0);
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -900,7 +900,7 @@ void __init noreturn __start_xen(unsigne
 /* The only data mappings to be relocated are in the Xen area. */
 pl2e = __va(__pa(l2_xenmap));
 *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT,
-   PAGE_HYPERVISOR | _PAGE_PSE);
+   PAGE_HYPERVISOR_RWX | _PAGE_PSE);
 for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
 {
 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
@@ -1087,7 +1087,7 @@ void __init noreturn __start_xen(unsigne
 /* This range must not be passed to the boot allocator and
  * must also not be mapped with _PAGE_GLOBAL. */
 map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e),
- PFN_DOWN(e - map_e), __PAGE_HYPERVISOR);
+ PFN_DOWN(e - map_e), __PAGE_HYPERVISOR_RW);
 }
 if ( s < map_s )
 {
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -895,6 +895,33 @@ void __init subarch_init_memory(void)
 share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
 }
 }
+
+    /* Mark low 16Mb of direct map NX if hardware supports it. */
+    if ( !cpu_has_nx )
+        return;
+
+    v = DIRECTMAP_VIRT_START + (1UL << 20);
+    l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[l3_table_offset(v)];
+    ASSERT(l3e_get_flags(l3e) & _PAGE_PRESENT);
+    do {
+        l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
+        ASSERT(l2e_get_flags(l2e) & _PAGE_PRESENT);
+        if ( l2e_get_flags(l2e) & _PAGE_PSE )
+        {
+            l2e_add_flags(l2e, _PAGE_NX_BIT);
+            l3e_to_l2e(l3e)[l2_table_offset(v)] = l2e;
+            v += 1 << L2_PAGETABLE_SHIFT;
+        }
+        else
+        {
+            l1_pgentry_t l1e = l2e_to_l1e(l2e)[l1_table_offset(v)];
+
+            ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT);
+            l1e_add_flags(l1e, _PAGE_NX_BIT);
+            l2e_to_l1e(l2e)[l1_table_offset(v)] = l1e;
+            v += 1 << L1_PAGETABLE_SHIFT;
+        }
+    } while ( v < DIRECTMAP_VIRT_START + (16UL << 20) );
 }
 
 long subarch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
@@ -1359,7 +1386,7 @@ int