Module Name:    src
Committed By:   maxv
Date:           Wed Nov 15 18:02:37 UTC 2017

Modified Files:
        src/sys/arch/amd64/conf: Makefile.amd64
        src/sys/arch/amd64/stand/prekern: elf.c mm.c prekern.h
        src/sys/arch/x86/x86: pmap.c
        src/sys/lib/libsa: loadfile_elf32.c

Log Message:
Support large pages on KASLR kernels, in a way that does not reduce
randomness, but rather increases it.

The size of the kernel sub-blocks is changed to 1MB. This produces a
kernel whose sections are always < 2MB in size, so that each can fit in a
large page.

Each section is put in a 2MB physical chunk. In this chunk, there is
approximately 1MB of padding. The prekern uses a random offset, aligned to
sh_addralign, to shift the section within physical memory.

For example, physical memory layout created by the bootloader for .text.4
and .rodata.0:
 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
 |+---------------+                  |+---------------+                  |
 ||    .text.4    |       PAD        ||   .rodata.0   |       PAD        |
 |+---------------+                  |+---------------+                  |
 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
 PA                                  PA+2MB                         PA+4MB

Then, physical memory layout, after having been shifted by the prekern:
 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
 | P +---------------+               |          +---------------+        |
 | A |    .text.4    |      PAD      |   PAD    |   .rodata.0   |   PAD  |
 | D +---------------+               |          +---------------+        |
 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
 PA                                  PA+2MB                         PA+4MB

The kernel maps these 2MB physical chunks with 2MB large pages. Therefore,
randomness is enforced at both the virtual and physical levels, and the
resulting entropy is higher than that of the previous implementation.
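
As a rough illustration of the extra physical entropy (the numbers are
hypothetical: a ~1.2MB .text sub-block with sh_addralign = 64; the
computation mirrors mm_shift_segment() in the mm.c diff below):

	#include <math.h>
	#include <stdio.h>
	#include <stddef.h>

	#define ROUNDUP(x, a)	((((x) + (a) - 1) / (a)) * (a))

	int
	main(void)
	{
		size_t elfsz = 0x133000;	/* hypothetical section size */
		size_t pagesz = 0x200000;	/* NBPD_L2: one 2MB large page */
		size_t elfalign = 64;		/* sh_addralign (ELFROUND if 0) */

		/* Room left in the chunk, and the aligned offsets that fit. */
		size_t shiftsize = ROUNDUP(elfsz, pagesz) -
		    ROUNDUP(elfsz, elfalign);
		size_t positions = shiftsize / elfalign;

		printf("%zu candidate offsets (~%.1f bits)\n", positions,
		    log2((double)positions));
		return 0;
	}

Here shiftsize is 0xCD000, giving 13120 candidate offsets, i.e. roughly
13.7 extra bits of physical entropy for that section alone.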

The padding around the section is filled by the prekern. So as not to
consume too much memory, sections smaller than PAGE_SIZE are mapped with
normal pages - there is no point in optimizing them. Within these normal
pages, the same shift is applied.
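
Condensed, the shift-and-fill step looks like this (a sketch assembled
from mm_shift_segment() and mm_map_segment() in the mm.c diff below;
'pad' is PAD_TEXT, PAD_RODATA or PAD_DATA depending on the segment type):

	/*
	 * 'va' is the start of the already mapped, pagesz-rounded chunk
	 * of 'size' bytes; 'offset' is the random, sh_addralign-aligned
	 * shift picked by the prekern.
	 */
	static void
	shift_and_fill(vaddr_t va, size_t size, size_t elfsz, size_t offset,
	    char pad)
	{
		/* Move the section to its random offset inside the chunk. */
		memmove((void *)(va + offset), (void *)va, elfsz);

		/* Fill the pad on both sides, so no stale bytes stay mapped. */
		memset((void *)va, pad, offset);
		memset((void *)(va + offset + elfsz), pad,
		    size - elfsz - offset);
	}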

This change has two additional advantages: (a) TLB-based cache attacks are
mostly mitigated, because even if an attacker can determine that a given
page-aligned range is mapped as executable, they do not know where exactly
within that range the section actually begins; and (b) since the physical
layout is now slightly randomized, some rare physical attacks become more
difficult to conduct.

NOTE: after this change you need to update GENERIC_KASLR / prekern /
bootloader.


To generate a diff of this commit:
cvs rdiff -u -r1.63 -r1.64 src/sys/arch/amd64/conf/Makefile.amd64
cvs rdiff -u -r1.13 -r1.14 src/sys/arch/amd64/stand/prekern/elf.c \
    src/sys/arch/amd64/stand/prekern/mm.c
cvs rdiff -u -r1.12 -r1.13 src/sys/arch/amd64/stand/prekern/prekern.h
cvs rdiff -u -r1.264 -r1.265 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.50 -r1.51 src/sys/lib/libsa/loadfile_elf32.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/conf/Makefile.amd64
diff -u src/sys/arch/amd64/conf/Makefile.amd64:1.63 src/sys/arch/amd64/conf/Makefile.amd64:1.64
--- src/sys/arch/amd64/conf/Makefile.amd64:1.63	Tue Nov 14 10:15:40 2017
+++ src/sys/arch/amd64/conf/Makefile.amd64	Wed Nov 15 18:02:37 2017
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile.amd64,v 1.63 2017/11/14 10:15:40 maxv Exp $
+#	$NetBSD: Makefile.amd64,v 1.64 2017/11/15 18:02:37 maxv Exp $
 
 # Makefile for NetBSD
 #
@@ -62,7 +62,7 @@ KERN_AS=	library
 ##
 TEXTADDR?=	0xffffffff80200000
 .if defined(KASLR)
-EXTRA_LINKFLAGS=	--split-by-file=0x200000 -r -d
+EXTRA_LINKFLAGS=	--split-by-file=0x100000 -r -d
 KERNLDSCRIPT?= ${AMD64}/conf/kern.ldscript.kaslr
 .else
 EXTRA_LINKFLAGS=	-z max-page-size=0x200000

Index: src/sys/arch/amd64/stand/prekern/elf.c
diff -u src/sys/arch/amd64/stand/prekern/elf.c:1.13 src/sys/arch/amd64/stand/prekern/elf.c:1.14
--- src/sys/arch/amd64/stand/prekern/elf.c:1.13	Tue Nov 14 07:06:34 2017
+++ src/sys/arch/amd64/stand/prekern/elf.c	Wed Nov 15 18:02:36 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: elf.c,v 1.13 2017/11/14 07:06:34 maxv Exp $	*/
+/*	$NetBSD: elf.c,v 1.14 2017/11/15 18:02:36 maxv Exp $	*/
 
 /*
  * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
@@ -267,7 +267,7 @@ elf_map_sections(void)
 	int segtype;
 	vaddr_t secva;
 	paddr_t secpa;
-	size_t i, secsz;
+	size_t i, secsz, secalign;
 
 	for (i = 0; i < eif.ehdr->e_shnum; i++) {
 		shdr = &eif.shdr[i];
@@ -289,10 +289,11 @@ elf_map_sections(void)
 		}
 		secpa = basepa + shdr->sh_offset;
 		secsz = shdr->sh_size;
+		secalign = shdr->sh_addralign;
 		ASSERT(shdr->sh_offset != 0);
 		ASSERT(secpa % PAGE_SIZE == 0);
 
-		secva = mm_map_segment(segtype, secpa, secsz);
+		secva = mm_map_segment(segtype, secpa, secsz, secalign);
 
 		/* We want (headva + sh_offset) to be the VA of the section. */
 		ASSERT(secva > headva);
Index: src/sys/arch/amd64/stand/prekern/mm.c
diff -u src/sys/arch/amd64/stand/prekern/mm.c:1.13 src/sys/arch/amd64/stand/prekern/mm.c:1.14
--- src/sys/arch/amd64/stand/prekern/mm.c:1.13	Tue Nov 14 07:06:34 2017
+++ src/sys/arch/amd64/stand/prekern/mm.c	Wed Nov 15 18:02:36 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: mm.c,v 1.13 2017/11/14 07:06:34 maxv Exp $	*/
+/*	$NetBSD: mm.c,v 1.14 2017/11/15 18:02:36 maxv Exp $	*/
 
 /*
  * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
@@ -34,6 +34,8 @@
 #define PAD_RODATA	0x00
 #define PAD_DATA	0x00
 
+#define ELFROUND	64
+
 static const pt_entry_t protection_codes[3] = {
 	[MM_PROT_READ] = PG_RO | PG_NX,
 	[MM_PROT_WRITE] = PG_RW | PG_NX,
@@ -236,7 +238,7 @@ mm_map_head(void)
 }
 
 static vaddr_t
-mm_randva_kregion(size_t size)
+mm_randva_kregion(size_t size, size_t align)
 {
 	vaddr_t sva, eva;
 	vaddr_t randva;
@@ -247,7 +249,7 @@ mm_randva_kregion(size_t size)
 	while (1) {
 		rnd = mm_rand_num64();
 		randva = rounddown(KASLR_WINDOW_BASE +
-		    rnd % (KASLR_WINDOW_SIZE - size), PAGE_SIZE);
+		    rnd % (KASLR_WINDOW_SIZE - size), align);
 
 		/* Detect collisions */
 		ok = true;
@@ -313,22 +315,55 @@ bootspace_addseg(int type, vaddr_t va, p
 	fatal("bootspace_addseg: segments full");
 }
 
+static size_t
+mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
+{
+	size_t shiftsize, offset;
+	uint64_t rnd;
+
+	if (elfalign == 0) {
+		elfalign = ELFROUND;
+	}
+
+	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
+	if (shiftsize == 0) {
+		return 0;
+	}
+
+	rnd = mm_rand_num64();
+	offset = roundup(rnd % shiftsize, elfalign);
+	ASSERT((va + offset) % elfalign == 0);
+
+	memmove((void *)(va + offset), (void *)va, elfsz);
+
+	return offset;
+}
+
 vaddr_t
-mm_map_segment(int segtype, paddr_t pa, size_t elfsz)
+mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
 {
-	size_t i, npages, size;
+	size_t i, npages, size, pagesz, offset;
 	vaddr_t randva;
 	char pad;
 
-	size = roundup(elfsz, PAGE_SIZE);
-	randva = mm_randva_kregion(size);
-	npages = size / PAGE_SIZE;
+	if (elfsz < PAGE_SIZE) {
+		pagesz = NBPD_L1;
+	} else {
+		pagesz = NBPD_L2;
+	}
+
+	size = roundup(elfsz, pagesz);
+	randva = mm_randva_kregion(size, pagesz);
 
+	npages = size / PAGE_SIZE;
 	for (i = 0; i < npages; i++) {
 		mm_enter_pa(pa + i * PAGE_SIZE,
 		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
 	}
 
+	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
+	ASSERT(offset + elfsz <= size);
+
 	if (segtype == BTSEG_TEXT) {
 		pad = PAD_TEXT;
 	} else if (segtype == BTSEG_RODATA) {
@@ -336,11 +371,12 @@ mm_map_segment(int segtype, paddr_t pa, 
 	} else {
 		pad = PAD_DATA;
 	}
-	memset((void *)(randva + elfsz), pad, size - elfsz);
+	memset((void *)randva, pad, offset);
+	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);
 
 	bootspace_addseg(segtype, randva, pa, size);
 
-	return randva;
+	return (randva + offset);
 }
 
 static void
@@ -357,7 +393,7 @@ mm_map_boot(void)
 
 	/* Create the page tree */
 	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
-	randva = mm_randva_kregion(size);
+	randva = mm_randva_kregion(size, PAGE_SIZE);
 
 	/* Enter the area and build the ELF info */
 	bootpa = bootspace_getend();

Index: src/sys/arch/amd64/stand/prekern/prekern.h
diff -u src/sys/arch/amd64/stand/prekern/prekern.h:1.12 src/sys/arch/amd64/stand/prekern/prekern.h:1.13
--- src/sys/arch/amd64/stand/prekern/prekern.h:1.12	Tue Nov 14 07:06:34 2017
+++ src/sys/arch/amd64/stand/prekern/prekern.h	Wed Nov 15 18:02:36 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: prekern.h,v 1.12 2017/11/14 07:06:34 maxv Exp $	*/
+/*	$NetBSD: prekern.h,v 1.13 2017/11/15 18:02:36 maxv Exp $	*/
 
 /*
  * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
@@ -111,7 +111,7 @@ void jump_kernel(vaddr_t);
 void mm_init(paddr_t);
 paddr_t mm_vatopa(vaddr_t);
 void mm_bootspace_mprotect(void);
-vaddr_t mm_map_segment(int, paddr_t, size_t);
+vaddr_t mm_map_segment(int, paddr_t, size_t, size_t);
 void mm_map_kernel(void);
 
 /* prekern.c */

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.264 src/sys/arch/x86/x86/pmap.c:1.265
--- src/sys/arch/x86/x86/pmap.c:1.264	Sat Nov 11 12:51:05 2017
+++ src/sys/arch/x86/x86/pmap.c	Wed Nov 15 18:02:37 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.264 2017/11/11 12:51:05 maxv Exp $	*/
+/*	$NetBSD: pmap.c,v 1.265 2017/11/15 18:02:37 maxv Exp $	*/
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017 The NetBSD Foundation, Inc.
@@ -170,13 +170,12 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.264 2017/11/11 12:51:05 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.265 2017/11/15 18:02:37 maxv Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
 #include "opt_multiprocessor.h"
 #include "opt_xen.h"
-#include "opt_kaslr.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -1603,11 +1602,6 @@ pmap_remap_largepages(void)
 	paddr_t pa;
 	size_t i;
 
-#ifdef KASLR
-	/* XXX no large pages yet, soon */
-	return;
-#endif
-
 	/* Remap the kernel text using large pages. */
 	for (i = 0; i < BTSPACE_NSEGS; i++) {
 		if (bootspace.segs[i].type != BTSEG_TEXT) {

Index: src/sys/lib/libsa/loadfile_elf32.c
diff -u src/sys/lib/libsa/loadfile_elf32.c:1.50 src/sys/lib/libsa/loadfile_elf32.c:1.51
--- src/sys/lib/libsa/loadfile_elf32.c:1.50	Mon Nov 13 21:32:21 2017
+++ src/sys/lib/libsa/loadfile_elf32.c	Wed Nov 15 18:02:37 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: loadfile_elf32.c,v 1.50 2017/11/13 21:32:21 maxv Exp $ */
+/* $NetBSD: loadfile_elf32.c,v 1.51 2017/11/15 18:02:37 maxv Exp $ */
 
 /*
  * Copyright (c) 1997, 2008, 2017 The NetBSD Foundation, Inc.
@@ -265,7 +265,8 @@ externalize_shdr(Elf_Byte bo, Elf_Shdr *
 
 /* -------------------------------------------------------------------------- */
 
-#define KERNALIGN 4096	/* XXX should depend on marks[] */
+#define KERNALIGN_SMALL (1 << 12)	/* XXX should depend on marks[] */
+#define KERNALIGN_LARGE (1 << 21)	/* XXX should depend on marks[] */
 
 /*
  * Read some data from a file, and put it in the bootloader memory (local).
@@ -343,7 +344,7 @@ ELFNAMEEND(loadfile_dynamic)(int fd, Elf
 	Elf_Shdr *shdr;
 	Elf_Addr shpp, addr;
 	int i, j, loaded;
-	size_t size, shdrsz;
+	size_t size, shdrsz, align;
 	Elf_Addr maxp, elfp = 0;
 	int ret;
 
@@ -385,14 +386,18 @@ ELFNAMEEND(loadfile_dynamic)(int fd, Elf
 	/*
 	 * Load the KERNEL SECTIONS.
 	 */
-	maxp = roundup(maxp, KERNALIGN);
+	maxp = roundup(maxp, KERNALIGN_SMALL);
 	for (i = 0; i < elf->e_shnum; i++) {
-		addr = maxp;
-		size = (size_t)shdr[i].sh_size;
-
 		if (!(shdr[i].sh_flags & SHF_ALLOC)) {
 			continue;
 		}
+		size = (size_t)shdr[i].sh_size;
+		if (size <= KERNALIGN_SMALL) {
+			align = KERNALIGN_SMALL;
+		} else {
+			align = KERNALIGN_LARGE;
+		}
+		addr = roundup(maxp, align);
 
 		loaded = 0;
 		switch (shdr[i].sh_type) {
@@ -415,10 +420,11 @@ ELFNAMEEND(loadfile_dynamic)(int fd, Elf
 		}
 
 		if (loaded) {
-			shdr[i].sh_offset = maxp - elfp;
-			maxp = roundup(maxp + size, KERNALIGN);
+			shdr[i].sh_offset = addr - elfp;
+			maxp = roundup(addr + size, align);
 		}
 	}
+	maxp = roundup(maxp, KERNALIGN_LARGE);
 
 	/*
 	 * Load the SYM+REL SECTIONS.
@@ -456,7 +462,7 @@ ELFNAMEEND(loadfile_dynamic)(int fd, Elf
 			break;
 		}
 	}
-	maxp = roundup(maxp, KERNALIGN);
+	maxp = roundup(maxp, KERNALIGN_SMALL);
 
 	/*
 	 * Finally, load the SECTION HEADERS.
