Module Name:	src
Committed By:	maxv
Date:		Wed Nov 15 18:02:37 UTC 2017
Modified Files:
	src/sys/arch/amd64/conf: Makefile.amd64
	src/sys/arch/amd64/stand/prekern: elf.c mm.c prekern.h
	src/sys/arch/x86/x86: pmap.c
	src/sys/lib/libsa: loadfile_elf32.c

Log Message:
Support large pages on KASLR kernels, in a way that does not reduce
randomness but, on the contrary, increases it.

The size of the kernel sub-blocks is changed to be 1MB. This produces a
kernel whose sections are always < 2MB in size, so each one can fit in a
large page.

Each section is put in a 2MB physical chunk, which leaves approximately
1MB of padding. The prekern uses a random offset, aligned to
sh_addralign, to shift the section within its chunk in physical memory.

For example, the physical memory layout created by the bootloader for
.text.4 and .rodata.0:

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
|+---------------+                  |+---------------+                  |
||    .text.4    |       PAD        ||   .rodata.0   |       PAD        |
|+---------------+                  |+---------------+                  |
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
PA                                  PA+2MB                              PA+4MB

Then, the physical memory layout after the prekern has applied the
shift:

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
| P +---------------+               |     +---------------+             |
| A |    .text.4    |      PAD      | PAD |   .rodata.0   |     PAD     |
| D +---------------+               |     +---------------+             |
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
PA                                  PA+2MB                              PA+4MB

The kernel maps these 2MB physical chunks with 2MB large pages.
Therefore, randomness is enforced at both the virtual and physical
levels, and the resulting entropy is higher than that of the previous
implementation. The padding around each section is filled by the
prekern.

To avoid consuming too much memory, sections smaller than PAGE_SIZE are
mapped with normal pages, since there is no point in optimizing them.
The same shift is applied within these normal pages.

This change has two additional advantages: (a) TLB-based cache attacks
are mostly mitigated, because even if an attacker can determine that a
given page-aligned range is mapped as executable, they do not know where
within that range the section actually begins; and (b) since the
physical layout is now slightly randomized, some rare physical attacks
become more difficult to conduct.

NOTE: after this change you need to update GENERIC_KASLR / prekern /
bootloader.


To generate a diff of this commit:
cvs rdiff -u -r1.63 -r1.64 src/sys/arch/amd64/conf/Makefile.amd64
cvs rdiff -u -r1.13 -r1.14 src/sys/arch/amd64/stand/prekern/elf.c \
    src/sys/arch/amd64/stand/prekern/mm.c
cvs rdiff -u -r1.12 -r1.13 src/sys/arch/amd64/stand/prekern/prekern.h
cvs rdiff -u -r1.264 -r1.265 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.50 -r1.51 src/sys/lib/libsa/loadfile_elf32.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
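For concreteness, before the diffs, here is a small standalone sketch of
the shift arithmetic; it mirrors the mm_shift_segment() logic added to
mm.c below, but the section size, alignment, and random value are
made-up inputs chosen only for illustration:

	#include <stdio.h>
	#include <stdint.h>
	#include <stddef.h>

	#define ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

	int
	main(void)
	{
		size_t pagesz = 0x200000;	/* 2MB chunk (NBPD_L2) */
		size_t elfsz = 0x12c000;	/* hypothetical ~1.2MB section */
		size_t elfalign = 64;		/* sh_addralign (ELFROUND if zero) */
		uint64_t rnd = 0x1234567;	/* stands in for mm_rand_num64() */
		size_t shiftsize, offset;

		/* Room left in the 2MB chunk once the section is in it. */
		shiftsize = ROUNDUP(elfsz, pagesz) - ROUNDUP(elfsz, elfalign);

		/* Random offset within that room, kept elfalign-aligned. */
		offset = ROUNDUP(rnd % shiftsize, elfalign);

		printf("shiftsize=%#zx (%zu positions)\n", shiftsize,
		    shiftsize / elfalign);
		printf("offset=%#zx\n", offset);
		return 0;
	}

With these sample inputs, shiftsize is 0xd4000 (868352) bytes, i.e.
13568 distinct 64-byte-aligned positions, or roughly 13.7 bits of
additional entropy per section on top of the randomized VA.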
Modified files:

Index: src/sys/arch/amd64/conf/Makefile.amd64
diff -u src/sys/arch/amd64/conf/Makefile.amd64:1.63 src/sys/arch/amd64/conf/Makefile.amd64:1.64
--- src/sys/arch/amd64/conf/Makefile.amd64:1.63	Tue Nov 14 10:15:40 2017
+++ src/sys/arch/amd64/conf/Makefile.amd64	Wed Nov 15 18:02:37 2017
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile.amd64,v 1.63 2017/11/14 10:15:40 maxv Exp $
+#	$NetBSD: Makefile.amd64,v 1.64 2017/11/15 18:02:37 maxv Exp $

 # Makefile for NetBSD
 #
@@ -62,7 +62,7 @@ KERN_AS=	library
 ##
 TEXTADDR?=	0xffffffff80200000
 .if defined(KASLR)
-EXTRA_LINKFLAGS=	--split-by-file=0x200000 -r -d
+EXTRA_LINKFLAGS=	--split-by-file=0x100000 -r -d
 KERNLDSCRIPT?=	${AMD64}/conf/kern.ldscript.kaslr
 .else
 EXTRA_LINKFLAGS=	-z max-page-size=0x200000

Index: src/sys/arch/amd64/stand/prekern/elf.c
diff -u src/sys/arch/amd64/stand/prekern/elf.c:1.13 src/sys/arch/amd64/stand/prekern/elf.c:1.14
--- src/sys/arch/amd64/stand/prekern/elf.c:1.13	Tue Nov 14 07:06:34 2017
+++ src/sys/arch/amd64/stand/prekern/elf.c	Wed Nov 15 18:02:36 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: elf.c,v 1.13 2017/11/14 07:06:34 maxv Exp $	*/
+/*	$NetBSD: elf.c,v 1.14 2017/11/15 18:02:36 maxv Exp $	*/

 /*
  * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
@@ -267,7 +267,7 @@ elf_map_sections(void)
 	int segtype;
 	vaddr_t secva;
 	paddr_t secpa;
-	size_t i, secsz;
+	size_t i, secsz, secalign;

 	for (i = 0; i < eif.ehdr->e_shnum; i++) {
 		shdr = &eif.shdr[i];
@@ -289,10 +289,11 @@ elf_map_sections(void)
 		}
 		secpa = basepa + shdr->sh_offset;
 		secsz = shdr->sh_size;
+		secalign = shdr->sh_addralign;
 		ASSERT(shdr->sh_offset != 0);
 		ASSERT(secpa % PAGE_SIZE == 0);

-		secva = mm_map_segment(segtype, secpa, secsz);
+		secva = mm_map_segment(segtype, secpa, secsz, secalign);

 		/* We want (headva + sh_offset) to be the VA of the section. */
 		ASSERT(secva > headva);

Index: src/sys/arch/amd64/stand/prekern/mm.c
diff -u src/sys/arch/amd64/stand/prekern/mm.c:1.13 src/sys/arch/amd64/stand/prekern/mm.c:1.14
--- src/sys/arch/amd64/stand/prekern/mm.c:1.13	Tue Nov 14 07:06:34 2017
+++ src/sys/arch/amd64/stand/prekern/mm.c	Wed Nov 15 18:02:36 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: mm.c,v 1.13 2017/11/14 07:06:34 maxv Exp $	*/
+/*	$NetBSD: mm.c,v 1.14 2017/11/15 18:02:36 maxv Exp $	*/

 /*
  * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
@@ -34,6 +34,8 @@
 #define PAD_RODATA	0x00
 #define PAD_DATA	0x00

+#define ELFROUND	64
+
 static const pt_entry_t protection_codes[3] = {
 	[MM_PROT_READ] = PG_RO | PG_NX,
 	[MM_PROT_WRITE] = PG_RW | PG_NX,
@@ -236,7 +238,7 @@ mm_map_head(void)
 }

 static vaddr_t
-mm_randva_kregion(size_t size)
+mm_randva_kregion(size_t size, size_t align)
 {
 	vaddr_t sva, eva;
 	vaddr_t randva;
@@ -247,7 +249,7 @@ mm_randva_kregion(size_t size)
 	while (1) {
 		rnd = mm_rand_num64();
 		randva = rounddown(KASLR_WINDOW_BASE +
-		    rnd % (KASLR_WINDOW_SIZE - size), PAGE_SIZE);
+		    rnd % (KASLR_WINDOW_SIZE - size), align);

 		/* Detect collisions */
 		ok = true;
@@ -313,22 +315,55 @@ bootspace_addseg(int type, vaddr_t va, p
 	fatal("bootspace_addseg: segments full");
 }

+static size_t
+mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
+{
+	size_t shiftsize, offset;
+	uint64_t rnd;
+
+	if (elfalign == 0) {
+		elfalign = ELFROUND;
+	}
+
+	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
+	if (shiftsize == 0) {
+		return 0;
+	}
+
+	rnd = mm_rand_num64();
+	offset = roundup(rnd % shiftsize, elfalign);
+	ASSERT((va + offset) % elfalign == 0);
+
+	memmove((void *)(va + offset), (void *)va, elfsz);
+
+	return offset;
+}
+
 vaddr_t
-mm_map_segment(int segtype, paddr_t pa, size_t elfsz)
+mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
 {
-	size_t i, npages, size;
+	size_t i, npages, size, pagesz, offset;
 	vaddr_t randva;
 	char pad;

-	size = roundup(elfsz, PAGE_SIZE);
-	randva = mm_randva_kregion(size);
-	npages = size / PAGE_SIZE;
+	if (elfsz < PAGE_SIZE) {
+		pagesz = NBPD_L1;
+	} else {
+		pagesz = NBPD_L2;
+	}
+
+	size = roundup(elfsz, pagesz);
+	randva = mm_randva_kregion(size, pagesz);
+	npages = size / PAGE_SIZE;

 	for (i = 0; i < npages; i++) {
 		mm_enter_pa(pa + i * PAGE_SIZE,
 		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
 	}

+	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
+	ASSERT(offset + elfsz <= size);
+
 	if (segtype == BTSEG_TEXT) {
 		pad = PAD_TEXT;
 	} else if (segtype == BTSEG_RODATA) {
@@ -336,11 +371,12 @@ mm_map_segment(int segtype, paddr_t pa,
 	} else {
 		pad = PAD_DATA;
 	}
-	memset((void *)(randva + elfsz), pad, size - elfsz);
+	memset((void *)randva, pad, offset);
+	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);

 	bootspace_addseg(segtype, randva, pa, size);

-	return randva;
+	return (randva + offset);
 }

 static void
@@ -357,7 +393,7 @@ mm_map_boot(void)

 	/* Create the page tree */
 	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
-	randva = mm_randva_kregion(size);
+	randva = mm_randva_kregion(size, PAGE_SIZE);

 	/* Enter the area and build the ELF info */
 	bootpa = bootspace_getend();

Index: src/sys/arch/amd64/stand/prekern/prekern.h
diff -u src/sys/arch/amd64/stand/prekern/prekern.h:1.12 src/sys/arch/amd64/stand/prekern/prekern.h:1.13
--- src/sys/arch/amd64/stand/prekern/prekern.h:1.12	Tue Nov 14 07:06:34 2017
+++ src/sys/arch/amd64/stand/prekern/prekern.h	Wed Nov 15 18:02:36 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: prekern.h,v 1.12 2017/11/14 07:06:34 maxv Exp $	*/
+/*	$NetBSD: prekern.h,v 1.13 2017/11/15 18:02:36 maxv Exp $	*/

 /*
  * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
@@ -111,7 +111,7 @@ void jump_kernel(vaddr_t);
 void mm_init(paddr_t);
 paddr_t mm_vatopa(vaddr_t);
 void mm_bootspace_mprotect(void);
-vaddr_t mm_map_segment(int, paddr_t, size_t);
+vaddr_t mm_map_segment(int, paddr_t, size_t, size_t);
 void mm_map_kernel(void);

 /* prekern.c */

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.264 src/sys/arch/x86/x86/pmap.c:1.265
--- src/sys/arch/x86/x86/pmap.c:1.264	Sat Nov 11 12:51:05 2017
+++ src/sys/arch/x86/x86/pmap.c	Wed Nov 15 18:02:37 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.264 2017/11/11 12:51:05 maxv Exp $	*/
+/*	$NetBSD: pmap.c,v 1.265 2017/11/15 18:02:37 maxv Exp $	*/

 /*
  * Copyright (c) 2008, 2010, 2016, 2017 The NetBSD Foundation, Inc.
@@ -170,13 +170,12 @@
  */

 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.264 2017/11/11 12:51:05 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.265 2017/11/15 18:02:37 maxv Exp $");

 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
 #include "opt_multiprocessor.h"
 #include "opt_xen.h"
-#include "opt_kaslr.h"

 #include <sys/param.h>
 #include <sys/systm.h>
@@ -1603,11 +1602,6 @@ pmap_remap_largepages(void)
 	paddr_t pa;
 	size_t i;

-#ifdef KASLR
-	/* XXX no large pages yet, soon */
-	return;
-#endif
-
 	/* Remap the kernel text using large pages. */
 	for (i = 0; i < BTSPACE_NSEGS; i++) {
 		if (bootspace.segs[i].type != BTSEG_TEXT) {

Index: src/sys/lib/libsa/loadfile_elf32.c
diff -u src/sys/lib/libsa/loadfile_elf32.c:1.50 src/sys/lib/libsa/loadfile_elf32.c:1.51
--- src/sys/lib/libsa/loadfile_elf32.c:1.50	Mon Nov 13 21:32:21 2017
+++ src/sys/lib/libsa/loadfile_elf32.c	Wed Nov 15 18:02:37 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: loadfile_elf32.c,v 1.50 2017/11/13 21:32:21 maxv Exp $ */
+/* $NetBSD: loadfile_elf32.c,v 1.51 2017/11/15 18:02:37 maxv Exp $ */

 /*
  * Copyright (c) 1997, 2008, 2017 The NetBSD Foundation, Inc.
@@ -265,7 +265,8 @@ externalize_shdr(Elf_Byte bo, Elf_Shdr *

 /* -------------------------------------------------------------------------- */

-#define KERNALIGN 4096	/* XXX should depend on marks[] */
+#define KERNALIGN_SMALL (1 << 12)	/* XXX should depend on marks[] */
+#define KERNALIGN_LARGE (1 << 21)	/* XXX should depend on marks[] */

 /*
  * Read some data from a file, and put it in the bootloader memory (local).
@@ -343,7 +344,7 @@ ELFNAMEEND(loadfile_dynamic)(int fd, Elf
 	Elf_Shdr *shdr;
 	Elf_Addr shpp, addr;
 	int i, j, loaded;
-	size_t size, shdrsz;
+	size_t size, shdrsz, align;
 	Elf_Addr maxp, elfp = 0;
 	int ret;

@@ -385,14 +386,18 @@ ELFNAMEEND(loadfile_dynamic)(int fd, Elf
 	/*
 	 * Load the KERNEL SECTIONS.
 	 */
-	maxp = roundup(maxp, KERNALIGN);
+	maxp = roundup(maxp, KERNALIGN_SMALL);
 	for (i = 0; i < elf->e_shnum; i++) {
-		addr = maxp;
-		size = (size_t)shdr[i].sh_size;
-
 		if (!(shdr[i].sh_flags & SHF_ALLOC)) {
 			continue;
 		}
+		size = (size_t)shdr[i].sh_size;
+		if (size <= KERNALIGN_SMALL) {
+			align = KERNALIGN_SMALL;
+		} else {
+			align = KERNALIGN_LARGE;
+		}
+		addr = roundup(maxp, align);

 		loaded = 0;
 		switch (shdr[i].sh_type) {
@@ -415,10 +420,11 @@ ELFNAMEEND(loadfile_dynamic)(int fd, Elf
 		}

 		if (loaded) {
-			shdr[i].sh_offset = maxp - elfp;
-			maxp = roundup(maxp + size, KERNALIGN);
+			shdr[i].sh_offset = addr - elfp;
+			maxp = roundup(addr + size, align);
 		}
 	}
+	maxp = roundup(maxp, KERNALIGN_LARGE);

 	/*
 	 * Load the SYM+REL SECTIONS.
@@ -456,7 +462,7 @@ ELFNAMEEND(loadfile_dynamic)(int fd, Elf
 			break;
 		}
 	}
-	maxp = roundup(maxp, KERNALIGN);
+	maxp = roundup(maxp, KERNALIGN_SMALL);

 	/*
 	 * Finally, load the SECTION HEADERS.
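As a companion to the loadfile_elf32.c hunks above, here is a standalone
sketch of the resulting placement policy; KERNALIGN_SMALL/KERNALIGN_LARGE
match the defines in the diff, but the section sizes are invented for the
example:

	#include <stdio.h>
	#include <stddef.h>

	#define KERNALIGN_SMALL	(1 << 12)	/* 4KB, sections <= 4KB */
	#define KERNALIGN_LARGE	(1 << 21)	/* 2MB, everything else */
	#define ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

	int
	main(void)
	{
		/* Hypothetical SHF_ALLOC section sizes, in bytes. */
		size_t sizes[] = { 0x100000, 0x800, 0x12c000 };
		size_t maxp = 0, addr, align, i;

		maxp = ROUNDUP(maxp, KERNALIGN_SMALL);
		for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
			/* Pick the alignment exactly as the diff does. */
			align = (sizes[i] <= KERNALIGN_SMALL) ?
			    KERNALIGN_SMALL : KERNALIGN_LARGE;
			addr = ROUNDUP(maxp, align);
			printf("section %zu: addr=%#zx size=%#zx pad=%#zx\n",
			    i, addr, sizes[i], addr - maxp);
			maxp = ROUNDUP(addr + sizes[i], align);
		}
		return 0;
	}

Every section larger than 4KB thus lands at a 2MB-aligned physical
address and is padded out to a 2MB boundary, giving the prekern a chunk
it can shift within and the kernel a range it can map with a large page.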