Current kernels have a bug in the brk() codepath. Unlike the mmap()
path, the brk() path doesn't check whether the newly allocated region
intersects a region dedicated to hugepages. This means it can create a
normal-page VMA which extends into a hugepage-only address region (at
least on powerpc, which has such regions). This can easily lead to
later oopses or other nastiness.

This patch adds a testcase to catch this bug (triggering an oops on
powerpc).
Signed-off-by: David Gibson <[EMAIL PROTECTED]>
Index: libhugetlbfs/tests/Makefile
===================================================================
--- libhugetlbfs.orig/tests/Makefile 2006-11-14 14:45:32.000000000 +1100
+++ libhugetlbfs/tests/Makefile 2006-11-14 14:46:16.000000000 +1100
@@ -6,7 +6,7 @@ LIB_TESTS = gethugepagesize test_root fi
chunk-overcommit mprotect alloc-instantiate-race mlock \
truncate_reserve_wraparound truncate_sigbus_versus_oom \
map_high_truncate_2 truncate_above_4GB \
- misaligned_offset
+ misaligned_offset brk_near_huge
LIB_TESTS_64 = straddle_4GB huge_at_4GB_normal_below \
huge_below_4GB_normal_above
NOLIB_TESTS = malloc malloc_manysmall dummy
Index: libhugetlbfs/tests/run_tests.sh
===================================================================
--- libhugetlbfs.orig/tests/run_tests.sh 2006-11-14 14:45:32.000000000 +1100
+++ libhugetlbfs/tests/run_tests.sh 2006-11-14 14:46:02.000000000 +1100
@@ -127,6 +127,7 @@ functional_tests () {
run_test map_high_truncate_2
run_test misaligned_offset
run_test truncate_above_4GB
+ run_test brk_near_huge
# Tests requiring an active mount and hugepage COW
run_test private
Index: libhugetlbfs/tests/hugetests.h
===================================================================
--- libhugetlbfs.orig/tests/hugetests.h 2006-11-14 14:45:32.000000000 +1100
+++ libhugetlbfs/tests/hugetests.h 2006-11-14 14:46:02.000000000 +1100
@@ -35,6 +35,7 @@ int test_addr_huge(void *p);
ino_t get_addr_inode(void *p);
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
+#define PALIGN(p, a) ((void *)ALIGN((unsigned long)(p), (a)))
#ifndef barrier
# ifdef mb
Index: libhugetlbfs/tests/brk_near_huge.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ libhugetlbfs/tests/brk_near_huge.c 2006-11-14 14:46:02.000000000 +1100
@@ -0,0 +1,111 @@
+/*
+ * libhugetlbfs - Easy use of Linux hugepages
+ * Copyright (C) 2005-2006 David Gibson & Adam Litke, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <hugetlbfs.h>
+
+#include "hugetests.h"
+
+/*
+ * Test rationale:
+ *
+ * Certain kernels have a bug where brk() does not perform the same
+ * checks that a MAP_FIXED mmap() will, allowing brk() to create a
+ * normal page VMA in a hugepage only address region. This can lead
+ * to oopses or other badness.
+ */
+
+/* Possibly these functions should go in the library itself.. */
+/* Round addr up to the next chunk boundary; addr is returned as-is
+ * when already aligned.  Chunk size is selected per architecture at
+ * compile time. */
+void *next_chunk(void *addr)
+{
+#if defined(__powerpc64__)
+	unsigned long chunk;
+
+	if ((unsigned long)addr >= 0x100000000UL)
+		chunk = 0x10000000000UL;	/* 1TB segments from 4G up */
+	else
+		chunk = 0x10000000UL;		/* 256M segments below 4G */
+	return PALIGN(addr, chunk);
+#elif defined(__powerpc__)
+	/* Other powerpc: 256M throughout */
+	return PALIGN(addr, 0x10000000UL);
+#else
+	/* Everything else: the hugepage size itself */
+	return PALIGN(addr, gethugepagesize());
+#endif
+}
+
+/* Map a hugepage in the chunk above the heap, then brk() into that chunk. */
+int main(int argc, char *argv[])
+{
+	long hpage_size;	/* long, as returned by gethugepagesize() */
+	int fd, err;
+	void *brk0, *hugemap_addr, *newbrk;
+	char *p;
+
+	test_init(argc, argv);
+
+	hpage_size = gethugepagesize();
+	if (hpage_size < 0)
+		CONFIG("No hugepage kernel support");
+
+	fd = hugetlbfs_unlinked_fd();
+	if (fd < 0)
+		FAIL("hugetlbfs_unlinked_fd()");
+
+	brk0 = sbrk(0);
+	verbose_printf("Initial break at %p\n", brk0);
+
+	/* One hugepage past the next chunk boundary above the break; cast
+	 * to char * because void * arithmetic is only a GNU extension. */
+	hugemap_addr = (char *)next_chunk(brk0) + hpage_size;
+	p = mmap(hugemap_addr, hpage_size, PROT_READ|PROT_WRITE,
+		 MAP_PRIVATE|MAP_FIXED, fd, 0);
+	if (p == MAP_FAILED)
+		FAIL("mmap(): %s", strerror(errno));
+	if (p != hugemap_addr)
+		FAIL("mmap() at unexpected address %p instead of %p\n", p,
+		     hugemap_addr);
+
+	verbose_printf("Hugepage mapped at %p-%p\n", p, p+hpage_size-1);
+	err = test_addr_huge((void *)p);
+	if (err != 1)
+		FAIL("Mapped address is not hugepage");
+
+	/* Ask for a break one normal page beyond that same chunk boundary */
+	newbrk = (char *)next_chunk(brk0) + getpagesize();
+	err = brk(newbrk);
+	if (err == -1)
+		/* Failing the brk() is an acceptable kernel response */
+		PASS();
+
+	/* Succeeding the brk() is acceptable iff the new memory is
+	 * properly accessible and we don't have a kernel blow up when
+	 * we touch it. */
+	memset(brk0, 0, (char *)newbrk - (char *)brk0);
+	PASS();
+}
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Libhugetlbfs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel