Current kernels have a bug in the brk() codepath.  This path, unlike
the mmap() path, doesn't check whether the newly allocated region
intersects a hugepage-dedicated region.  This means it can create a
normal-page VMA which extends into a hugepage-only address region (at
least on powerpc, which has such regions).  This can easily lead to
later oopses or other nastiness.

This patch adds a testcase to catch this bug (triggering an oops on
powerpc).

Signed-off-by: David Gibson <[EMAIL PROTECTED]>

Index: libhugetlbfs/tests/Makefile
===================================================================
--- libhugetlbfs.orig/tests/Makefile    2006-11-14 14:45:32.000000000 +1100
+++ libhugetlbfs/tests/Makefile 2006-11-14 14:46:16.000000000 +1100
@@ -6,7 +6,7 @@ LIB_TESTS = gethugepagesize test_root fi
        chunk-overcommit mprotect alloc-instantiate-race mlock \
        truncate_reserve_wraparound truncate_sigbus_versus_oom \
        map_high_truncate_2 truncate_above_4GB \
-       misaligned_offset
+       misaligned_offset brk_near_huge
 LIB_TESTS_64 = straddle_4GB huge_at_4GB_normal_below \
        huge_below_4GB_normal_above
 NOLIB_TESTS = malloc malloc_manysmall dummy
Index: libhugetlbfs/tests/run_tests.sh
===================================================================
--- libhugetlbfs.orig/tests/run_tests.sh        2006-11-14 14:45:32.000000000 +1100
+++ libhugetlbfs/tests/run_tests.sh     2006-11-14 14:46:02.000000000 +1100
@@ -127,6 +127,7 @@ functional_tests () {
     run_test map_high_truncate_2
     run_test misaligned_offset
     run_test truncate_above_4GB
+    run_test brk_near_huge
 
 # Tests requiring an active mount and hugepage COW
     run_test private
Index: libhugetlbfs/tests/hugetests.h
===================================================================
--- libhugetlbfs.orig/tests/hugetests.h 2006-11-14 14:45:32.000000000 +1100
+++ libhugetlbfs/tests/hugetests.h      2006-11-14 14:46:02.000000000 +1100
@@ -35,6 +35,7 @@ int test_addr_huge(void *p);
 ino_t get_addr_inode(void *p);
 
 #define ALIGN(x, a)    (((x) + (a) - 1) & ~((a) - 1))
+#define PALIGN(p, a)   ((void *)ALIGN((unsigned long)(p), (a)))
 
 #ifndef barrier
 # ifdef mb
Index: libhugetlbfs/tests/brk_near_huge.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ libhugetlbfs/tests/brk_near_huge.c  2006-11-14 14:46:02.000000000 +1100
@@ -0,0 +1,111 @@
+/*
+ * libhugetlbfs - Easy use of Linux hugepages
+ * Copyright (C) 2005-2006 David Gibson & Adam Litke, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <hugetlbfs.h>
+
+#include "hugetests.h"
+
+/*
+ * Test rationale:
+ *
+ * Certain kernels have a bug where brk() does not perform the same
+ * checks that a MAP_FIXED mmap() will, allowing brk() to create a
+ * normal-page VMA in a hugepage-only address region.  This can lead
+ * to oopses or other badness.
+ */
+
+/* Possibly these functions should go in the library itself.. */
+#ifdef __powerpc64__
+void *next_chunk(void *addr)
+{
+       if ((unsigned long)addr < 0x100000000UL)
+               /* 256M segments below 4G */
+               return PALIGN(addr, 0x10000000UL);
+       else
+               /* 1TB segments above */
+               return PALIGN(addr, 0x10000000000UL);
+}
+#elif __powerpc__
+void *next_chunk(void *addr)
+{
+       return PALIGN(addr, 0x10000000UL);
+}
+#else
+void *next_chunk(void *addr)
+{
+       return PALIGN(addr, gethugepagesize());
+}
+#endif
+
+int main(int argc, char *argv[])
+{
+       int hpage_size;
+       int fd;
+       void *brk0, *hugemap_addr, *newbrk;
+       char *p;
+       int err;
+
+       test_init(argc, argv);
+
+       hpage_size = gethugepagesize();
+       if (hpage_size < 0)
+               CONFIG("No hugepage kernel support");
+
+       fd = hugetlbfs_unlinked_fd();
+       if (fd < 0)
+               FAIL("hugetlbfs_unlinked_fd()");
+
+       brk0 = sbrk(0);
+       verbose_printf("Initial break at %p\n", brk0);
+
+       hugemap_addr = next_chunk(brk0) + hpage_size;
+
+       p = mmap(hugemap_addr, hpage_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED,
+                fd, 0);
+       if (p == MAP_FAILED)
+               FAIL("mmap(): %s", strerror(errno));
+       if (p != hugemap_addr)
+               FAIL("mmap() at unexpected address %p instead of %p\n", p,
+                    hugemap_addr);
+
+       verbose_printf("Hugepage mapped at %p-%p\n", p, p+hpage_size-1);
+
+       err = test_addr_huge((void *)p);
+       if (err != 1)
+               FAIL("Mapped address is not hugepage");
+
+       newbrk = next_chunk(brk0) + getpagesize();
+       err = brk((void *)newbrk);
+       if (err == -1)
+               /* Failing the brk() is an acceptable kernel response */
+               PASS();
+
+       /* Succeeding the brk() is acceptable iff the new memory is
+        * properly accessible and we don't have a kernel blow up when
+        * we touch it. */
+       memset(brk0, 0, newbrk-brk0);
+
+       PASS();
+}


-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Libhugetlbfs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel

Reply via email to