Hello,

in recent days I've been able to consistently panic an OpenSolaris
kernel (build 79b, 64-bit on dual-core Opteron with 4GB of RAM) by
using KQEMU and running qemu with a -m (memory) parameter greater than
about 1750.  The panic would be due to a NULL pointer dereference and
happen mainly in kqemu_init().  On Linux this does not seem to be a
problem for some reason.

Anyway, while nowhere near understanding root cause yet, I did sweep
the code and came up with a patch to check for errors of certain
internal functions - particularly errors that could eventually lead to
NULL pointer dereferences, etc.  So now either the kernel call will
fail (and userspace will deal with it accordingly), or the monitor
will just crash the running qemu process rather than panic the kernel
if one of these error situations happens.  What I discovered is that
almost, if not all of these error situations come from the return
value of the function get_vaddr() in common/common.c not being checked
by its callers.  get_vaddr() can return -1 if there is no virtual
address available, but its callers were often assuming the return
value was valid.  Particularly common/mon_get_ptep_l3() would ignore
it, and quickly lead to a kernel panic.  I did not try the non-PAE
version but I addressed it anyway with my patch.

I was able to identify 2 distinct situations that would lead to a
kernel panic, driven by the -m parameter from userspace.  The first,
if -m 1750, was likely due to mon_user_map() in common/kernel.c
ignoring the return from get_vaddr().  I am not 100% sure about that
as it was very difficult to debug this with the Solaris kernel
debugger, but that's what I believe right now.  The second and more
traceable problem came when using a value greater than or equal to
about 1800 for -m.  That would crash consistently in kqemu_init() in
common/kernel.c around line 564 where the monitor PTE pages are cloned
in each address space.  The result was that mon_get_ptep_l3() was
ignoring a -1 return value from get_vaddr(), and in turn, kqemu_init()
was also ignoring a potential NULL return value from
mon_get_ptep_l3(), leading to a dereference of a NULL pointer.

My patch below may have detected one condition that is not really an
error, in phys_page_find() of common/monitor.c.  The return of
get_vaddr() there is not actually dereferenced, so I'm not sure that
when it gets passed into set_vaddr_page_index() it's okay for it to be
-1.  Still, this situation does happen (the monitor panic) if you run
qemu -m 1500 or so (I didn't compute exact values that trigger it,
sorry), and the guest (i.e. Windows 2000) runs for a while.  Note that
the errors do not seem to be related to using -m values that are
powers of 10 instead of powers of 2 - I tried either.

While it's important that a userspace process never panic a kernel,
I'm sure some of the error checking below is not accurate or may mask
another problem that should be fixed rather than detected.  If someone
who understands the KQEMU code better than me can take a look, I would
appreciate it.  Note that I have both inlined and attached the patch
for easy handling.  The patch is against the latest KQEMU, and applies
to either the one on www.qemu.org or the one from the OpenSolaris
project cleanly.

Special thanks to Juergen Keil for helping me with the Solaris kernel
debugger (off list).

Best regards,

Leo Reiter

diff -Naur kqemu_1.0.3pre11-20070520/common/common.c
kqemu_1.0.3pre11-20070520.new/common/common.c
--- kqemu_1.0.3pre11-20070520/common/common.c   2007-05-20
07:35:07.000000000 -0400
+++ kqemu_1.0.3pre11-20070520.new/common/common.c       2008-02-15
16:29:21.000000000 -0500
@@ -252,6 +252,8 @@
         pdp_page_index = pml4e >> PAGE_SHIFT;
     }
     pdp_page = page_index_to_addr(s, pdp_page_index);
+    if (!pdp_page)
+        return NULL;

     pdpe_index = (vaddr >> 30) & 0x1ff;
     pdpe = pdp_page[pdpe_index];
@@ -267,6 +269,8 @@
         pde_page_index = pdpe >> PAGE_SHIFT;
     }
     pde_page = page_index_to_addr(s, pde_page_index);
+    if (!pde_page)
+        return NULL;

     pde_index = (vaddr >> 21) & 0x1ff;
     if (alloc == 2)
@@ -284,6 +288,8 @@
         pte_page_index = pde >> PAGE_SHIFT;
     }
     pte_page = page_index_to_addr(s, pte_page_index);
+    if (!pte_page)
+        return NULL;    /* XXX - ?? we don't dereference pte_page here */

     pte_index = (vaddr >> 12) & 0x1ff;
 #ifndef IN_MONITOR
@@ -331,6 +337,8 @@
         pde_page_index = pdpe >> PAGE_SHIFT;
     }
     pde_page = page_index_to_addr(s, pde_page_index);
+    if (!pde_page)
+        return NULL;

     pde_index = (vaddr >> 21) & 0x1ff;
     if (alloc == 2)
@@ -348,6 +356,8 @@
         pte_page_index = pde >> PAGE_SHIFT;
     }
     pte_page = page_index_to_addr(s, pte_page_index);
+    if (!pte_page)
+        return NULL;    /* XXX - ?? we don't dereference pte_page here */

     pte_index = (vaddr >> 12) & 0x1ff;
 #ifndef IN_MONITOR
@@ -387,6 +397,9 @@
         pte_page_index = pde >> PAGE_SHIFT;
     }
     pte_page = page_index_to_addr(s, pte_page_index);
+    if (!pte_page)
+        return NULL;    /* XXX - ?? we don't dereference pte_page here */
+
     pte_index = (vaddr >> PAGE_SHIFT) & PTE_MASK;
 #ifndef IN_MONITOR
     if (pvptep) {
diff -Naur kqemu_1.0.3pre11-20070520/common/kernel.c
kqemu_1.0.3pre11-20070520.new/common/kernel.c
--- kqemu_1.0.3pre11-20070520/common/kernel.c   2007-05-20
07:35:07.000000000 -0400
+++ kqemu_1.0.3pre11-20070520.new/common/kernel.c       2008-02-15
18:39:03.000000000 -0500
@@ -156,6 +156,13 @@
         return NULL;
     }
     vaddr = get_vaddr(s);
+    if (vaddr == (unsigned long) -1) {
+#ifdef DEBUG
+        kqemu_log("mon_alloc_page: vaddr=-1\n");
+#endif
+        kqemu_free_page(host_page);
+        return NULL;
+    }
     set_vaddr_page_index(s, vaddr, page_index, host_page, 0);
     /* avoid recursion during init */
     if (!s->in_page_init)
@@ -209,6 +216,13 @@
         if (!host_page)
             return NULL;
         vaddr = get_vaddr(s);
+        if (vaddr == (unsigned long) -1) {
+#ifdef DEBUG
+            kqemu_log("mon_user_map: vaddr=-1\n");
+#endif
+            kqemu_unlock_user_page(host_page);
+            return NULL;
+        }
         set_vaddr_page_index(s, vaddr, page_index, host_page, 1);
         mon_set_pte(s, vaddr, page_index,
                     PG_PRESENT_MASK | PG_GLOBAL(s) | pte_flags);
@@ -561,6 +575,13 @@
                 vaddr = s->monitor_vaddr + j;
                 pdep = mon_get_ptep_l3(s, 0, vaddr, 2, NULL);
                 pdep1 = mon_get_ptep_l3(s, i, vaddr, 2, NULL);
+               if (!pdep || !pdep1) {
+                       kqemu_log("kqemu_init(): mon_get_ptep_l3(i=0,"
+                                       "vaddr=%08lx)=%p\n", vaddr, pdep);
+                       kqemu_log("kqemu_init(): mon_get_ptep_l3(i=%d,"
+                                       "vaddr=%08lx)=%p\n", i, vaddr, pdep);
+                       goto fail;
+               }
                 *pdep1 = *pdep;
             }
         } else {
@@ -569,6 +590,13 @@
                 vaddr = s->monitor_vaddr + j;
                 pdep = mon_get_ptep_l2(s, 0, vaddr, 2, NULL);
                 pdep1 = mon_get_ptep_l2(s, i, vaddr, 2, NULL);
+               if (!pdep || !pdep1) {
+                       kqemu_log("kqemu_init(): mon_get_ptep_l2(i=0,"
+                                       "vaddr=%08lx)=%p\n", vaddr, pdep);
+                       kqemu_log("kqemu_init(): mon_get_ptep_l2(i=%d,"
+                                       "vaddr=%08lx)=%p\n", i, vaddr, pdep);
+                       goto fail;
+               }
                 *pdep1 = *pdep;
             }
         }
diff -Naur kqemu_1.0.3pre11-20070520/common/monitor.c
kqemu_1.0.3pre11-20070520.new/common/monitor.c
--- kqemu_1.0.3pre11-20070520/common/monitor.c  2007-05-20
07:35:07.000000000 -0400
+++ kqemu_1.0.3pre11-20070520.new/common/monitor.c      2008-02-15
17:30:19.000000000 -0500
@@ -280,7 +280,8 @@
         return NULL;
     }
     vaddr = get_vaddr(s);
-    /* XXX: check error */
+    if (vaddr == (unsigned long) -1)
+        return NULL;
     set_vaddr_page_index(s, vaddr, page_index, host_page, 0);
     mon_set_pte(s, 0, vaddr, page_index,
                 PG_PRESENT_MASK | PG_GLOBAL(s) | PG_RW_MASK);
@@ -296,10 +297,14 @@
     if (USE_PAE(s)) {
         uint64_t *ptep;
         ptep = mon_get_ptep_l3(s, as_index, vaddr, 1);
+        if (!ptep)
+            monitor_panic(s, "mon_get_ptel_l3() failed\n");
         *ptep = ((uint64_t)page_index << PAGE_SHIFT) | pte_flags;
     } else {
         uint32_t *ptep;
         ptep = mon_get_ptep_l2(s, as_index, vaddr, 1);
+        if (!ptep)
+            monitor_panic(s, "mon_get_ptep_l2() failed\n");
         *ptep = (page_index << PAGE_SHIFT) | pte_flags;
     }
     asm volatile("invlpg %0" : : "m" (*(uint8_t *)vaddr));
@@ -328,6 +333,8 @@
         if (!host_page)
             monitor_panic(s, "could not lock user page %p", (void *)l2_uaddr);
         ptr = (void *)get_vaddr(s);
+        if (ptr == (void *) -1)
+            monitor_panic(s, "phys_page_find(): vaddr=-1\n");
         set_vaddr_page_index(s, (unsigned long)ptr, paddr, host_page, 1);
         s->phys_to_ram_map_pages[l1_index] = ptr;
         mon_set_pte(s, 0, (unsigned long)ptr, paddr,
@@ -574,12 +581,16 @@
                                                        GET_AS(vaddr), addr, 0);
                 else
                     ptep = mon_get_ptep_l2(s, GET_AS(vaddr), addr, 0);
+                if (!ptep)
+                    monitor_panic(s, "ram_set_read_only(); ptep=NULL\n");
                 *ptep &= ~PG_RW_MASK;
                 asm volatile("invlpg %0" : : "m" (*(uint8_t *)addr));
             }
             if (IS_LAST_VADDR(vaddr))
                 break;
             nptep = get_ram_page_next_mapping(s, GET_AS(vaddr), addr);
+            if (!nptep)
+                monitor_panic(s, "ram_set_read_only(); nptep=NULL\n");
             vaddr = *nptep;
         }
     }
diff -Naur kqemu_1.0.3pre11-20070520/common/common.c kqemu_1.0.3pre11-20070520.new/common/common.c
--- kqemu_1.0.3pre11-20070520/common/common.c	2007-05-20 07:35:07.000000000 -0400
+++ kqemu_1.0.3pre11-20070520.new/common/common.c	2008-02-15 16:29:21.000000000 -0500
@@ -252,6 +252,8 @@
         pdp_page_index = pml4e >> PAGE_SHIFT;
     }
     pdp_page = page_index_to_addr(s, pdp_page_index);
+    if (!pdp_page)
+        return NULL;
 
     pdpe_index = (vaddr >> 30) & 0x1ff;
     pdpe = pdp_page[pdpe_index];
@@ -267,6 +269,8 @@
         pde_page_index = pdpe >> PAGE_SHIFT;
     }
     pde_page = page_index_to_addr(s, pde_page_index);
+    if (!pde_page)
+        return NULL;
     
     pde_index = (vaddr >> 21) & 0x1ff;
     if (alloc == 2)
@@ -284,6 +288,8 @@
         pte_page_index = pde >> PAGE_SHIFT;
     }
     pte_page = page_index_to_addr(s, pte_page_index);
+    if (!pte_page)
+        return NULL;    /* XXX - ?? we don't dereference pte_page here */
         
     pte_index = (vaddr >> 12) & 0x1ff;
 #ifndef IN_MONITOR
@@ -331,6 +337,8 @@
         pde_page_index = pdpe >> PAGE_SHIFT;
     }
     pde_page = page_index_to_addr(s, pde_page_index);
+    if (!pde_page)
+        return NULL;
     
     pde_index = (vaddr >> 21) & 0x1ff;
     if (alloc == 2)
@@ -348,6 +356,8 @@
         pte_page_index = pde >> PAGE_SHIFT;
     }
     pte_page = page_index_to_addr(s, pte_page_index);
+    if (!pte_page)
+        return NULL;    /* XXX - ?? we don't dereference pte_page here */
         
     pte_index = (vaddr >> 12) & 0x1ff;
 #ifndef IN_MONITOR
@@ -387,6 +397,9 @@
         pte_page_index = pde >> PAGE_SHIFT;
     }
     pte_page = page_index_to_addr(s, pte_page_index);
+    if (!pte_page)
+        return NULL;    /* XXX - ?? we don't dereference pte_page here */
+
     pte_index = (vaddr >> PAGE_SHIFT) & PTE_MASK;
 #ifndef IN_MONITOR
     if (pvptep) {
diff -Naur kqemu_1.0.3pre11-20070520/common/kernel.c kqemu_1.0.3pre11-20070520.new/common/kernel.c
--- kqemu_1.0.3pre11-20070520/common/kernel.c	2007-05-20 07:35:07.000000000 -0400
+++ kqemu_1.0.3pre11-20070520.new/common/kernel.c	2008-02-15 18:39:03.000000000 -0500
@@ -156,6 +156,13 @@
         return NULL;
     }
     vaddr = get_vaddr(s);
+    if (vaddr == (unsigned long) -1) {
+#ifdef DEBUG
+        kqemu_log("mon_alloc_page: vaddr=-1\n");
+#endif
+        kqemu_free_page(host_page);
+        return NULL;
+    }
     set_vaddr_page_index(s, vaddr, page_index, host_page, 0);
     /* avoid recursion during init */
     if (!s->in_page_init)
@@ -209,6 +216,13 @@
         if (!host_page)
             return NULL;
         vaddr = get_vaddr(s);
+        if (vaddr == (unsigned long) -1) {
+#ifdef DEBUG
+            kqemu_log("mon_user_map: vaddr=-1\n");
+#endif
+            kqemu_unlock_user_page(host_page);
+            return NULL;
+        }
         set_vaddr_page_index(s, vaddr, page_index, host_page, 1);
         mon_set_pte(s, vaddr, page_index, 
                     PG_PRESENT_MASK | PG_GLOBAL(s) | pte_flags);
@@ -561,6 +575,13 @@
                 vaddr = s->monitor_vaddr + j;
                 pdep = mon_get_ptep_l3(s, 0, vaddr, 2, NULL);
                 pdep1 = mon_get_ptep_l3(s, i, vaddr, 2, NULL);
+		if (!pdep || !pdep1) {
+			kqemu_log("kqemu_init(): mon_get_ptep_l3(i=0,"
+					"vaddr=%08lx)=%p\n", vaddr, pdep);
+			kqemu_log("kqemu_init(): mon_get_ptep_l3(i=%d,"
+					"vaddr=%08lx)=%p\n", i, vaddr, pdep);
+			goto fail;
+		}
                 *pdep1 = *pdep;
             }
         } else {
@@ -569,6 +590,13 @@
                 vaddr = s->monitor_vaddr + j;
                 pdep = mon_get_ptep_l2(s, 0, vaddr, 2, NULL);
                 pdep1 = mon_get_ptep_l2(s, i, vaddr, 2, NULL);
+		if (!pdep || !pdep1) {
+			kqemu_log("kqemu_init(): mon_get_ptep_l2(i=0,"
+					"vaddr=%08lx)=%p\n", vaddr, pdep);
+			kqemu_log("kqemu_init(): mon_get_ptep_l2(i=%d,"
+					"vaddr=%08lx)=%p\n", i, vaddr, pdep);
+			goto fail;
+		}
                 *pdep1 = *pdep;
             }
         }
diff -Naur kqemu_1.0.3pre11-20070520/common/monitor.c kqemu_1.0.3pre11-20070520.new/common/monitor.c
--- kqemu_1.0.3pre11-20070520/common/monitor.c	2007-05-20 07:35:07.000000000 -0400
+++ kqemu_1.0.3pre11-20070520.new/common/monitor.c	2008-02-15 17:30:19.000000000 -0500
@@ -280,7 +280,8 @@
         return NULL;
     }
     vaddr = get_vaddr(s);
-    /* XXX: check error */
+    if (vaddr == (unsigned long) -1)
+        return NULL; 
     set_vaddr_page_index(s, vaddr, page_index, host_page, 0);
     mon_set_pte(s, 0, vaddr, page_index, 
                 PG_PRESENT_MASK | PG_GLOBAL(s) | PG_RW_MASK);
@@ -296,10 +297,14 @@
     if (USE_PAE(s)) {
         uint64_t *ptep;
         ptep = mon_get_ptep_l3(s, as_index, vaddr, 1);
+        if (!ptep)
+            monitor_panic(s, "mon_get_ptel_l3() failed\n");
         *ptep = ((uint64_t)page_index << PAGE_SHIFT) | pte_flags;
     } else {
         uint32_t *ptep;
         ptep = mon_get_ptep_l2(s, as_index, vaddr, 1);
+        if (!ptep)
+            monitor_panic(s, "mon_get_ptep_l2() failed\n");
         *ptep = (page_index << PAGE_SHIFT) | pte_flags;
     }
     asm volatile("invlpg %0" : : "m" (*(uint8_t *)vaddr));
@@ -328,6 +333,8 @@
         if (!host_page)
             monitor_panic(s, "could not lock user page %p", (void *)l2_uaddr);
         ptr = (void *)get_vaddr(s);
+        if (ptr == (void *) -1)
+            monitor_panic(s, "phys_page_find(): vaddr=-1\n");
         set_vaddr_page_index(s, (unsigned long)ptr, paddr, host_page, 1);
         s->phys_to_ram_map_pages[l1_index] = ptr;
         mon_set_pte(s, 0, (unsigned long)ptr, paddr, 
@@ -574,12 +581,16 @@
                                                        GET_AS(vaddr), addr, 0);
                 else
                     ptep = mon_get_ptep_l2(s, GET_AS(vaddr), addr, 0);
+                if (!ptep)
+                    monitor_panic(s, "ram_set_read_only(); ptep=NULL\n");
                 *ptep &= ~PG_RW_MASK;
                 asm volatile("invlpg %0" : : "m" (*(uint8_t *)addr));
             }
             if (IS_LAST_VADDR(vaddr))
                 break;
             nptep = get_ram_page_next_mapping(s, GET_AS(vaddr), addr);
+            if (!nptep)
+                monitor_panic(s, "ram_set_read_only(); nptep=NULL\n");
             vaddr = *nptep;
         }
     }

Reply via email to