On Mon May 26 19:16:22 EDT 2014, lyn...@orthanc.ca wrote:

> For the last couple of days I have been plagued by many many diagnostics from 
> checkpages(), in conjunction with things like:
> 
>   rc: note: sys: trap: fault read addr=0x0 pc=0x000101c4
>   rc 50675: suicide: sys: trap: fault read addr=0x0 pc=0x000101c4

acid says that this is an abort.  

; acid /n/sources/plan9/386/bin/rc
/n/sources/plan9/386/bin/rc:386 plan 9 executable
/sys/lib/acid/port
/sys/lib/acid/386
acid; src(0x000101c4)
/sys/src/libc/9sys/abort.c:6
 1      #include <u.h>
 2      #include <libc.h>
 3      void
 4      abort(void)
 5      {
>6              while(*(int*)0)
 7                      ;
 8      }

the problem is that, without a backtrace, there are a few too many possibilities.
if the abort is legit, these would be good candidates:
- notifyf (plan9.c)
- _vsaop (not very likely)
- assert:
io.c:101:                       assert(b->fd == -1 || b->bufp > b->buf);
pcmd.c:24:      assert(f != nil);


but ...

> The kernel print buffer holds corresponding entries like:
> 
>   coral# 10618 dns: checked 136 page table entries
>   dns 10618: suicide: sys: trap: fault write addr=0x0 pc=0x00015cea

/sys/src/libc/port/pool.c:974
 969            return a;
 970    }
 971    
 972    /* poolallocl: attempt to allocate block to hold dsize user bytes; assumes lock held */
 973    static void*
>974    poolallocl(Pool *p, ulong dsize)
 975    {
 976            ulong bsize;
 977            Free *fb;
 978            Alloc *ab;
 979    
acid; asm(0x00015cea)
poolallocl 0x00015cea   SUBL    $0x1c,SP
poolallocl+0x3 0x00015ced       MOVL    dsize+0x4(FP),DX
poolallocl+0x7 0x00015cf1       CMPL    DX,$0x80000000
poolallocl+0xd 0x00015cf7       JCS     poolallocl+0x22(SB)

this one doesn't make any sense, unless the stack ptr is smashed.

>   26591 rfcmirror: checked 270 page table entries
>   37326 rc: checked 51 page table entries
>   47773 rc: checked 57 page table entries
>   47773 rc: checked 57 page table entries
>   47773 rc: checked 57 page table entries
>   47773 rc: checked 57 page table entries
>   47773 rc: checked 57 page table entries
>   47773 rc: checked 57 page table entries
>   47773 rc: checked 57 page table entries
>   47773 rc: checked 57 page table entries
>   47773 rc: checked 57 page table entries
>   47773 rc: checked 57 page table entries
>   47773 rc: checked 57 page table entries
>   50675 rc: checked 53 page table entries

ah.  this is starting to make some sense.  remember above, there was
an abort in notifyf?  that was if the trap depth got too deep.  the problem
is we would need to see 33 events for pid 47773, but we don't.

i had a very similar problem under vbox on osx, and the solution
was to use gorka's ancient fix, which basically avoids clearing PTEs
which do not have the PteP bit set.  there are substantial differences
between the pc and nix kernels here.

so for example mmuptefree() looks fishy to me since it clears
pages not present.  but i'm not sure.

- erik

the applied patch is /n/atom/patch/applied/vboxmmu

; diff -c mmu.c.orig mmu.c
mmu.c.orig:87,93 - mmu.c:87,93
  }
  
  void
- mmuflushtlb(uintmem)
+ xmmuflushtlb(uintmem)
  {
  
        m->tlbpurge++;
mmu.c.orig:98,104 - mmu.c:98,122
        putcr3(m->pml4->pa);
  }
  
+ /* hack for vbox */
  void
+ mmuflushtlb(uintmem)
+ {
+       int i;
+       PTE *pte;
+ 
+       m->tlbpurge++;
+       if(m->pml4->daddr){
+               pte = UINT2PTR(m->pml4->va);
+               for(i = 0; i < m->pml4->daddr; i++)
+                       if(pte[i] & PteP)
+                               pte[i] = 0;
+               m->pml4->daddr = 0;
+       }
+       putcr3(m->pml4->pa);
+ }
+ 
+ void
  mmuflush(void)
  {
        Mpl pl;
mmu.c.orig:259,264 - mmu.c:277,283
  void
  mmuswitch(Proc* proc)
  {
+       int i;
        PTE *pte;
        Page *page;
        Mpl pl;
mmu.c.orig:270,276 - mmu.c:289,300
        }
  
        if(m->pml4->daddr){
-               memset(UINT2PTR(m->pml4->va), 0, m->pml4->daddr*sizeof(PTE));
+               /* hack for vbox */
+ //            memset(UINT2PTR(m->pml4->va), 0, m->pml4->daddr*sizeof(PTE));
+               pte = UINT2PTR(m->pml4->va);
+               for(i = 0; i < m->pml4->daddr; i++)
+                       if(pte[i] & PteP)
+                               pte[i] = 0;
                m->pml4->daddr = 0;
        }

Reply via email to