YAMAMOTO Takashi <y...@mwd.biglobe.ne.jp> wrote: > > Further investigation shows that this ENOENT is returned by the vget() call > > in puffs_cookie2vnode(). That suggests some kind of race condition, but > > that is not obvious. It means a vnode has been created on a lookup, then > > it gets recycled while looking up one of its children. > it should retry from puffs_cookie2pnode in that case.
I first tried to loop on vget but got a panic because I did not hold v_interlock anymore. I then came up with this patch and got a uvm_fault (backtrace below): Index: sys/fs/puffs/puffs_node.c =================================================================== RCS file: /cvsroot/src/sys/fs/puffs/puffs_node.c,v retrieving revision 1.13.10.3 diff -U 4 -r1.13.10.3 puffs_node.c --- sys/fs/puffs/puffs_node.c 2 Nov 2011 20:11:12 -0000 1.13.10.3 +++ sys/fs/puffs/puffs_node.c 17 Jan 2012 02:36:02 -0000 @@ -56,8 +56,9 @@ .gop_alloc, should ask userspace #endif }; +static __inline int puffs_vget(struct puffs_mount *, struct vnode *, int); static __inline struct puffs_node_hashlist *puffs_cookie2hashlist(struct puffs_mount *, puffs_cookie_t); static struct puffs_node *puffs_cookie2pnode(struct puffs_mount *, puffs_cookie_t); @@ -271,8 +272,23 @@ return; } +static __inline int puffs_vget(struct puffs_mount *pmp, + struct vnode *vp, int flags) +{ + int rv; + + while ((rv = vget(vp, flags)) == ENOENT) { + printf("*** retry vget %p\n", vp); + mutex_enter(&pmp->pmp_lock); + mutex_enter(&vp->v_interlock); + mutex_exit(&pmp->pmp_lock); + } + + return rv; +} + static __inline struct puffs_node_hashlist * puffs_cookie2hashlist(struct puffs_mount *pmp, puffs_cookie_t ck) { uint32_t hash; @@ -320,9 +336,9 @@ vp = pmp->pmp_root; if (vp) { mutex_enter(&vp->v_interlock); mutex_exit(&pmp->pmp_lock); - if (vget(vp, LK_INTERLOCK) == 0) + if (puffs_vget(pmp, vp, LK_INTERLOCK) == 0) return 0; } else mutex_exit(&pmp->pmp_lock); @@ -405,9 +421,9 @@ vgetflags = LK_INTERLOCK; if (lock) vgetflags |= LK_EXCLUSIVE | LK_RETRY; - if ((rv = vget(vp, vgetflags))) + if ((rv = puffs_vget(pmp, vp, vgetflags))) return rv; *vpp = vp; return 0; It produced a uvm_fault in the domU, followed by a crash of the dom0 (no console access on that one, I do not have the dom0 backtrace yet). 
Here is what I have been able to copy/paste from the domU (only the panic string is missing): trap type 6 code 0 eip c03012da cs 9 eflags 10283 cr2 0 ilevel 7 kernel: supervisor trap page fault, code=0 Stopped in pid 18692.1 (sh) at netbsd:turnstile_block+0x1aa: movl 0x10(%eax),%eax db> bt turnstile_block(0,1,cb5b42ec,c046d89c,cc3baa9c,cb91ba60,0,cb5b42ec,1,cb4c3000) at netbsd:turnstile_block+0x1aa mutex_vector_enter(cb5b42ec,cb5b42ec,0,0,cb3fc39c,cb497000,cc3baacc,c0365ff8,cc3baac0,6) at netbsd:mutex_vector_enter+0xfa puffs_cookie2vnode(cb4c3000,bb9090c0,1,1,cc3bab38,0,cc3bab4c,c0350467,cb497000,cc3bab38) at netbsd:puffs_cookie2vnode+0x187 puffs_vfsop_root(cb497000,cc3bab38,cc3bac28,20002,ca21dc38,ca215bdc,cc3bab2c,c0365fc5,20,0) at netbsd:puffs_vfsop_root+0x38 lookup(cc3bac00,20002,400,cc3bac1c,cb31a0b8,cb31a0e0,cc3babac,c0355e6c,cc3bac1c,cc3bab9f) at netbsd:lookup+0x287 namei(cc3bac00,cc3bac70,cc3bac0c,c03bc307,1964000,0,cc3bac3c,bb9067cc,0,0) at netbsd:namei+0x144 do_sys_stat(bb9067cc,0,cc3bac70,c02e40b0,c0470dc8,0,3bac8c,cb01,41ed,369ddb94) at netbsd:do_sys_stat+0x37 sys___lstat30(cb91ba60,cc3bad00,cc3bad28,bb916010,c03bc307,61cb000,0,bb9067cc,bfbfde68,bfbfded8) at netbsd:sys___lstat30+0x29 syscall(cc3bad48,1f,1f,1f,1f,805fced,bb906797,bfbfded8,bb906796,bb9067dc) at netbsd:syscall+0xc7 -- Emmanuel Dreyfus http://hcpnet.free.fr/pubz m...@netbsd.org