Module Name: src Committed By: rmind Date: Fri Nov 8 15:44:23 UTC 2013
Modified Files: src/sys/fs/tmpfs: tmpfs.h tmpfs_rename.c tmpfs_subr.c tmpfs_vfsops.c tmpfs_vnops.c Log Message: tmpfs: replace the broken tmpfs_dircookie() logic, which derived the cookie from the directory entry address truncated to 31 bits (the truncation being required for 32-bit readdir compatibility, e.g. linux32). Instead, assign per-directory sequence numbers from a 2^31 range using the following logic: - The first half of the 2^31 range is assigned incrementally (the fast path). - When that is exceeded, the second half of the 2^31 range is used, managed with vmem(9). It will take about 2^30 (roughly one billion) entry allocations in a single directory to trigger vmem(9) usage. Also, while here, add some fixes for tmpfs_unmount(). Should fix PR/47739, PR/47480, PR/46088 and PR/41068. Thanks to wiz@ for stress testing. To generate a diff of this commit: cvs rdiff -u -r1.45 -r1.46 src/sys/fs/tmpfs/tmpfs.h cvs rdiff -u -r1.4 -r1.5 src/sys/fs/tmpfs/tmpfs_rename.c cvs rdiff -u -r1.82 -r1.83 src/sys/fs/tmpfs/tmpfs_subr.c cvs rdiff -u -r1.52 -r1.53 src/sys/fs/tmpfs/tmpfs_vfsops.c cvs rdiff -u -r1.105 -r1.106 src/sys/fs/tmpfs/tmpfs_vnops.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/fs/tmpfs/tmpfs.h diff -u src/sys/fs/tmpfs/tmpfs.h:1.45 src/sys/fs/tmpfs/tmpfs.h:1.46 --- src/sys/fs/tmpfs/tmpfs.h:1.45 Tue Sep 27 01:10:43 2011 +++ src/sys/fs/tmpfs/tmpfs.h Fri Nov 8 15:44:23 2013 @@ -1,4 +1,4 @@ -/* $NetBSD: tmpfs.h,v 1.45 2011/09/27 01:10:43 christos Exp $ */ +/* $NetBSD: tmpfs.h,v 1.46 2013/11/08 15:44:23 rmind Exp $ */ /* * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. @@ -54,6 +54,9 @@ typedef struct tmpfs_dirent { /* Pointer to the inode this entry refers to. */ struct tmpfs_node * td_node; + /* Sequence number, see tmpfs_dir_getseq(). */ + uint32_t td_seq; + /* Name and its length. */ char * td_name; uint16_t td_namelen; @@ -61,47 +64,10 @@ typedef struct tmpfs_dirent { TAILQ_HEAD(tmpfs_dir, tmpfs_dirent); -#if defined(_KERNEL) - -#define TMPFS_MAXNAMLEN 255 -/* Validate maximum td_namelen length. */ -CTASSERT(TMPFS_MAXNAMLEN < UINT16_MAX); - -#define TMPFS_DIRCOOKIE_DOT 0 -#define TMPFS_DIRCOOKIE_DOTDOT 1 -#define TMPFS_DIRCOOKIE_EOF 2 - -/* - * Each entry in a directory has a cookie that identifies it. Cookies - * supersede offsets within directories, as tmpfs has no offsets as such. - * - * The '.', '..' and the end of directory markers have fixed cookies, - * which cannot collide with the cookies generated by other entries. - * - * The cookies for the other entries are generated based on the memory - * address of their representative meta-data structure. - * - * XXX: Truncating directory cookies to 31 bits now - workaround for - * problem with Linux compat, see PR/32034. - */ -static inline off_t -tmpfs_dircookie(tmpfs_dirent_t *de) -{ - off_t cookie; - - cookie = ((off_t)(uintptr_t)de >> 1) & 0x7FFFFFFF; - KASSERT(cookie != TMPFS_DIRCOOKIE_DOT); - KASSERT(cookie != TMPFS_DIRCOOKIE_DOTDOT); - KASSERT(cookie != TMPFS_DIRCOOKIE_EOF); - - return cookie; -} -#endif - /* * Internal representation of a tmpfs file system node -- inode. 
* - * This structure is splitted in two parts: one holds attributes common + * This structure is split in two parts: one holds attributes common * to all file types and the other holds data that is only applicable to * a particular type. * @@ -169,11 +135,14 @@ typedef struct tmpfs_node { /* List of directory entries. */ struct tmpfs_dir tn_dir; + /* Last given sequence number and their arena. */ + uint32_t tn_next_seq; + void * tn_seq_arena; + /* - * Number and pointer of the last directory entry - * returned by the readdir(3) operation. + * Pointer of the last directory entry returned + * by the readdir(3) operation. */ - off_t tn_readdir_lastn; struct tmpfs_dirent * tn_readdir_lastp; } tn_dir; @@ -196,6 +165,24 @@ typedef struct tmpfs_node { LIST_HEAD(tmpfs_node_list, tmpfs_node); +#define TMPFS_MAXNAMLEN 255 +/* Validate maximum td_namelen length. */ +CTASSERT(TMPFS_MAXNAMLEN < UINT16_MAX); + +/* + * Reserved values for the virtual entries (the first must be 0) and EOF. + * The start/end of the incremental range, see tmpfs_dir_getseq(). + */ +#define TMPFS_DIRSEQ_DOT 0 +#define TMPFS_DIRSEQ_DOTDOT 1 +#define TMPFS_DIRSEQ_EOF 2 + +#define TMPFS_DIRSEQ_START 3 /* inclusive */ +#define TMPFS_DIRSEQ_END (1U << 30) /* exclusive */ + +/* Mark to indicate that the number is not set. */ +#define TMPFS_DIRSEQ_NONE (1U << 31) + /* Status flags. 
*/ #define TMPFS_NODE_ACCESSED 0x01 #define TMPFS_NODE_MODIFIED 0x02 @@ -270,15 +257,14 @@ int tmpfs_vnode_get(struct mount *, tmp int tmpfs_alloc_dirent(tmpfs_mount_t *, const char *, uint16_t, tmpfs_dirent_t **); void tmpfs_free_dirent(tmpfs_mount_t *, tmpfs_dirent_t *); -void tmpfs_dir_attach(vnode_t *, tmpfs_dirent_t *, tmpfs_node_t *); -void tmpfs_dir_detach(vnode_t *, tmpfs_dirent_t *); +void tmpfs_dir_attach(tmpfs_node_t *, tmpfs_dirent_t *, tmpfs_node_t *); +void tmpfs_dir_detach(tmpfs_node_t *, tmpfs_dirent_t *); tmpfs_dirent_t *tmpfs_dir_lookup(tmpfs_node_t *, struct componentname *); tmpfs_dirent_t *tmpfs_dir_cached(tmpfs_node_t *); -int tmpfs_dir_getdotdent(tmpfs_node_t *, struct uio *); -int tmpfs_dir_getdotdotdent(tmpfs_node_t *, struct uio *); -tmpfs_dirent_t *tmpfs_dir_lookupbycookie(tmpfs_node_t *, off_t); +uint32_t tmpfs_dir_getseq(tmpfs_node_t *, tmpfs_dirent_t *); +tmpfs_dirent_t *tmpfs_dir_lookupbyseq(tmpfs_node_t *, off_t); int tmpfs_dir_getdents(tmpfs_node_t *, struct uio *, off_t *); int tmpfs_reg_resize(vnode_t *, off_t); @@ -321,12 +307,10 @@ bool tmpfs_strname_neqlen(struct compon * Ensures that the node pointed by 'node' is a directory and that its * contents are consistent with respect to directories. */ -#define TMPFS_VALIDATE_DIR(node) \ +#define TMPFS_VALIDATE_DIR(node) \ + KASSERT((node)->tn_vnode == NULL || VOP_ISLOCKED((node)->tn_vnode)); \ KASSERT((node)->tn_type == VDIR); \ - KASSERT((node)->tn_size % sizeof(tmpfs_dirent_t) == 0); \ - KASSERT((node)->tn_spec.tn_dir.tn_readdir_lastp == NULL || \ - tmpfs_dircookie((node)->tn_spec.tn_dir.tn_readdir_lastp) == \ - (node)->tn_spec.tn_dir.tn_readdir_lastn); + KASSERT((node)->tn_size % sizeof(tmpfs_dirent_t) == 0); /* * Memory management stuff. 
Index: src/sys/fs/tmpfs/tmpfs_rename.c diff -u src/sys/fs/tmpfs/tmpfs_rename.c:1.4 src/sys/fs/tmpfs/tmpfs_rename.c:1.5 --- src/sys/fs/tmpfs/tmpfs_rename.c:1.4 Thu Sep 27 17:40:51 2012 +++ src/sys/fs/tmpfs/tmpfs_rename.c Fri Nov 8 15:44:23 2013 @@ -1,4 +1,4 @@ -/* $NetBSD: tmpfs_rename.c,v 1.4 2012/09/27 17:40:51 riastradh Exp $ */ +/* $NetBSD: tmpfs_rename.c,v 1.5 2013/11/08 15:44:23 rmind Exp $ */ /*- * Copyright (c) 2012 The NetBSD Foundation, Inc. @@ -34,7 +34,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tmpfs_rename.c,v 1.4 2012/09/27 17:40:51 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_rename.c,v 1.5 2013/11/08 15:44:23 rmind Exp $"); #include <sys/param.h> #include <sys/errno.h> @@ -313,8 +313,11 @@ tmpfs_gro_rename(struct mount *mp, kauth * source entry and reattach it to the target directory. */ if (fdvp != tdvp) { - tmpfs_dir_detach(fdvp, *fdep); - tmpfs_dir_attach(tdvp, *fdep, VP_TO_TMPFS_NODE(fvp)); + tmpfs_node_t *fdnode = VP_TO_TMPFS_DIR(fdvp); + tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp); + + tmpfs_dir_detach(fdnode, *fdep); + tmpfs_dir_attach(tdnode, *fdep, VP_TO_TMPFS_NODE(fvp)); } else if (tvp == NULL) { /* * We are changing the directory. tmpfs_dir_attach and @@ -331,6 +334,8 @@ tmpfs_gro_rename(struct mount *mp, kauth * XXX What if the target is a directory with whiteout entries? 
*/ if (tvp != NULL) { + tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp); + KASSERT((*tdep) != NULL); KASSERT((*tdep)->td_node == VP_TO_TMPFS_NODE(tvp)); KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR)); @@ -349,7 +354,7 @@ tmpfs_gro_rename(struct mount *mp, kauth */ VP_TO_TMPFS_NODE(tvp)->tn_links--; } - tmpfs_dir_detach(tdvp, *tdep); + tmpfs_dir_detach(tdnode, *tdep); tmpfs_free_dirent(VFS_TO_TMPFS(mp), *tdep); } @@ -388,6 +393,7 @@ static int tmpfs_gro_remove(struct mount *mp, kauth_cred_t cred, struct vnode *dvp, struct componentname *cnp, void *de, struct vnode *vp) { + tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); struct tmpfs_dirent **dep = de; (void)vp; @@ -404,7 +410,7 @@ tmpfs_gro_remove(struct mount *mp, kauth KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); - tmpfs_dir_detach(dvp, *dep); + tmpfs_dir_detach(dnode, *dep); tmpfs_free_dirent(VFS_TO_TMPFS(mp), *dep); return 0; Index: src/sys/fs/tmpfs/tmpfs_subr.c diff -u src/sys/fs/tmpfs/tmpfs_subr.c:1.82 src/sys/fs/tmpfs/tmpfs_subr.c:1.83 --- src/sys/fs/tmpfs/tmpfs_subr.c:1.82 Fri Nov 1 15:38:45 2013 +++ src/sys/fs/tmpfs/tmpfs_subr.c Fri Nov 8 15:44:23 2013 @@ -1,7 +1,7 @@ -/* $NetBSD: tmpfs_subr.c,v 1.82 2013/11/01 15:38:45 rmind Exp $ */ +/* $NetBSD: tmpfs_subr.c,v 1.83 2013/11/08 15:44:23 rmind Exp $ */ /* - * Copyright (c) 2005-2011 The NetBSD Foundation, Inc. + * Copyright (c) 2005-2013 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -74,7 +74,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.82 2013/11/01 15:38:45 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.83 2013/11/08 15:44:23 rmind Exp $"); #include <sys/param.h> #include <sys/dirent.h> @@ -98,6 +98,8 @@ __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c #include <fs/tmpfs/tmpfs_specops.h> #include <fs/tmpfs/tmpfs_vnops.h> +static void tmpfs_dir_putseq(tmpfs_node_t *, tmpfs_dirent_t *); + /* * tmpfs_alloc_node: allocate a new inode of a specified type and * insert it into the list of specified mount point. @@ -155,7 +157,8 @@ tmpfs_alloc_node(tmpfs_mount_t *tmp, enu /* Directory. */ TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir); nnode->tn_spec.tn_dir.tn_parent = NULL; - nnode->tn_spec.tn_dir.tn_readdir_lastn = 0; + nnode->tn_spec.tn_dir.tn_seq_arena = NULL; + nnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START; nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; /* Extra link count for the virtual '.' entry. */ @@ -240,11 +243,10 @@ tmpfs_free_node(tmpfs_mount_t *tmp, tmpf } break; case VDIR: - /* - * KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir)); - * KASSERT(node->tn_spec.tn_dir.tn_parent == NULL || - * node == tmp->tm_root); - */ + KASSERT(node->tn_spec.tn_dir.tn_seq_arena == NULL); + KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir)); + KASSERT(node->tn_spec.tn_dir.tn_parent == NULL || + node == tmp->tm_root); break; default: break; @@ -390,12 +392,12 @@ tmpfs_alloc_file(vnode_t *dvp, vnode_t * if (cnp->cn_flags & ISWHITEOUT) { wde = tmpfs_dir_lookup(dnode, cnp); KASSERT(wde != NULL && wde->td_node == TMPFS_NODE_WHITEOUT); - tmpfs_dir_detach(dvp, wde); + tmpfs_dir_detach(dnode, wde); tmpfs_free_dirent(tmp, wde); } /* Associate inode and attach the entry into the directory. */ - tmpfs_dir_attach(dvp, de, node); + tmpfs_dir_attach(dnode, de, node); /* Make node opaque if requested. 
*/ if (cnp->cn_flags & ISWHITEOUT) @@ -426,6 +428,7 @@ tmpfs_alloc_dirent(tmpfs_mount_t *tmp, c } nde->td_namelen = len; memcpy(nde->td_name, name, len); + nde->td_seq = TMPFS_DIRSEQ_NONE; *de = nde; return 0; @@ -437,8 +440,8 @@ tmpfs_alloc_dirent(tmpfs_mount_t *tmp, c void tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de) { - - /* KASSERT(de->td_node == NULL); */ + KASSERT(de->td_node == NULL); + KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE); tmpfs_strname_free(tmp, de->td_name, de->td_namelen); tmpfs_dirent_put(tmp, de); } @@ -453,13 +456,18 @@ tmpfs_free_dirent(tmpfs_mount_t *tmp, tm * => Triggers kqueue events here. */ void -tmpfs_dir_attach(vnode_t *dvp, tmpfs_dirent_t *de, tmpfs_node_t *node) +tmpfs_dir_attach(tmpfs_node_t *dnode, tmpfs_dirent_t *de, tmpfs_node_t *node) { - tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); + vnode_t *dvp = dnode->tn_vnode; int events = NOTE_WRITE; + KASSERT(dvp != NULL); KASSERT(VOP_ISLOCKED(dvp)); + /* Get a new sequence number. */ + KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE); + de->td_seq = tmpfs_dir_getseq(dnode, de); + /* Associate directory entry and the inode. */ de->td_node = node; if (node != TMPFS_NODE_WHITEOUT) { @@ -498,28 +506,30 @@ tmpfs_dir_attach(vnode_t *dvp, tmpfs_dir * => Decreases link count on the associated node. * => Decreases the link count on directory node, if our node is VDIR. * => Triggers kqueue events here. + * + * => Note: dvp and vp may be NULL only if called by tmpfs_unmount(). */ void -tmpfs_dir_detach(vnode_t *dvp, tmpfs_dirent_t *de) +tmpfs_dir_detach(tmpfs_node_t *dnode, tmpfs_dirent_t *de) { - tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); tmpfs_node_t *node = de->td_node; + vnode_t *vp, *dvp = dnode->tn_vnode; int events = NOTE_WRITE; - KASSERT(VOP_ISLOCKED(dvp)); - - if (node != TMPFS_NODE_WHITEOUT) { - vnode_t *vp = node->tn_vnode; - - KASSERT(VOP_ISLOCKED(vp)); + KASSERT(dvp == NULL || VOP_ISLOCKED(dvp)); + if (__predict_true(node != TMPFS_NODE_WHITEOUT)) { /* Deassociate the inode and entry. 
*/ de->td_node = NULL; node->tn_dirent_hint = NULL; KASSERT(node->tn_links > 0); node->tn_links--; - VN_KNOTE(vp, node->tn_links ? NOTE_LINK : NOTE_DELETE); + + if ((vp = node->tn_vnode) != NULL) { + KASSERT(VOP_ISLOCKED(vp)); + VN_KNOTE(vp, node->tn_links ? NOTE_LINK : NOTE_DELETE); + } /* If directory - decrease the link count of parent. */ if (node->tn_type == VDIR) { @@ -534,15 +544,18 @@ tmpfs_dir_detach(vnode_t *dvp, tmpfs_dir /* Remove the entry from the directory. */ if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) { - dnode->tn_spec.tn_dir.tn_readdir_lastn = 0; dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; } TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); dnode->tn_size -= sizeof(tmpfs_dirent_t); dnode->tn_status |= TMPFS_NODE_STATUSALL; - uvm_vnp_setsize(dvp, dnode->tn_size); - VN_KNOTE(dvp, events); + tmpfs_dir_putseq(dnode, de); + + if (dvp) { + uvm_vnp_setsize(dvp, dnode->tn_size); + VN_KNOTE(dvp, events); + } } /* @@ -576,7 +589,7 @@ tmpfs_dir_lookup(tmpfs_node_t *node, str /* * tmpfs_dir_cached: get a cached directory entry if it is valid. Used to - * avoid unnecessary tmpds_dir_lookup(). + * avoid unnecessary tmpfs_dir_lookup(). * * => The vnode must be locked. */ @@ -600,103 +613,162 @@ tmpfs_dir_cached(tmpfs_node_t *node) } /* - * tmpfs_dir_getdotdent: helper function for tmpfs_readdir. Creates a - * '.' entry for the given directory and returns it in the uio space. + * tmpfs_dir_getseq: get a per-directory sequence number for the entry. + * + * => Shall not be larger than 2^31 for linux32 compatibility. 
*/ -int -tmpfs_dir_getdotdent(tmpfs_node_t *node, struct uio *uio) +uint32_t +tmpfs_dir_getseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de) { - struct dirent *dentp; + uint32_t seq = de->td_seq; + vmem_t *seq_arena; + vmem_addr_t off; int error; - TMPFS_VALIDATE_DIR(node); - KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); + TMPFS_VALIDATE_DIR(dnode); - dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); - dentp->d_fileno = node->tn_id; - dentp->d_type = DT_DIR; - dentp->d_namlen = 1; - dentp->d_name[0] = '.'; - dentp->d_name[1] = '\0'; - dentp->d_reclen = _DIRENT_SIZE(dentp); - - if (dentp->d_reclen > uio->uio_resid) - error = -1; - else { - error = uiomove(dentp, dentp->d_reclen, uio); - if (error == 0) - uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; + if (__predict_true(seq != TMPFS_DIRSEQ_NONE)) { + /* Already set. */ + KASSERT(seq >= TMPFS_DIRSEQ_START); + return seq; + } + + /* + * The "." and ".." and the end-of-directory have reserved numbers. + * The other sequence numbers are allocated as following: + * + * - The first half of the 2^31 is assigned incrementally. + * + * - If that range is exceeded, then the second half of 2^31 + * is used, but managed by vmem(9). + */ + + seq = dnode->tn_spec.tn_dir.tn_next_seq; + KASSERT(seq >= TMPFS_DIRSEQ_START); + + if (__predict_true(seq < TMPFS_DIRSEQ_END)) { + /* First half: just increment and return. */ + dnode->tn_spec.tn_dir.tn_next_seq++; + return seq; + } + + /* + * First half exceeded, use the second half. May need to create + * vmem(9) arena for the directory first. 
+ */ + if ((seq_arena = dnode->tn_spec.tn_dir.tn_seq_arena) == NULL) { + seq_arena = vmem_create("tmpfscoo", 0, + TMPFS_DIRSEQ_END - 1, 1, NULL, NULL, NULL, 0, + VM_SLEEP, IPL_NONE); + dnode->tn_spec.tn_dir.tn_seq_arena = seq_arena; + KASSERT(seq_arena != NULL); + } + error = vmem_alloc(seq_arena, 1, VM_SLEEP | VM_BESTFIT, &off); + KASSERT(error == 0); + + KASSERT(off < TMPFS_DIRSEQ_END); + seq = off | TMPFS_DIRSEQ_END; + return seq; +} + +static void +tmpfs_dir_putseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de) +{ + vmem_t *seq_arena = dnode->tn_spec.tn_dir.tn_seq_arena; + uint32_t seq = de->td_seq; + + TMPFS_VALIDATE_DIR(dnode); + + if (seq == TMPFS_DIRSEQ_NONE || seq < TMPFS_DIRSEQ_END) { + /* First half (or no sequence number set yet). */ + KASSERT(de->td_seq >= TMPFS_DIRSEQ_START); + } else { + /* Second half. */ + KASSERT(seq_arena != NULL); + KASSERT(seq >= TMPFS_DIRSEQ_END); + seq &= ~TMPFS_DIRSEQ_END; + vmem_free(seq_arena, seq, 1); + } + de->td_seq = TMPFS_DIRSEQ_NONE; + + /* Empty? We can reset. */ + if (seq_arena && dnode->tn_size == 0) { + dnode->tn_spec.tn_dir.tn_seq_arena = NULL; + dnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START; + vmem_destroy(seq_arena); } - node->tn_status |= TMPFS_NODE_ACCESSED; - kmem_free(dentp, sizeof(struct dirent)); - return error; } /* - * tmpfs_dir_getdotdotdent: helper function for tmpfs_readdir. Creates a - * '..' entry for the given directory and returns it in the uio space. + * tmpfs_dir_lookupbyseq: lookup a directory entry by the sequence number. 
*/ -int -tmpfs_dir_getdotdotdent(tmpfs_node_t *node, struct uio *uio) +tmpfs_dirent_t * +tmpfs_dir_lookupbyseq(tmpfs_node_t *node, off_t seq) { - struct dirent *dentp; - int error; + tmpfs_dirent_t *de = node->tn_spec.tn_dir.tn_readdir_lastp; TMPFS_VALIDATE_DIR(node); - KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); - - dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); - dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id; - dentp->d_type = DT_DIR; - dentp->d_namlen = 2; - dentp->d_name[0] = '.'; - dentp->d_name[1] = '.'; - dentp->d_name[2] = '\0'; - dentp->d_reclen = _DIRENT_SIZE(dentp); - - if (dentp->d_reclen > uio->uio_resid) - error = -1; - else { - error = uiomove(dentp, dentp->d_reclen, uio); - if (error == 0) { - tmpfs_dirent_t *de; - de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); - if (de == NULL) - uio->uio_offset = TMPFS_DIRCOOKIE_EOF; - else - uio->uio_offset = tmpfs_dircookie(de); - } + /* + * First, check the cache. If does not match - perform a lookup. + */ + if (de && de->td_seq == seq) { + KASSERT(de->td_seq >= TMPFS_DIRSEQ_START); + KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE); + return de; } - node->tn_status |= TMPFS_NODE_ACCESSED; - kmem_free(dentp, sizeof(struct dirent)); - return error; + TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { + KASSERT(de->td_seq >= TMPFS_DIRSEQ_START); + KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE); + if (de->td_seq == seq) + return de; + } + return NULL; } /* - * tmpfs_dir_lookupbycookie: lookup a directory entry by associated cookie. + * tmpfs_dir_getdotents: helper function for tmpfs_readdir() to get the + * dot meta entries, that is, "." or "..". Copy it to the UIO space. 
*/ -tmpfs_dirent_t * -tmpfs_dir_lookupbycookie(tmpfs_node_t *node, off_t cookie) +static int +tmpfs_dir_getdotents(tmpfs_node_t *node, struct dirent *dp, struct uio *uio) { tmpfs_dirent_t *de; + off_t next = 0; + int error; - KASSERT(VOP_ISLOCKED(node->tn_vnode)); + dp->d_fileno = node->tn_id; + dp->d_type = DT_DIR; - if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn && - node->tn_spec.tn_dir.tn_readdir_lastp != NULL) { - return node->tn_spec.tn_dir.tn_readdir_lastp; + switch (uio->uio_offset) { + case TMPFS_DIRSEQ_DOT: + strlcpy(dp->d_name, ".", sizeof(dp->d_name)); + next = TMPFS_DIRSEQ_DOTDOT; + break; + case TMPFS_DIRSEQ_DOTDOT: + strlcpy(dp->d_name, "..", sizeof(dp->d_name)); + de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); + next = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF; + break; + default: + KASSERT(false); } - TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { - if (tmpfs_dircookie(de) == cookie) { - break; - } + dp->d_namlen = strlen(dp->d_name); + dp->d_reclen = _DIRENT_SIZE(dp); + + if (dp->d_reclen > uio->uio_resid) { + return EJUSTRETURN; } - return de; + if ((error = uiomove(dp, dp->d_reclen, uio)) != 0) { + return error; + } + + uio->uio_offset = next; + return error; } /* - * tmpfs_dir_getdents: relper function for tmpfs_readdir. + * tmpfs_dir_getdents: helper function for tmpfs_readdir. * * => Returns as much directory entries as can fit in the uio space. * => The read starts at uio->uio_offset. @@ -706,69 +778,53 @@ tmpfs_dir_getdents(tmpfs_node_t *node, s { tmpfs_dirent_t *de; struct dirent *dentp; - off_t startcookie; - int error; + int error = 0; KASSERT(VOP_ISLOCKED(node->tn_vnode)); TMPFS_VALIDATE_DIR(node); /* - * Locate the first directory entry we have to return. We have cached - * the last readdir in the node, so use those values if appropriate. - * Otherwise do a linear scan to find the requested entry. + * Allocate struct dirent and first check for the "." and "..". 
+ * Note: tmpfs_dir_getdotents() will "seek" for us. */ - startcookie = uio->uio_offset; - KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); - KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); - if (startcookie == TMPFS_DIRCOOKIE_EOF) { - return 0; - } else { - de = tmpfs_dir_lookupbycookie(node, startcookie); + dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); + + if (uio->uio_offset == TMPFS_DIRSEQ_DOT) { + if ((error = tmpfs_dir_getdotents(node, dentp, uio)) != 0) { + goto done; + } + (*cntp)++; + } + if (uio->uio_offset == TMPFS_DIRSEQ_DOTDOT) { + if ((error = tmpfs_dir_getdotents(node, dentp, uio)) != 0) { + goto done; + } + (*cntp)++; } + + /* Done if we reached the end. */ + if (uio->uio_offset == TMPFS_DIRSEQ_EOF) { + goto done; + } + + /* Locate the directory entry given by the given sequence number. */ + de = tmpfs_dir_lookupbyseq(node, uio->uio_offset); if (de == NULL) { - return EINVAL; + error = EINVAL; + goto done; } /* - * Read as much entries as possible; i.e., until we reach the end - * of the directory or we exhaust uio space. + * Read as many entries as possible; i.e., until we reach the end + * of the directory or we exhaust UIO space. */ - dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); do { - /* - * Create a dirent structure representing the current - * inode and fill it. 
- */ if (de->td_node == TMPFS_NODE_WHITEOUT) { dentp->d_fileno = 1; dentp->d_type = DT_WHT; } else { dentp->d_fileno = de->td_node->tn_id; - switch (de->td_node->tn_type) { - case VBLK: - dentp->d_type = DT_BLK; - break; - case VCHR: - dentp->d_type = DT_CHR; - break; - case VDIR: - dentp->d_type = DT_DIR; - break; - case VFIFO: - dentp->d_type = DT_FIFO; - break; - case VLNK: - dentp->d_type = DT_LNK; - break; - case VREG: - dentp->d_type = DT_REG; - break; - case VSOCK: - dentp->d_type = DT_SOCK; - break; - default: - KASSERT(false); - } + dentp->d_type = vtype2dt(de->td_node->tn_type); } dentp->d_namlen = de->td_namelen; KASSERT(de->td_namelen < sizeof(dentp->d_name)); @@ -776,35 +832,34 @@ tmpfs_dir_getdents(tmpfs_node_t *node, s dentp->d_name[de->td_namelen] = '\0'; dentp->d_reclen = _DIRENT_SIZE(dentp); - /* Stop reading if the directory entry we are treating is - * bigger than the amount of data that can be returned. */ if (dentp->d_reclen > uio->uio_resid) { - error = -1; + /* Exhausted UIO space. */ + error = EJUSTRETURN; break; } - /* - * Copy the new dirent structure into the output buffer and - * advance pointers. - */ + /* Copy out the directory entry and continue. */ error = uiomove(dentp, dentp->d_reclen, uio); - + if (error) { + break; + } (*cntp)++; de = TAILQ_NEXT(de, td_entries); - } while (error == 0 && uio->uio_resid > 0 && de != NULL); - /* Update the offset and cache. */ - if (de == NULL) { - uio->uio_offset = TMPFS_DIRCOOKIE_EOF; - node->tn_spec.tn_dir.tn_readdir_lastn = 0; - node->tn_spec.tn_dir.tn_readdir_lastp = NULL; - } else { - node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset = - tmpfs_dircookie(de); - node->tn_spec.tn_dir.tn_readdir_lastp = de; - } + } while (uio->uio_resid > 0 && de); + + /* Cache the last entry or clear and mark EOF. */ + uio->uio_offset = de ? 
tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF; + node->tn_spec.tn_dir.tn_readdir_lastp = de; +done: node->tn_status |= TMPFS_NODE_ACCESSED; kmem_free(dentp, sizeof(struct dirent)); + + if (error == EJUSTRETURN) { + /* Exhausted UIO space - just return. */ + error = 0; + } + KASSERT(error >= 0); return error; } Index: src/sys/fs/tmpfs/tmpfs_vfsops.c diff -u src/sys/fs/tmpfs/tmpfs_vfsops.c:1.52 src/sys/fs/tmpfs/tmpfs_vfsops.c:1.53 --- src/sys/fs/tmpfs/tmpfs_vfsops.c:1.52 Tue Sep 27 01:10:43 2011 +++ src/sys/fs/tmpfs/tmpfs_vfsops.c Fri Nov 8 15:44:23 2013 @@ -1,4 +1,4 @@ -/* $NetBSD: tmpfs_vfsops.c,v 1.52 2011/09/27 01:10:43 christos Exp $ */ +/* $NetBSD: tmpfs_vfsops.c,v 1.53 2013/11/08 15:44:23 rmind Exp $ */ /* * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. @@ -42,7 +42,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.52 2011/09/27 01:10:43 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.53 2013/11/08 15:44:23 rmind Exp $"); #include <sys/param.h> #include <sys/types.h> @@ -205,8 +205,8 @@ tmpfs_start(struct mount *mp, int flags) static int tmpfs_unmount(struct mount *mp, int mntflags) { - tmpfs_mount_t *tmp; - tmpfs_node_t *node; + tmpfs_mount_t *tmp = VFS_TO_TMPFS(mp); + tmpfs_node_t *node, *cnode; int error, flags = 0; /* Handle forced unmounts. */ @@ -218,25 +218,28 @@ tmpfs_unmount(struct mount *mp, int mntf if (error != 0) return error; - tmp = VFS_TO_TMPFS(mp); - - /* Destroy any existing inodes. */ - while ((node = LIST_FIRST(&tmp->tm_nodes)) != NULL) { - if (node->tn_type == VDIR) { - tmpfs_dirent_t *de; + /* + * First round, detach and destroy all directory entries. + * Also, clear the pointers to the vnodes - they are gone. + */ + LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) { + tmpfs_dirent_t *de; - /* Destroy any directory entries. 
*/ - de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); - while (de != NULL) { - tmpfs_dirent_t *nde; - - nde = TAILQ_NEXT(de, td_entries); - tmpfs_free_dirent(tmp, de); - node->tn_size -= sizeof(tmpfs_dirent_t); - de = nde; + node->tn_vnode = NULL; + if (node->tn_type != VDIR) { + continue; + } + while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) { + if ((cnode = de->td_node) != NULL) { + cnode->tn_vnode = NULL; } + tmpfs_dir_detach(node, de); + tmpfs_free_dirent(tmp, de); } - /* Removes inode from the list. */ + } + + /* Second round, destroy all inodes. */ + while ((node = LIST_FIRST(&tmp->tm_nodes)) != NULL) { tmpfs_free_node(tmp, node); } Index: src/sys/fs/tmpfs/tmpfs_vnops.c diff -u src/sys/fs/tmpfs/tmpfs_vnops.c:1.105 src/sys/fs/tmpfs/tmpfs_vnops.c:1.106 --- src/sys/fs/tmpfs/tmpfs_vnops.c:1.105 Fri Nov 1 15:38:45 2013 +++ src/sys/fs/tmpfs/tmpfs_vnops.c Fri Nov 8 15:44:23 2013 @@ -1,4 +1,4 @@ -/* $NetBSD: tmpfs_vnops.c,v 1.105 2013/11/01 15:38:45 rmind Exp $ */ +/* $NetBSD: tmpfs_vnops.c,v 1.106 2013/11/08 15:44:23 rmind Exp $ */ /* * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. @@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.105 2013/11/01 15:38:45 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.106 2013/11/08 15:44:23 rmind Exp $"); #include <sys/param.h> #include <sys/dirent.h> @@ -710,9 +710,9 @@ tmpfs_remove(void *v) * Note: the inode referred by it will not be destroyed * until the vnode is reclaimed/recycled. 
*/ - tmpfs_dir_detach(dvp, de); + tmpfs_dir_detach(dnode, de); if (ap->a_cnp->cn_flags & DOWHITEOUT) - tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT); + tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT); else tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de); @@ -747,7 +747,7 @@ tmpfs_link(void *v) vnode_t *dvp = ap->a_dvp; vnode_t *vp = ap->a_vp; struct componentname *cnp = ap->a_cnp; - tmpfs_node_t *node; + tmpfs_node_t *dnode, *node; tmpfs_dirent_t *de; int error; @@ -756,6 +756,7 @@ tmpfs_link(void *v) KASSERT(vp->v_type != VDIR); KASSERT(dvp->v_mount == vp->v_mount); + dnode = VP_TO_TMPFS_DIR(dvp); node = VP_TO_TMPFS_NODE(vp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); @@ -780,11 +781,11 @@ tmpfs_link(void *v) goto out; } - /* + /* * Insert the entry into the directory. * It will increase the inode link count. */ - tmpfs_dir_attach(dvp, de, node); + tmpfs_dir_attach(dnode, de, node); /* Update the timestamps and trigger the event. */ if (node->tn_vnode) { @@ -872,7 +873,7 @@ tmpfs_rmdir(void *v) node->tn_status |= TMPFS_NODE_STATUSALL; /* Detach the directory entry from the directory. */ - tmpfs_dir_detach(dvp, de); + tmpfs_dir_detach(dnode, de); /* Purge the cache for parent. */ cache_purge(dvp); @@ -883,14 +884,14 @@ tmpfs_rmdir(void *v) * until the vnode is reclaimed. */ if (ap->a_cnp->cn_flags & DOWHITEOUT) - tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT); + tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT); else tmpfs_free_dirent(tmp, de); /* Destroy the whiteout entries from the node. 
*/ while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) { KASSERT(de->td_node == TMPFS_NODE_WHITEOUT); - tmpfs_dir_detach(vp, de); + tmpfs_dir_detach(node, de); tmpfs_free_dirent(tmp, de); } @@ -951,68 +952,49 @@ tmpfs_readdir(void *v) node = VP_TO_TMPFS_DIR(vp); startoff = uio->uio_offset; cnt = 0; - if (node->tn_links == 0) { - error = 0; - goto out; - } - if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) { - error = tmpfs_dir_getdotdent(node, uio); - if (error != 0) { - if (error == -1) - error = 0; - goto out; - } - cnt++; - } - if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) { - error = tmpfs_dir_getdotdotdent(node, uio); - if (error != 0) { - if (error == -1) - error = 0; - goto out; - } - cnt++; - } - error = tmpfs_dir_getdents(node, uio, &cnt); - if (error == -1) { + /* + * Retrieve the directory entries, unless it is being destroyed. + */ + if (node->tn_links) { + error = tmpfs_dir_getdents(node, uio, &cnt); + } else { error = 0; } - KASSERT(error >= 0); -out: + if (eofflag != NULL) { - *eofflag = (!error && uio->uio_offset == TMPFS_DIRCOOKIE_EOF); + *eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF; } if (error || cookies == NULL || ncookies == NULL) { return error; } /* Update NFS-related variables, if any. 
*/ - off_t i, off = startoff; tmpfs_dirent_t *de = NULL; + off_t i, off = startoff; *cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK); *ncookies = cnt; for (i = 0; i < cnt; i++) { - KASSERT(off != TMPFS_DIRCOOKIE_EOF); - if (off != TMPFS_DIRCOOKIE_DOT) { - if (off == TMPFS_DIRCOOKIE_DOTDOT) { + KASSERT(off != TMPFS_DIRSEQ_EOF); + if (off != TMPFS_DIRSEQ_DOT) { + if (off == TMPFS_DIRSEQ_DOTDOT) { de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); } else if (de != NULL) { de = TAILQ_NEXT(de, td_entries); } else { - de = tmpfs_dir_lookupbycookie(node, off); + de = tmpfs_dir_lookupbyseq(node, off); KASSERT(de != NULL); de = TAILQ_NEXT(de, td_entries); } if (de == NULL) { - off = TMPFS_DIRCOOKIE_EOF; + off = TMPFS_DIRSEQ_EOF; } else { - off = tmpfs_dircookie(de); + off = tmpfs_dir_getseq(node, de); } } else { - off = TMPFS_DIRCOOKIE_DOTDOT; + off = TMPFS_DIRSEQ_DOTDOT; } (*cookies)[i] = off; } @@ -1284,6 +1266,7 @@ tmpfs_whiteout(void *v) struct componentname *cnp = ap->a_cnp; const int flags = ap->a_flags; tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount); + tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); tmpfs_dirent_t *de; int error; @@ -1295,14 +1278,14 @@ tmpfs_whiteout(void *v) cnp->cn_namelen, &de); if (error) return error; - tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT); + tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT); break; case DELETE: cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */ - de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), cnp); + de = tmpfs_dir_lookup(dnode, cnp); if (de == NULL) return ENOENT; - tmpfs_dir_detach(dvp, de); + tmpfs_dir_detach(dnode, de); tmpfs_free_dirent(tmp, de); break; }