Module Name: src
Committed By: rmind
Date: Fri Nov 8 15:44:23 UTC 2013
Modified Files:
src/sys/fs/tmpfs: tmpfs.h tmpfs_rename.c tmpfs_subr.c tmpfs_vfsops.c
tmpfs_vnops.c
Log Message:
tmpfs: replace the broken tmpfs_dircookie() logic, which used the directory
entry address truncated to 31 bits (required for 32-bit readdir compatibility,
e.g. linux32). Instead, assign sequence numbers from a 2^31 range as follows:
- The first half of the 2^31 is assigned incrementally (the fast path).
- When exceeded, use the second half of 2^31, but manage with vmem(9).
It takes about one billion (2^30) entries attached to a single directory to trigger vmem(9) usage.
Also, while here, add some fixes for tmpfs_unmount().
Should fix PR/47739, PR/47480, PR/46088 and PR/41068.
Thanks to wiz@ for stress testing.
To generate a diff of this commit:
cvs rdiff -u -r1.45 -r1.46 src/sys/fs/tmpfs/tmpfs.h
cvs rdiff -u -r1.4 -r1.5 src/sys/fs/tmpfs/tmpfs_rename.c
cvs rdiff -u -r1.82 -r1.83 src/sys/fs/tmpfs/tmpfs_subr.c
cvs rdiff -u -r1.52 -r1.53 src/sys/fs/tmpfs/tmpfs_vfsops.c
cvs rdiff -u -r1.105 -r1.106 src/sys/fs/tmpfs/tmpfs_vnops.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/fs/tmpfs/tmpfs.h
diff -u src/sys/fs/tmpfs/tmpfs.h:1.45 src/sys/fs/tmpfs/tmpfs.h:1.46
--- src/sys/fs/tmpfs/tmpfs.h:1.45 Tue Sep 27 01:10:43 2011
+++ src/sys/fs/tmpfs/tmpfs.h Fri Nov 8 15:44:23 2013
@@ -1,4 +1,4 @@
-/* $NetBSD: tmpfs.h,v 1.45 2011/09/27 01:10:43 christos Exp $ */
+/* $NetBSD: tmpfs.h,v 1.46 2013/11/08 15:44:23 rmind Exp $ */
/*
* Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
@@ -54,6 +54,9 @@ typedef struct tmpfs_dirent {
/* Pointer to the inode this entry refers to. */
struct tmpfs_node * td_node;
+ /* Sequence number, see tmpfs_dir_getseq(). */
+ uint32_t td_seq;
+
/* Name and its length. */
char * td_name;
uint16_t td_namelen;
@@ -61,47 +64,10 @@ typedef struct tmpfs_dirent {
TAILQ_HEAD(tmpfs_dir, tmpfs_dirent);
-#if defined(_KERNEL)
-
-#define TMPFS_MAXNAMLEN 255
-/* Validate maximum td_namelen length. */
-CTASSERT(TMPFS_MAXNAMLEN < UINT16_MAX);
-
-#define TMPFS_DIRCOOKIE_DOT 0
-#define TMPFS_DIRCOOKIE_DOTDOT 1
-#define TMPFS_DIRCOOKIE_EOF 2
-
-/*
- * Each entry in a directory has a cookie that identifies it. Cookies
- * supersede offsets within directories, as tmpfs has no offsets as such.
- *
- * The '.', '..' and the end of directory markers have fixed cookies,
- * which cannot collide with the cookies generated by other entries.
- *
- * The cookies for the other entries are generated based on the memory
- * address of their representative meta-data structure.
- *
- * XXX: Truncating directory cookies to 31 bits now - workaround for
- * problem with Linux compat, see PR/32034.
- */
-static inline off_t
-tmpfs_dircookie(tmpfs_dirent_t *de)
-{
- off_t cookie;
-
- cookie = ((off_t)(uintptr_t)de >> 1) & 0x7FFFFFFF;
- KASSERT(cookie != TMPFS_DIRCOOKIE_DOT);
- KASSERT(cookie != TMPFS_DIRCOOKIE_DOTDOT);
- KASSERT(cookie != TMPFS_DIRCOOKIE_EOF);
-
- return cookie;
-}
-#endif
-
/*
* Internal representation of a tmpfs file system node -- inode.
*
- * This structure is splitted in two parts: one holds attributes common
+ * This structure is split in two parts: one holds attributes common
* to all file types and the other holds data that is only applicable to
* a particular type.
*
@@ -169,11 +135,14 @@ typedef struct tmpfs_node {
/* List of directory entries. */
struct tmpfs_dir tn_dir;
+ /* Last given sequence number and their arena. */
+ uint32_t tn_next_seq;
+ void * tn_seq_arena;
+
/*
- * Number and pointer of the last directory entry
- * returned by the readdir(3) operation.
+ * Pointer of the last directory entry returned
+ * by the readdir(3) operation.
*/
- off_t tn_readdir_lastn;
struct tmpfs_dirent * tn_readdir_lastp;
} tn_dir;
@@ -196,6 +165,24 @@ typedef struct tmpfs_node {
LIST_HEAD(tmpfs_node_list, tmpfs_node);
+#define TMPFS_MAXNAMLEN 255
+/* Validate maximum td_namelen length. */
+CTASSERT(TMPFS_MAXNAMLEN < UINT16_MAX);
+
+/*
+ * Reserved values for the virtual entries (the first must be 0) and EOF.
+ * The start/end of the incremental range, see tmpfs_dir_getseq().
+ */
+#define TMPFS_DIRSEQ_DOT 0
+#define TMPFS_DIRSEQ_DOTDOT 1
+#define TMPFS_DIRSEQ_EOF 2
+
+#define TMPFS_DIRSEQ_START 3 /* inclusive */
+#define TMPFS_DIRSEQ_END (1U << 30) /* exclusive */
+
+/* Mark to indicate that the number is not set. */
+#define TMPFS_DIRSEQ_NONE (1U << 31)
+
/* Status flags. */
#define TMPFS_NODE_ACCESSED 0x01
#define TMPFS_NODE_MODIFIED 0x02
@@ -270,15 +257,14 @@ int tmpfs_vnode_get(struct mount *, tmp
int tmpfs_alloc_dirent(tmpfs_mount_t *, const char *, uint16_t,
tmpfs_dirent_t **);
void tmpfs_free_dirent(tmpfs_mount_t *, tmpfs_dirent_t *);
-void tmpfs_dir_attach(vnode_t *, tmpfs_dirent_t *, tmpfs_node_t *);
-void tmpfs_dir_detach(vnode_t *, tmpfs_dirent_t *);
+void tmpfs_dir_attach(tmpfs_node_t *, tmpfs_dirent_t *, tmpfs_node_t *);
+void tmpfs_dir_detach(tmpfs_node_t *, tmpfs_dirent_t *);
tmpfs_dirent_t *tmpfs_dir_lookup(tmpfs_node_t *, struct componentname *);
tmpfs_dirent_t *tmpfs_dir_cached(tmpfs_node_t *);
-int tmpfs_dir_getdotdent(tmpfs_node_t *, struct uio *);
-int tmpfs_dir_getdotdotdent(tmpfs_node_t *, struct uio *);
-tmpfs_dirent_t *tmpfs_dir_lookupbycookie(tmpfs_node_t *, off_t);
+uint32_t tmpfs_dir_getseq(tmpfs_node_t *, tmpfs_dirent_t *);
+tmpfs_dirent_t *tmpfs_dir_lookupbyseq(tmpfs_node_t *, off_t);
int tmpfs_dir_getdents(tmpfs_node_t *, struct uio *, off_t *);
int tmpfs_reg_resize(vnode_t *, off_t);
@@ -321,12 +307,10 @@ bool tmpfs_strname_neqlen(struct compon
* Ensures that the node pointed by 'node' is a directory and that its
* contents are consistent with respect to directories.
*/
-#define TMPFS_VALIDATE_DIR(node) \
+#define TMPFS_VALIDATE_DIR(node) \
+ KASSERT((node)->tn_vnode == NULL || VOP_ISLOCKED((node)->tn_vnode)); \
KASSERT((node)->tn_type == VDIR); \
- KASSERT((node)->tn_size % sizeof(tmpfs_dirent_t) == 0); \
- KASSERT((node)->tn_spec.tn_dir.tn_readdir_lastp == NULL || \
- tmpfs_dircookie((node)->tn_spec.tn_dir.tn_readdir_lastp) == \
- (node)->tn_spec.tn_dir.tn_readdir_lastn);
+ KASSERT((node)->tn_size % sizeof(tmpfs_dirent_t) == 0);
/*
* Memory management stuff.
Index: src/sys/fs/tmpfs/tmpfs_rename.c
diff -u src/sys/fs/tmpfs/tmpfs_rename.c:1.4 src/sys/fs/tmpfs/tmpfs_rename.c:1.5
--- src/sys/fs/tmpfs/tmpfs_rename.c:1.4 Thu Sep 27 17:40:51 2012
+++ src/sys/fs/tmpfs/tmpfs_rename.c Fri Nov 8 15:44:23 2013
@@ -1,4 +1,4 @@
-/* $NetBSD: tmpfs_rename.c,v 1.4 2012/09/27 17:40:51 riastradh Exp $ */
+/* $NetBSD: tmpfs_rename.c,v 1.5 2013/11/08 15:44:23 rmind Exp $ */
/*-
* Copyright (c) 2012 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tmpfs_rename.c,v 1.4 2012/09/27 17:40:51 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tmpfs_rename.c,v 1.5 2013/11/08 15:44:23 rmind Exp $");
#include <sys/param.h>
#include <sys/errno.h>
@@ -313,8 +313,11 @@ tmpfs_gro_rename(struct mount *mp, kauth
* source entry and reattach it to the target directory.
*/
if (fdvp != tdvp) {
- tmpfs_dir_detach(fdvp, *fdep);
- tmpfs_dir_attach(tdvp, *fdep, VP_TO_TMPFS_NODE(fvp));
+ tmpfs_node_t *fdnode = VP_TO_TMPFS_DIR(fdvp);
+ tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
+
+ tmpfs_dir_detach(fdnode, *fdep);
+ tmpfs_dir_attach(tdnode, *fdep, VP_TO_TMPFS_NODE(fvp));
} else if (tvp == NULL) {
/*
* We are changing the directory. tmpfs_dir_attach and
@@ -331,6 +334,8 @@ tmpfs_gro_rename(struct mount *mp, kauth
* XXX What if the target is a directory with whiteout entries?
*/
if (tvp != NULL) {
+ tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
+
KASSERT((*tdep) != NULL);
KASSERT((*tdep)->td_node == VP_TO_TMPFS_NODE(tvp));
KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
@@ -349,7 +354,7 @@ tmpfs_gro_rename(struct mount *mp, kauth
*/
VP_TO_TMPFS_NODE(tvp)->tn_links--;
}
- tmpfs_dir_detach(tdvp, *tdep);
+ tmpfs_dir_detach(tdnode, *tdep);
tmpfs_free_dirent(VFS_TO_TMPFS(mp), *tdep);
}
@@ -388,6 +393,7 @@ static int
tmpfs_gro_remove(struct mount *mp, kauth_cred_t cred,
struct vnode *dvp, struct componentname *cnp, void *de, struct vnode *vp)
{
+ tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
struct tmpfs_dirent **dep = de;
(void)vp;
@@ -404,7 +410,7 @@ tmpfs_gro_remove(struct mount *mp, kauth
KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
- tmpfs_dir_detach(dvp, *dep);
+ tmpfs_dir_detach(dnode, *dep);
tmpfs_free_dirent(VFS_TO_TMPFS(mp), *dep);
return 0;
Index: src/sys/fs/tmpfs/tmpfs_subr.c
diff -u src/sys/fs/tmpfs/tmpfs_subr.c:1.82 src/sys/fs/tmpfs/tmpfs_subr.c:1.83
--- src/sys/fs/tmpfs/tmpfs_subr.c:1.82 Fri Nov 1 15:38:45 2013
+++ src/sys/fs/tmpfs/tmpfs_subr.c Fri Nov 8 15:44:23 2013
@@ -1,7 +1,7 @@
-/* $NetBSD: tmpfs_subr.c,v 1.82 2013/11/01 15:38:45 rmind Exp $ */
+/* $NetBSD: tmpfs_subr.c,v 1.83 2013/11/08 15:44:23 rmind Exp $ */
/*
- * Copyright (c) 2005-2011 The NetBSD Foundation, Inc.
+ * Copyright (c) 2005-2013 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -74,7 +74,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.82 2013/11/01 15:38:45 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.83 2013/11/08 15:44:23 rmind Exp $");
#include <sys/param.h>
#include <sys/dirent.h>
@@ -98,6 +98,8 @@ __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c
#include <fs/tmpfs/tmpfs_specops.h>
#include <fs/tmpfs/tmpfs_vnops.h>
+static void tmpfs_dir_putseq(tmpfs_node_t *, tmpfs_dirent_t *);
+
/*
* tmpfs_alloc_node: allocate a new inode of a specified type and
* insert it into the list of specified mount point.
@@ -155,7 +157,8 @@ tmpfs_alloc_node(tmpfs_mount_t *tmp, enu
/* Directory. */
TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
nnode->tn_spec.tn_dir.tn_parent = NULL;
- nnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
+ nnode->tn_spec.tn_dir.tn_seq_arena = NULL;
+ nnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START;
nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
/* Extra link count for the virtual '.' entry. */
@@ -240,11 +243,10 @@ tmpfs_free_node(tmpfs_mount_t *tmp, tmpf
}
break;
case VDIR:
- /*
- * KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir));
- * KASSERT(node->tn_spec.tn_dir.tn_parent == NULL ||
- * node == tmp->tm_root);
- */
+ KASSERT(node->tn_spec.tn_dir.tn_seq_arena == NULL);
+ KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir));
+ KASSERT(node->tn_spec.tn_dir.tn_parent == NULL ||
+ node == tmp->tm_root);
break;
default:
break;
@@ -390,12 +392,12 @@ tmpfs_alloc_file(vnode_t *dvp, vnode_t *
if (cnp->cn_flags & ISWHITEOUT) {
wde = tmpfs_dir_lookup(dnode, cnp);
KASSERT(wde != NULL && wde->td_node == TMPFS_NODE_WHITEOUT);
- tmpfs_dir_detach(dvp, wde);
+ tmpfs_dir_detach(dnode, wde);
tmpfs_free_dirent(tmp, wde);
}
/* Associate inode and attach the entry into the directory. */
- tmpfs_dir_attach(dvp, de, node);
+ tmpfs_dir_attach(dnode, de, node);
/* Make node opaque if requested. */
if (cnp->cn_flags & ISWHITEOUT)
@@ -426,6 +428,7 @@ tmpfs_alloc_dirent(tmpfs_mount_t *tmp, c
}
nde->td_namelen = len;
memcpy(nde->td_name, name, len);
+ nde->td_seq = TMPFS_DIRSEQ_NONE;
*de = nde;
return 0;
@@ -437,8 +440,8 @@ tmpfs_alloc_dirent(tmpfs_mount_t *tmp, c
void
tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de)
{
-
- /* KASSERT(de->td_node == NULL); */
+ KASSERT(de->td_node == NULL);
+ KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE);
tmpfs_strname_free(tmp, de->td_name, de->td_namelen);
tmpfs_dirent_put(tmp, de);
}
@@ -453,13 +456,18 @@ tmpfs_free_dirent(tmpfs_mount_t *tmp, tm
* => Triggers kqueue events here.
*/
void
-tmpfs_dir_attach(vnode_t *dvp, tmpfs_dirent_t *de, tmpfs_node_t *node)
+tmpfs_dir_attach(tmpfs_node_t *dnode, tmpfs_dirent_t *de, tmpfs_node_t *node)
{
- tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
+ vnode_t *dvp = dnode->tn_vnode;
int events = NOTE_WRITE;
+ KASSERT(dvp != NULL);
KASSERT(VOP_ISLOCKED(dvp));
+ /* Get a new sequence number. */
+ KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE);
+ de->td_seq = tmpfs_dir_getseq(dnode, de);
+
/* Associate directory entry and the inode. */
de->td_node = node;
if (node != TMPFS_NODE_WHITEOUT) {
@@ -498,28 +506,30 @@ tmpfs_dir_attach(vnode_t *dvp, tmpfs_dir
* => Decreases link count on the associated node.
* => Decreases the link count on directory node, if our node is VDIR.
* => Triggers kqueue events here.
+ *
+ * => Note: dvp and vp may be NULL only if called by tmpfs_unmount().
*/
void
-tmpfs_dir_detach(vnode_t *dvp, tmpfs_dirent_t *de)
+tmpfs_dir_detach(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
- tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
tmpfs_node_t *node = de->td_node;
+ vnode_t *vp, *dvp = dnode->tn_vnode;
int events = NOTE_WRITE;
- KASSERT(VOP_ISLOCKED(dvp));
-
- if (node != TMPFS_NODE_WHITEOUT) {
- vnode_t *vp = node->tn_vnode;
-
- KASSERT(VOP_ISLOCKED(vp));
+ KASSERT(dvp == NULL || VOP_ISLOCKED(dvp));
+ if (__predict_true(node != TMPFS_NODE_WHITEOUT)) {
/* Deassociate the inode and entry. */
de->td_node = NULL;
node->tn_dirent_hint = NULL;
KASSERT(node->tn_links > 0);
node->tn_links--;
- VN_KNOTE(vp, node->tn_links ? NOTE_LINK : NOTE_DELETE);
+
+ if ((vp = node->tn_vnode) != NULL) {
+ KASSERT(VOP_ISLOCKED(vp));
+ VN_KNOTE(vp, node->tn_links ? NOTE_LINK : NOTE_DELETE);
+ }
/* If directory - decrease the link count of parent. */
if (node->tn_type == VDIR) {
@@ -534,15 +544,18 @@ tmpfs_dir_detach(vnode_t *dvp, tmpfs_dir
/* Remove the entry from the directory. */
if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
- dnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
}
TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
dnode->tn_size -= sizeof(tmpfs_dirent_t);
dnode->tn_status |= TMPFS_NODE_STATUSALL;
- uvm_vnp_setsize(dvp, dnode->tn_size);
- VN_KNOTE(dvp, events);
+ tmpfs_dir_putseq(dnode, de);
+
+ if (dvp) {
+ uvm_vnp_setsize(dvp, dnode->tn_size);
+ VN_KNOTE(dvp, events);
+ }
}
/*
@@ -576,7 +589,7 @@ tmpfs_dir_lookup(tmpfs_node_t *node, str
/*
* tmpfs_dir_cached: get a cached directory entry if it is valid. Used to
- * avoid unnecessary tmpds_dir_lookup().
+ * avoid unnecessary tmpfs_dir_lookup().
*
* => The vnode must be locked.
*/
@@ -600,103 +613,162 @@ tmpfs_dir_cached(tmpfs_node_t *node)
}
/*
- * tmpfs_dir_getdotdent: helper function for tmpfs_readdir. Creates a
- * '.' entry for the given directory and returns it in the uio space.
+ * tmpfs_dir_getseq: get a per-directory sequence number for the entry.
+ *
+ * => Shall not be larger than 2^31 for linux32 compatibility.
*/
-int
-tmpfs_dir_getdotdent(tmpfs_node_t *node, struct uio *uio)
+uint32_t
+tmpfs_dir_getseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
- struct dirent *dentp;
+ uint32_t seq = de->td_seq;
+ vmem_t *seq_arena;
+ vmem_addr_t off;
int error;
- TMPFS_VALIDATE_DIR(node);
- KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
+ TMPFS_VALIDATE_DIR(dnode);
- dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
- dentp->d_fileno = node->tn_id;
- dentp->d_type = DT_DIR;
- dentp->d_namlen = 1;
- dentp->d_name[0] = '.';
- dentp->d_name[1] = '\0';
- dentp->d_reclen = _DIRENT_SIZE(dentp);
-
- if (dentp->d_reclen > uio->uio_resid)
- error = -1;
- else {
- error = uiomove(dentp, dentp->d_reclen, uio);
- if (error == 0)
- uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
+ if (__predict_true(seq != TMPFS_DIRSEQ_NONE)) {
+ /* Already set. */
+ KASSERT(seq >= TMPFS_DIRSEQ_START);
+ return seq;
+ }
+
+ /*
+ * The "." and ".." and the end-of-directory have reserved numbers.
+ * The other sequence numbers are allocated as following:
+ *
+ * - The first half of the 2^31 is assigned incrementally.
+ *
+ * - If that range is exceeded, then the second half of 2^31
+ * is used, but managed by vmem(9).
+ */
+
+ seq = dnode->tn_spec.tn_dir.tn_next_seq;
+ KASSERT(seq >= TMPFS_DIRSEQ_START);
+
+ if (__predict_true(seq < TMPFS_DIRSEQ_END)) {
+ /* First half: just increment and return. */
+ dnode->tn_spec.tn_dir.tn_next_seq++;
+ return seq;
+ }
+
+ /*
+ * First half exceeded, use the second half. May need to create
+ * vmem(9) arena for the directory first.
+ */
+ if ((seq_arena = dnode->tn_spec.tn_dir.tn_seq_arena) == NULL) {
+ seq_arena = vmem_create("tmpfscoo", 0,
+ TMPFS_DIRSEQ_END - 1, 1, NULL, NULL, NULL, 0,
+ VM_SLEEP, IPL_NONE);
+ dnode->tn_spec.tn_dir.tn_seq_arena = seq_arena;
+ KASSERT(seq_arena != NULL);
+ }
+ error = vmem_alloc(seq_arena, 1, VM_SLEEP | VM_BESTFIT, &off);
+ KASSERT(error == 0);
+
+ KASSERT(off < TMPFS_DIRSEQ_END);
+ seq = off | TMPFS_DIRSEQ_END;
+ return seq;
+}
+
+static void
+tmpfs_dir_putseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
+{
+ vmem_t *seq_arena = dnode->tn_spec.tn_dir.tn_seq_arena;
+ uint32_t seq = de->td_seq;
+
+ TMPFS_VALIDATE_DIR(dnode);
+
+ if (seq == TMPFS_DIRSEQ_NONE || seq < TMPFS_DIRSEQ_END) {
+ /* First half (or no sequence number set yet). */
+ KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
+ } else {
+ /* Second half. */
+ KASSERT(seq_arena != NULL);
+ KASSERT(seq >= TMPFS_DIRSEQ_END);
+ seq &= ~TMPFS_DIRSEQ_END;
+ vmem_free(seq_arena, seq, 1);
+ }
+ de->td_seq = TMPFS_DIRSEQ_NONE;
+
+ /* Empty? We can reset. */
+ if (seq_arena && dnode->tn_size == 0) {
+ dnode->tn_spec.tn_dir.tn_seq_arena = NULL;
+ dnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START;
+ vmem_destroy(seq_arena);
}
- node->tn_status |= TMPFS_NODE_ACCESSED;
- kmem_free(dentp, sizeof(struct dirent));
- return error;
}
/*
- * tmpfs_dir_getdotdotdent: helper function for tmpfs_readdir. Creates a
- * '..' entry for the given directory and returns it in the uio space.
+ * tmpfs_dir_lookupbyseq: lookup a directory entry by the sequence number.
*/
-int
-tmpfs_dir_getdotdotdent(tmpfs_node_t *node, struct uio *uio)
+tmpfs_dirent_t *
+tmpfs_dir_lookupbyseq(tmpfs_node_t *node, off_t seq)
{
- struct dirent *dentp;
- int error;
+ tmpfs_dirent_t *de = node->tn_spec.tn_dir.tn_readdir_lastp;
TMPFS_VALIDATE_DIR(node);
- KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
-
- dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
- dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
- dentp->d_type = DT_DIR;
- dentp->d_namlen = 2;
- dentp->d_name[0] = '.';
- dentp->d_name[1] = '.';
- dentp->d_name[2] = '\0';
- dentp->d_reclen = _DIRENT_SIZE(dentp);
-
- if (dentp->d_reclen > uio->uio_resid)
- error = -1;
- else {
- error = uiomove(dentp, dentp->d_reclen, uio);
- if (error == 0) {
- tmpfs_dirent_t *de;
- de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
- if (de == NULL)
- uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
- else
- uio->uio_offset = tmpfs_dircookie(de);
- }
+ /*
+ * First, check the cache. If does not match - perform a lookup.
+ */
+ if (de && de->td_seq == seq) {
+ KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
+ KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
+ return de;
}
- node->tn_status |= TMPFS_NODE_ACCESSED;
- kmem_free(dentp, sizeof(struct dirent));
- return error;
+ TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
+ KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
+ KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
+ if (de->td_seq == seq)
+ return de;
+ }
+ return NULL;
}
/*
- * tmpfs_dir_lookupbycookie: lookup a directory entry by associated cookie.
+ * tmpfs_dir_getdotents: helper function for tmpfs_readdir() to get the
+ * dot meta entries, that is, "." or "..". Copy it to the UIO space.
*/
-tmpfs_dirent_t *
-tmpfs_dir_lookupbycookie(tmpfs_node_t *node, off_t cookie)
+static int
+tmpfs_dir_getdotents(tmpfs_node_t *node, struct dirent *dp, struct uio *uio)
{
tmpfs_dirent_t *de;
+ off_t next = 0;
+ int error;
- KASSERT(VOP_ISLOCKED(node->tn_vnode));
+ dp->d_fileno = node->tn_id;
+ dp->d_type = DT_DIR;
- if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn &&
- node->tn_spec.tn_dir.tn_readdir_lastp != NULL) {
- return node->tn_spec.tn_dir.tn_readdir_lastp;
+ switch (uio->uio_offset) {
+ case TMPFS_DIRSEQ_DOT:
+ strlcpy(dp->d_name, ".", sizeof(dp->d_name));
+ next = TMPFS_DIRSEQ_DOTDOT;
+ break;
+ case TMPFS_DIRSEQ_DOTDOT:
+ strlcpy(dp->d_name, "..", sizeof(dp->d_name));
+ de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
+ next = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
+ break;
+ default:
+ KASSERT(false);
}
- TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
- if (tmpfs_dircookie(de) == cookie) {
- break;
- }
+ dp->d_namlen = strlen(dp->d_name);
+ dp->d_reclen = _DIRENT_SIZE(dp);
+
+ if (dp->d_reclen > uio->uio_resid) {
+ return EJUSTRETURN;
}
- return de;
+ if ((error = uiomove(dp, dp->d_reclen, uio)) != 0) {
+ return error;
+ }
+
+ uio->uio_offset = next;
+ return error;
}
/*
- * tmpfs_dir_getdents: relper function for tmpfs_readdir.
+ * tmpfs_dir_getdents: helper function for tmpfs_readdir.
*
* => Returns as much directory entries as can fit in the uio space.
* => The read starts at uio->uio_offset.
@@ -706,69 +778,53 @@ tmpfs_dir_getdents(tmpfs_node_t *node, s
{
tmpfs_dirent_t *de;
struct dirent *dentp;
- off_t startcookie;
- int error;
+ int error = 0;
KASSERT(VOP_ISLOCKED(node->tn_vnode));
TMPFS_VALIDATE_DIR(node);
/*
- * Locate the first directory entry we have to return. We have cached
- * the last readdir in the node, so use those values if appropriate.
- * Otherwise do a linear scan to find the requested entry.
+ * Allocate struct dirent and first check for the "." and "..".
+ * Note: tmpfs_dir_getdotents() will "seek" for us.
*/
- startcookie = uio->uio_offset;
- KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
- KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
- if (startcookie == TMPFS_DIRCOOKIE_EOF) {
- return 0;
- } else {
- de = tmpfs_dir_lookupbycookie(node, startcookie);
+ dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
+
+ if (uio->uio_offset == TMPFS_DIRSEQ_DOT) {
+ if ((error = tmpfs_dir_getdotents(node, dentp, uio)) != 0) {
+ goto done;
+ }
+ (*cntp)++;
+ }
+ if (uio->uio_offset == TMPFS_DIRSEQ_DOTDOT) {
+ if ((error = tmpfs_dir_getdotents(node, dentp, uio)) != 0) {
+ goto done;
+ }
+ (*cntp)++;
}
+
+ /* Done if we reached the end. */
+ if (uio->uio_offset == TMPFS_DIRSEQ_EOF) {
+ goto done;
+ }
+
+ /* Locate the directory entry given by the given sequence number. */
+ de = tmpfs_dir_lookupbyseq(node, uio->uio_offset);
if (de == NULL) {
- return EINVAL;
+ error = EINVAL;
+ goto done;
}
/*
- * Read as much entries as possible; i.e., until we reach the end
- * of the directory or we exhaust uio space.
+ * Read as many entries as possible; i.e., until we reach the end
+ * of the directory or we exhaust UIO space.
*/
- dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
do {
- /*
- * Create a dirent structure representing the current
- * inode and fill it.
- */
if (de->td_node == TMPFS_NODE_WHITEOUT) {
dentp->d_fileno = 1;
dentp->d_type = DT_WHT;
} else {
dentp->d_fileno = de->td_node->tn_id;
- switch (de->td_node->tn_type) {
- case VBLK:
- dentp->d_type = DT_BLK;
- break;
- case VCHR:
- dentp->d_type = DT_CHR;
- break;
- case VDIR:
- dentp->d_type = DT_DIR;
- break;
- case VFIFO:
- dentp->d_type = DT_FIFO;
- break;
- case VLNK:
- dentp->d_type = DT_LNK;
- break;
- case VREG:
- dentp->d_type = DT_REG;
- break;
- case VSOCK:
- dentp->d_type = DT_SOCK;
- break;
- default:
- KASSERT(false);
- }
+ dentp->d_type = vtype2dt(de->td_node->tn_type);
}
dentp->d_namlen = de->td_namelen;
KASSERT(de->td_namelen < sizeof(dentp->d_name));
@@ -776,35 +832,34 @@ tmpfs_dir_getdents(tmpfs_node_t *node, s
dentp->d_name[de->td_namelen] = '\0';
dentp->d_reclen = _DIRENT_SIZE(dentp);
- /* Stop reading if the directory entry we are treating is
- * bigger than the amount of data that can be returned. */
if (dentp->d_reclen > uio->uio_resid) {
- error = -1;
+ /* Exhausted UIO space. */
+ error = EJUSTRETURN;
break;
}
- /*
- * Copy the new dirent structure into the output buffer and
- * advance pointers.
- */
+ /* Copy out the directory entry and continue. */
error = uiomove(dentp, dentp->d_reclen, uio);
-
+ if (error) {
+ break;
+ }
(*cntp)++;
de = TAILQ_NEXT(de, td_entries);
- } while (error == 0 && uio->uio_resid > 0 && de != NULL);
- /* Update the offset and cache. */
- if (de == NULL) {
- uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
- node->tn_spec.tn_dir.tn_readdir_lastn = 0;
- node->tn_spec.tn_dir.tn_readdir_lastp = NULL;
- } else {
- node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset =
- tmpfs_dircookie(de);
- node->tn_spec.tn_dir.tn_readdir_lastp = de;
- }
+ } while (uio->uio_resid > 0 && de);
+
+ /* Cache the last entry or clear and mark EOF. */
+ uio->uio_offset = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
+ node->tn_spec.tn_dir.tn_readdir_lastp = de;
+done:
node->tn_status |= TMPFS_NODE_ACCESSED;
kmem_free(dentp, sizeof(struct dirent));
+
+ if (error == EJUSTRETURN) {
+ /* Exhausted UIO space - just return. */
+ error = 0;
+ }
+ KASSERT(error >= 0);
return error;
}
Index: src/sys/fs/tmpfs/tmpfs_vfsops.c
diff -u src/sys/fs/tmpfs/tmpfs_vfsops.c:1.52 src/sys/fs/tmpfs/tmpfs_vfsops.c:1.53
--- src/sys/fs/tmpfs/tmpfs_vfsops.c:1.52 Tue Sep 27 01:10:43 2011
+++ src/sys/fs/tmpfs/tmpfs_vfsops.c Fri Nov 8 15:44:23 2013
@@ -1,4 +1,4 @@
-/* $NetBSD: tmpfs_vfsops.c,v 1.52 2011/09/27 01:10:43 christos Exp $ */
+/* $NetBSD: tmpfs_vfsops.c,v 1.53 2013/11/08 15:44:23 rmind Exp $ */
/*
* Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
@@ -42,7 +42,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.52 2011/09/27 01:10:43 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.53 2013/11/08 15:44:23 rmind Exp $");
#include <sys/param.h>
#include <sys/types.h>
@@ -205,8 +205,8 @@ tmpfs_start(struct mount *mp, int flags)
static int
tmpfs_unmount(struct mount *mp, int mntflags)
{
- tmpfs_mount_t *tmp;
- tmpfs_node_t *node;
+ tmpfs_mount_t *tmp = VFS_TO_TMPFS(mp);
+ tmpfs_node_t *node, *cnode;
int error, flags = 0;
/* Handle forced unmounts. */
@@ -218,25 +218,28 @@ tmpfs_unmount(struct mount *mp, int mntf
if (error != 0)
return error;
- tmp = VFS_TO_TMPFS(mp);
-
- /* Destroy any existing inodes. */
- while ((node = LIST_FIRST(&tmp->tm_nodes)) != NULL) {
- if (node->tn_type == VDIR) {
- tmpfs_dirent_t *de;
+ /*
+ * First round, detach and destroy all directory entries.
+ * Also, clear the pointers to the vnodes - they are gone.
+ */
+ LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) {
+ tmpfs_dirent_t *de;
- /* Destroy any directory entries. */
- de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
- while (de != NULL) {
- tmpfs_dirent_t *nde;
-
- nde = TAILQ_NEXT(de, td_entries);
- tmpfs_free_dirent(tmp, de);
- node->tn_size -= sizeof(tmpfs_dirent_t);
- de = nde;
+ node->tn_vnode = NULL;
+ if (node->tn_type != VDIR) {
+ continue;
+ }
+ while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
+ if ((cnode = de->td_node) != NULL) {
+ cnode->tn_vnode = NULL;
}
+ tmpfs_dir_detach(node, de);
+ tmpfs_free_dirent(tmp, de);
}
- /* Removes inode from the list. */
+ }
+
+ /* Second round, destroy all inodes. */
+ while ((node = LIST_FIRST(&tmp->tm_nodes)) != NULL) {
tmpfs_free_node(tmp, node);
}
Index: src/sys/fs/tmpfs/tmpfs_vnops.c
diff -u src/sys/fs/tmpfs/tmpfs_vnops.c:1.105 src/sys/fs/tmpfs/tmpfs_vnops.c:1.106
--- src/sys/fs/tmpfs/tmpfs_vnops.c:1.105 Fri Nov 1 15:38:45 2013
+++ src/sys/fs/tmpfs/tmpfs_vnops.c Fri Nov 8 15:44:23 2013
@@ -1,4 +1,4 @@
-/* $NetBSD: tmpfs_vnops.c,v 1.105 2013/11/01 15:38:45 rmind Exp $ */
+/* $NetBSD: tmpfs_vnops.c,v 1.106 2013/11/08 15:44:23 rmind Exp $ */
/*
* Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.105 2013/11/01 15:38:45 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.106 2013/11/08 15:44:23 rmind Exp $");
#include <sys/param.h>
#include <sys/dirent.h>
@@ -710,9 +710,9 @@ tmpfs_remove(void *v)
* Note: the inode referred by it will not be destroyed
* until the vnode is reclaimed/recycled.
*/
- tmpfs_dir_detach(dvp, de);
+ tmpfs_dir_detach(dnode, de);
if (ap->a_cnp->cn_flags & DOWHITEOUT)
- tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
+ tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
else
tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
@@ -747,7 +747,7 @@ tmpfs_link(void *v)
vnode_t *dvp = ap->a_dvp;
vnode_t *vp = ap->a_vp;
struct componentname *cnp = ap->a_cnp;
- tmpfs_node_t *node;
+ tmpfs_node_t *dnode, *node;
tmpfs_dirent_t *de;
int error;
@@ -756,6 +756,7 @@ tmpfs_link(void *v)
KASSERT(vp->v_type != VDIR);
KASSERT(dvp->v_mount == vp->v_mount);
+ dnode = VP_TO_TMPFS_DIR(dvp);
node = VP_TO_TMPFS_NODE(vp);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
@@ -780,11 +781,11 @@ tmpfs_link(void *v)
goto out;
}
- /*
+ /*
* Insert the entry into the directory.
* It will increase the inode link count.
*/
- tmpfs_dir_attach(dvp, de, node);
+ tmpfs_dir_attach(dnode, de, node);
/* Update the timestamps and trigger the event. */
if (node->tn_vnode) {
@@ -872,7 +873,7 @@ tmpfs_rmdir(void *v)
node->tn_status |= TMPFS_NODE_STATUSALL;
/* Detach the directory entry from the directory. */
- tmpfs_dir_detach(dvp, de);
+ tmpfs_dir_detach(dnode, de);
/* Purge the cache for parent. */
cache_purge(dvp);
@@ -883,14 +884,14 @@ tmpfs_rmdir(void *v)
* until the vnode is reclaimed.
*/
if (ap->a_cnp->cn_flags & DOWHITEOUT)
- tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
+ tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
else
tmpfs_free_dirent(tmp, de);
/* Destroy the whiteout entries from the node. */
while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
- tmpfs_dir_detach(vp, de);
+ tmpfs_dir_detach(node, de);
tmpfs_free_dirent(tmp, de);
}
@@ -951,68 +952,49 @@ tmpfs_readdir(void *v)
node = VP_TO_TMPFS_DIR(vp);
startoff = uio->uio_offset;
cnt = 0;
- if (node->tn_links == 0) {
- error = 0;
- goto out;
- }
- if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
- error = tmpfs_dir_getdotdent(node, uio);
- if (error != 0) {
- if (error == -1)
- error = 0;
- goto out;
- }
- cnt++;
- }
- if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
- error = tmpfs_dir_getdotdotdent(node, uio);
- if (error != 0) {
- if (error == -1)
- error = 0;
- goto out;
- }
- cnt++;
- }
- error = tmpfs_dir_getdents(node, uio, &cnt);
- if (error == -1) {
+ /*
+ * Retrieve the directory entries, unless it is being destroyed.
+ */
+ if (node->tn_links) {
+ error = tmpfs_dir_getdents(node, uio, &cnt);
+ } else {
error = 0;
}
- KASSERT(error >= 0);
-out:
+
if (eofflag != NULL) {
- *eofflag = (!error && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
+ *eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
}
if (error || cookies == NULL || ncookies == NULL) {
return error;
}
/* Update NFS-related variables, if any. */
- off_t i, off = startoff;
tmpfs_dirent_t *de = NULL;
+ off_t i, off = startoff;
*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
*ncookies = cnt;
for (i = 0; i < cnt; i++) {
- KASSERT(off != TMPFS_DIRCOOKIE_EOF);
- if (off != TMPFS_DIRCOOKIE_DOT) {
- if (off == TMPFS_DIRCOOKIE_DOTDOT) {
+ KASSERT(off != TMPFS_DIRSEQ_EOF);
+ if (off != TMPFS_DIRSEQ_DOT) {
+ if (off == TMPFS_DIRSEQ_DOTDOT) {
de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
} else if (de != NULL) {
de = TAILQ_NEXT(de, td_entries);
} else {
- de = tmpfs_dir_lookupbycookie(node, off);
+ de = tmpfs_dir_lookupbyseq(node, off);
KASSERT(de != NULL);
de = TAILQ_NEXT(de, td_entries);
}
if (de == NULL) {
- off = TMPFS_DIRCOOKIE_EOF;
+ off = TMPFS_DIRSEQ_EOF;
} else {
- off = tmpfs_dircookie(de);
+ off = tmpfs_dir_getseq(node, de);
}
} else {
- off = TMPFS_DIRCOOKIE_DOTDOT;
+ off = TMPFS_DIRSEQ_DOTDOT;
}
(*cookies)[i] = off;
}
@@ -1284,6 +1266,7 @@ tmpfs_whiteout(void *v)
struct componentname *cnp = ap->a_cnp;
const int flags = ap->a_flags;
tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
+ tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
tmpfs_dirent_t *de;
int error;
@@ -1295,14 +1278,14 @@ tmpfs_whiteout(void *v)
cnp->cn_namelen, &de);
if (error)
return error;
- tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
+ tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
break;
case DELETE:
cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
- de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), cnp);
+ de = tmpfs_dir_lookup(dnode, cnp);
if (de == NULL)
return ENOENT;
- tmpfs_dir_detach(dvp, de);
+ tmpfs_dir_detach(dnode, de);
tmpfs_free_dirent(tmp, de);
break;
}