Module Name: src
Committed By: manu
Date: Mon Sep 20 07:00:22 UTC 2010
Modified Files:
src/lib/libperfuse: ops.c perfuse.c perfuse_priv.h subr.c
Log Message:
- performance improvement for read, readdir and write. Now that we use
SOCK_DGRAM, we can send many pages at once without hitting any bug.
- when a file is created, it is open for FUSE, but not for the kernel.
If the kernel does not do a subsequent open, we have a leak. We fight
against this by trying to close such files, which the kernel has left
unopened for some time.
- some code refactoring to make message exchange debugging easier (more to come)
To generate a diff of this commit:
cvs rdiff -u -r1.15 -r1.16 src/lib/libperfuse/ops.c
cvs rdiff -u -r1.6 -r1.7 src/lib/libperfuse/perfuse.c
cvs rdiff -u -r1.10 -r1.11 src/lib/libperfuse/perfuse_priv.h
cvs rdiff -u -r1.4 -r1.5 src/lib/libperfuse/subr.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/lib/libperfuse/ops.c
diff -u src/lib/libperfuse/ops.c:1.15 src/lib/libperfuse/ops.c:1.16
--- src/lib/libperfuse/ops.c:1.15 Wed Sep 15 01:51:43 2010
+++ src/lib/libperfuse/ops.c Mon Sep 20 07:00:21 2010
@@ -1,4 +1,4 @@
-/* $NetBSD: ops.c,v 1.15 2010/09/15 01:51:43 manu Exp $ */
+/* $NetBSD: ops.c,v 1.16 2010/09/20 07:00:21 manu Exp $ */
/*-
* Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -43,7 +43,8 @@
extern int perfuse_diagflags;
-static int node_close_common(struct puffs_usermount *, puffs_cookie_t, int);
+static int xchg_msg(struct puffs_usermount *, puffs_cookie_t,
+ perfuse_msg_t *, size_t, enum perfuse_xchg_pb_reply);
static int no_access(puffs_cookie_t, const struct puffs_cred *, mode_t);
static void fuse_attr_to_vap(struct perfuse_state *,
struct vattr *, struct fuse_attr *);
@@ -94,8 +95,8 @@
#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
#define VTTOIF(indx) (vttoif_tab[(int)(indx)])
-static int
-node_close_common(pu, opc, mode)
+int
+perfuse_node_close_common(pu, opc, mode)
struct puffs_usermount *pu;
puffs_cookie_t opc;
int mode;
@@ -148,7 +149,8 @@
__func__, (void *)opc, pnd->pnd_ino, fri->fh);
#endif
- if ((error = XCHG_MSG(ps, pu, pm, NO_PAYLOAD_REPLY_LEN)) != 0)
+ if ((error = xchg_msg(pu, opc, pm,
+ NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
goto out;
ps->ps_destroy_msg(pm);
@@ -163,6 +165,30 @@
return error;
}
+/* ARGSUSED1 */
+static int
+xchg_msg(pu, opc, pm, len, wait)
+ struct puffs_usermount *pu;
+ puffs_cookie_t opc;
+ perfuse_msg_t *pm;
+ size_t len;
+ enum perfuse_xchg_pb_reply wait;
+{
+ struct perfuse_state *ps;
+ int error;
+
+ ps = puffs_getspecific(pu);
+
+#ifdef PERFUSE_DEBUG
+ if ((perfuse_diagflags & PDF_FUSE) && (opc != 0))
+ DPRINTF("file = \"%s\"\n",
+ (char *)PNPATH((struct puffs_node *)opc));
+#endif
+ error = ps->ps_xchg_msg(pu, pm, len, wait);
+
+ return error;
+}
+
static int
no_access(opc, pcr, mode)
puffs_cookie_t opc;
@@ -285,7 +311,7 @@
pm = ps->ps_new_msg(pu, opc, FUSE_LOOKUP, len, NULL);
(void)strlcpy(_GET_INPAYLOAD(ps, pm, char *), path, len);
- if ((error = XCHG_MSG(ps, pu, pm, sizeof(*feo))) != 0)
+ if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
goto out;
feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
@@ -327,7 +353,7 @@
ps = puffs_getspecific(pu);
- if ((error = XCHG_MSG(ps, pu, pm, sizeof(*feo))) != 0)
+ if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
goto out;
feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
@@ -357,7 +383,8 @@
/*
* A fuse_attr_out is returned, but we ignore it.
*/
- error = XCHG_MSG(ps, pu, pm, sizeof(struct fuse_attr_out));
+ error = xchg_msg(pu, (puffs_cookie_t)pn,
+ pm, sizeof(struct fuse_attr_out), wait_reply);
/*
* The parent directory needs a sync
@@ -604,9 +631,6 @@
ps = perfuse_getspecific(pu);
#endif
- /*
- * XXX Add a lock he day we go multithreaded
- */
pnd = PERFUSE_NODE_DATA(opc);
pcq.pcq_type = type;
pcq.pcq_cc = puffs_cc_getcc(pu);
@@ -644,9 +668,6 @@
struct perfuse_node_data *pnd;
int dequeued;
- /*
- * XXX Add a lock he day we go multithreaded
- */
pnd = PERFUSE_NODE_DATA(opc);
dequeued = 0;
TAILQ_FOREACH(pcq, &pnd->pnd_pcq, pcq_next) {
@@ -684,7 +705,7 @@
int error;
ps = puffs_getspecific(pu);
-
+
if (puffs_mount(pu, ps->ps_target, ps->ps_mountflags, ps->ps_root) != 0)
DERR(EX_OSERR, "puffs_mount failed");
@@ -702,7 +723,7 @@
fii->max_readahead = 32 * PAGE_SIZE;
fii->flags = (FUSE_ASYNC_READ|FUSE_POSIX_LOCKS|FUSE_ATOMIC_O_TRUNC);
- if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fio))) != 0)
+ if ((error = xchg_msg(pu, 0, pm, sizeof(*fio), wait_reply)) != 0)
DERRX(EX_SOFTWARE, "init message exchange failed (%d)", error);
fio = GET_OUTPAYLOAD(ps, pm, fuse_init_out);
@@ -729,7 +750,7 @@
opc = (puffs_cookie_t)puffs_getroot(pu);
pm = ps->ps_new_msg(pu, opc, FUSE_DESTROY, 0, NULL);
- if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0) {
+ if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0){
DWARN("unmount %s", ps->ps_target);
if (!(flags & MNT_FORCE))
goto out;
@@ -759,7 +780,7 @@
opc = (puffs_cookie_t)puffs_getroot(pu);
pm = ps->ps_new_msg(pu, opc, FUSE_STATFS, 0, NULL);
- if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fso))) != 0)
+ if ((error = xchg_msg(pu, opc, pm, sizeof(*fso), wait_reply)) != 0)
goto out;
fso = GET_OUTPAYLOAD(ps, pm, fuse_statfs_out);
@@ -964,10 +985,17 @@
opc = (puffs_cookie_t)pn;
- error = perfuse_node_open(pu, opc, FREAD|FWRITE, pcn->pcn_cred);
+ error = perfuse_node_open(pu, opc, FWRITE, pcn->pcn_cred);
if (error != 0)
return error;
+ /*
+ * This node has been open in the filesystem,
+ * but not by the kernel. We will have to close
+ * it on our own to avoid a leak
+ */
+ PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_OPENFS;
+
return 0;
}
@@ -989,7 +1017,7 @@
(void)strlcpy((char*)(void *)(fci + 1), name, namelen);
len = sizeof(*feo) + sizeof(*foo);
- if ((error = XCHG_MSG(ps, pu, pm, len)) != 0)
+ if ((error = xchg_msg(pu, opc, pm, len, wait_reply)) != 0)
goto out;
feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
@@ -1002,7 +1030,7 @@
* so that we can reuse it later
*/
pn = perfuse_new_pn(pu, opc);
- perfuse_new_fh((puffs_cookie_t)pn, foo->fh, FWRITE);
+ perfuse_new_fh(pu, (puffs_cookie_t)pn, foo->fh, FWRITE);
PERFUSE_NODE_DATA(pn)->pnd_ino = feo->nodeid;
#ifdef PERFUSE_DEBUG
@@ -1020,6 +1048,14 @@
* The parent directory needs a sync
*/
PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
+
+ /*
+ * This node has been open in the filesystem,
+ * but not by the kernel. We will have to close
+ * it on our own to avoid a leak
+ */
+ PERFUSE_NODE_DATA(pn)->pnd_flags |= PND_OPENFS;
+
out:
ps->ps_destroy_msg(pm);
@@ -1113,6 +1149,14 @@
pn = (struct puffs_node *)opc;
if (puffs_pn_getvap(pn)->va_type == VDIR) {
+ /*
+ * We may open removed files, but it seems much more
+ * troublesome to open removed directories. glusterfs says
+ * "OPENDIR (null) (fuse_loc_fill() failed)"
+ */
+ if (pnd->pnd_flags & PND_REMOVED)
+ return ENOENT;
+
op = FUSE_OPENDIR;
pmode = PUFFS_VREAD|PUFFS_VEXEC;
} else {
@@ -1143,10 +1187,20 @@
* Do not open twice, and do not reopen for reading
* if we already have write handle.
*/
- if ((mode & FREAD) && (pnd->pnd_flags & PND_RFH))
- return 0;
- if ((mode & FWRITE) && (pnd->pnd_flags & PND_WFH))
+ if (((mode & FREAD) && (pnd->pnd_flags & PND_RFH)) ||
+ ((mode & FWRITE) && (pnd->pnd_flags & PND_WFH))) {
+ /*
+ * If the file was created, it was open for
+ * the filesystem but not for the kernel. This
+ * is not the case anymore, therefore we cleanup
+ * the flag to avoid an unwanted cleanup close
+ * after PERFUSE_OPENFS_TIMEOUT.
+ */
+ pnd->pnd_flags &= ~PND_OPENFS;
+
return 0;
+ }
+
/*
* Convert PUFFS mode to FUSE mode: convert FREAD/FWRITE
@@ -1160,16 +1214,16 @@
foi->flags = fmode;
foi->unused = 0;
- if ((error = XCHG_MSG(ps, pu, pm, sizeof(*foo))) != 0)
+ if ((error = xchg_msg(pu, opc, pm, sizeof(*foo), wait_reply)) != 0)
goto out;
foo = GET_OUTPAYLOAD(ps, pm, fuse_open_out);
-
+
/*
* Save the file handle in node private data
* so that we can reuse it later
*/
- perfuse_new_fh((puffs_cookie_t)pn, foo->fh, mode);
+ perfuse_new_fh(pu, (puffs_cookie_t)pn, foo->fh, mode);
#ifdef PERFUSE_DEBUG
if (perfuse_diagflags & PDF_FH)
@@ -1180,6 +1234,7 @@
pnd->pnd_ino, mode & FREAD ? "r" : "",
mode & FWRITE ? "w" : "", foo->fh);
#endif
+
out:
ps->ps_destroy_msg(pm);
@@ -1210,7 +1265,7 @@
* therefore postpone the close operation at reclaim time.
*/
if (puffs_pn_getvap(pn)->va_type != VREG)
- return node_close_common(pu, opc, flags);
+ return perfuse_node_close_common(pu, opc, flags);
return 0;
}
@@ -1243,7 +1298,7 @@
fai = GET_INPAYLOAD(ps, pm, fuse_access_in);
fai->mask = mode;
- error = XCHG_MSG(ps, pu, pm, NO_PAYLOAD_REPLY_LEN);
+ error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
ps->ps_destroy_msg(pm);
}
@@ -1266,7 +1321,8 @@
"fh = 0x%"PRIx64"\n", __func__, (void *)opc,
PERFUSE_NODE_DATA(opc)->pnd_ino, fgi->fh);
#endif
- if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fao))) != 0) {
+ if ((error = xchg_msg(pu, opc, pm,
+ sizeof(*fao), wait_reply)) != 0) {
ps->ps_destroy_msg(pm);
goto out;
}
@@ -1318,7 +1374,7 @@
fgi->dummy = 0;
fgi->fh = 0;
- if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fao))) != 0)
+ if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
goto out;
fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
@@ -1479,7 +1535,7 @@
/*
* A fuse_attr_out is returned, but we ignore it.
*/
- error = XCHG_MSG(ps, pu, pm, sizeof(struct fuse_attr_out));
+ error = xchg_msg(pu, opc, pm, sizeof(struct fuse_attr_out), wait_reply);
ps->ps_destroy_msg(pm);
@@ -1514,7 +1570,7 @@
__func__, (void *)opc,
PERFUSE_NODE_DATA(opc)->pnd_ino, fpi->fh);
#endif
- if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fpo))) != 0)
+ if ((error = xchg_msg(pu, opc, pm, sizeof(*fpo), wait_reply)) != 0)
goto out;
fpo = GET_OUTPAYLOAD(ps, pm, fuse_poll_out);
@@ -1610,7 +1666,8 @@
PERFUSE_NODE_DATA(opc)->pnd_ino, ffi->fh);
#endif
- if ((error = XCHG_MSG(ps, pu, pm, NO_PAYLOAD_REPLY_LEN)) != 0)
+ if ((error = xchg_msg(pu, opc, pm,
+ NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
goto out;
/*
@@ -1637,7 +1694,7 @@
ps->ps_destroy_msg(pm);
if (open_self)
- (void)node_close_common(pu, opc, FWRITE);
+ (void)perfuse_node_close_common(pu, opc, FWRITE);
return error;
}
@@ -1700,7 +1757,7 @@
path = _GET_INPAYLOAD(ps, pm, char *);
(void)strlcpy(path, name, len);
- if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0)
+ if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
goto out;
if (puffs_inval_namecache_dir(pu, opc) != 0)
@@ -1756,7 +1813,7 @@
fli->oldnodeid = PERFUSE_NODE_DATA(pn)->pnd_ino;
(void)strlcpy((char *)(void *)(fli + 1), name, len - sizeof(*fli));
- error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN);
+ error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply);
ps->ps_destroy_msg(pm);
@@ -1808,7 +1865,7 @@
np += oldname_len;
(void)strlcpy(np, newname, newname_len);
- if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0)
+ if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
goto out;
/*
@@ -1898,7 +1955,7 @@
path = _GET_INPAYLOAD(ps, pm, char *);
(void)strlcpy(path, name, len);
- if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0)
+ if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
goto out;
if (puffs_inval_namecache_dir(pu, opc) != 0)
@@ -1982,6 +2039,7 @@
int error;
int open_self;
uint64_t fd_offset;
+ size_t fd_maxlen;
pm = NULL;
error = 0;
@@ -2033,6 +2091,7 @@
pnd->pnd_all_fd = NULL;
pnd->pnd_all_fd_len = 0;
fd_offset = 0;
+ fd_maxlen = ps->ps_max_readahead - sizeof(*foh);
do {
size_t fd_len;
@@ -2042,24 +2101,17 @@
/*
* read_flags, lock_owner and flags are unused in libfuse
- *
- * XXX if fri->size is too big (bigger than PAGE_SIZE?), * we get strange bugs. ktrace shows 16 bytes or garbage
- * at the end of sent frames, but perfused does not receive
- * that data. The data length is hoverver the same, which
- * cause perfused to use the last 16 bytes of the frame
- * as the frame header of the next frame.
- *
- * This may be a kernel bug.
*/
fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
fri->fh = fh;
fri->offset = fd_offset;
- fri->size = PAGE_SIZE - sizeof(struct fuse_out_header);
+ fri->size = fd_maxlen;
fri->read_flags = 0;
fri->lock_owner = 0;
fri->flags = 0;
- if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0)
+ if ((error = xchg_msg(pu, opc, pm,
+ UNSPEC_REPLY_LEN, wait_reply)) != 0)
goto out;
/*
@@ -2071,8 +2123,7 @@
foh_len = foh->len;
/*
- * It seems that the only way to discover the end
- * of the buffer is to get an empty read
+ * Empty read: we reached the end of the buffer.
*/
if (foh_len == sizeof(*foh))
break;
@@ -2103,7 +2154,15 @@
ps->ps_destroy_msg(pm);
pm = NULL;
- } while (1 /* CONSTCOND */);
+
+ /*
+ * If the buffer was not completely filled,
+ * that is, if there is room for the biggest
+ * struct dirent possible, then we are done:
+ * no need to issue another READDIR to see
+ * an empty reply.
+ */
+ } while (foh_len >= fd_maxlen - (sizeof(*fd) + MAXPATHLEN));
if (fuse_to_dirent(pu, opc, pnd->pnd_all_fd, pnd->pnd_all_fd_len) == -1)
error = EIO;
@@ -2170,7 +2229,7 @@
pm = ps->ps_new_msg(pu, opc, FUSE_READLINK, 0, pcr);
- if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0)
+ if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
goto out;
foh = GET_OUTHDR(ps, pm);
@@ -2243,7 +2302,7 @@
/*
* Make sure all operation are finished
- * There can be an ongoing write, or queued operations
+ * There can be an ongoing write or
*/
while (pnd->pnd_flags & PND_INWRITE) {
requeue_request(pu, opc, PCQ_AFTERWRITE);
@@ -2267,10 +2326,10 @@
* Close open files
*/
if (pnd->pnd_flags & PND_WFH)
- (void)node_close_common(pu, opc, FWRITE);
+ (void)perfuse_node_close_common(pu, opc, FWRITE);
if (pnd->pnd_flags & PND_RFH)
- (void)node_close_common(pu, opc, FREAD);
+ (void)perfuse_node_close_common(pu, opc, FREAD);
/*
* And send the FORGET message
@@ -2281,13 +2340,14 @@
ffi->nlookup = pnd->pnd_nlookup;
/*
- * No reply is expected, pm is freed in XCHG_MSG
+ * No reply is expected, pm is freed in xchg_msg
*/
- (void)XCHG_MSG_NOREPLY(ps, pu, pm, UNSPEC_REPLY_LEN);
+ (void)xchg_msg(pu, (puffs_cookie_t)pn,
+ pm, UNSPEC_REPLY_LEN, no_reply);
parent_pn = pnd->pnd_parent;
- perfuse_destroy_pn(pn);
+ perfuse_destroy_pn(pu, pn);
puffs_pn_put(pn);
pn = parent_pn;
@@ -2370,7 +2430,7 @@
PERFUSE_NODE_DATA(opc)->pnd_ino, fli->fh);
#endif
- if ((error = XCHG_MSG(ps, pu, pm, sizeof(*flo))) != 0)
+ if ((error = xchg_msg(pu, opc, pm, sizeof(*flo), wait_reply)) != 0)
goto out;
flo = GET_OUTPAYLOAD(ps, pm, fuse_lk_out);
@@ -2416,7 +2476,6 @@
struct fuse_read_in *fri;
struct fuse_out_header *foh;
size_t readen;
- size_t requested;
int error;
ps = puffs_getspecific(pu);
@@ -2426,27 +2485,19 @@
if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR)
return EBADF;
- requested = *resid;
- if ((ps->ps_readahead + requested) > ps->ps_max_readahead) {
- if (perfuse_diagflags & PDF_REQUEUE)
- DPRINTF("readahead = %zd\n", ps->ps_readahead);
- requeue_request(pu, opc, PCQ_READ);
- }
- ps->ps_readahead += requested;
-
do {
+ size_t max_read;
+
+ max_read = ps->ps_max_readahead - sizeof(*foh);
/*
* flags may be set to FUSE_READ_LOCKOWNER
* if lock_owner is provided.
- *
- * XXX See comment about fri->size in perfuse_node_readdir
- * We encounter the same bug here.
*/
pm = ps->ps_new_msg(pu, opc, FUSE_READ, sizeof(*fri), pcr);
fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
fri->fh = perfuse_get_fh(opc, FREAD);
fri->offset = offset;
- fri->size = (uint32_t)MIN(*resid, PAGE_SIZE - sizeof(*foh));
+ fri->size = (uint32_t)MIN(*resid, max_read);
fri->read_flags = 0; /* XXX Unused by libfuse? */
fri->lock_owner = pnd->pnd_lock_owner;
fri->flags = 0;
@@ -2457,7 +2508,7 @@
DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n",
__func__, (void *)opc, pnd->pnd_ino, fri->fh);
#endif
- error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN);
+ error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply);
if (error != 0)
goto out;
@@ -2465,6 +2516,12 @@
foh = GET_OUTHDR(ps, pm);
readen = foh->len - sizeof(*foh);
+#ifdef PERFUSE_DEBUG
+ if (readen > *resid)
+ DERRX(EX_SOFTWARE, "%s: Unexpected big read %zd",
+ __func__, readen);
+#endif
+
(void)memcpy(buf, _GET_OUTPAYLOAD(ps, pm, char *), readen);
buf += readen;
@@ -2484,10 +2541,6 @@
if (pm != NULL)
ps->ps_destroy_msg(pm);
- ps->ps_readahead -= requested;
-
- (void)dequeue_requests(ps, opc, PCQ_READ, 1);
-
return error;
}
@@ -2509,7 +2562,6 @@
size_t data_len;
size_t payload_len;
size_t written;
- size_t requested;
int error;
ps = puffs_getspecific(pu);
@@ -2520,27 +2572,28 @@
if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR)
return EBADF;
+ /*
+ * We need to queue write requests in order to avoid
+ * dequeueing PCQ_AFTERWRITE when there are pending writes.
+ */
while (pnd->pnd_flags & PND_INWRITE)
requeue_request(pu, opc, PCQ_WRITE);
pnd->pnd_flags |= PND_INWRITE;
-
- requested = *resid;
- if ((ps->ps_write + requested) > ps->ps_max_write) {
- if (perfuse_diagflags & PDF_REQUEUE)
- DPRINTF("write = %zd\n", ps->ps_write);
- requeue_request(pu, opc, PCQ_WRITE);
- }
- ps->ps_write += requested;
-
do {
+ size_t max_write;
/*
- * It seems libfuse does not expects big chunks, so
- * send it page per page. The writepage feature is
- * probably there to minmize data movement.
- * XXX use ps->ps_maxwrite?
+ * There is a writepage flag when data
+ * is PAGE_SIZE-aligned. Use it for
+ * everything but the data after the last
+ * page boundary.
*/
- data_len = MIN(*resid, PAGE_SIZE);
+ max_write = ps->ps_max_write - sizeof(*fwi);
+
+ data_len = MIN(*resid, max_write);
+ if (data_len > PAGE_SIZE)
+ data_len = data_len & ~(PAGE_SIZE - 1);
+
payload_len = data_len + sizeof(*fwi);
/*
@@ -2565,11 +2618,17 @@
DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n",
__func__, (void *)opc, pnd->pnd_ino, fwi->fh);
#endif
- if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fwo))) != 0)
+ if ((error = xchg_msg(pu, opc, pm,
+ sizeof(*fwo), wait_reply)) != 0)
goto out;
fwo = GET_OUTPAYLOAD(ps, pm, fuse_write_out);
written = fwo->size;
+#ifdef PERFUSE_DEBUG
+ if (written > *resid)
+ DERRX(EX_SOFTWARE, "%s: Unexpected big write %zd",
+ __func__, written);
+#endif
*resid -= written;
offset += written;
buf += written;
@@ -2605,9 +2664,6 @@
if (pm != NULL)
ps->ps_destroy_msg(pm);
- ps->ps_write -= requested;
-
-
/*
* If there are no more queued write, we can resume
* an operation awaiting write completion.
Index: src/lib/libperfuse/perfuse.c
diff -u src/lib/libperfuse/perfuse.c:1.6 src/lib/libperfuse/perfuse.c:1.7
--- src/lib/libperfuse/perfuse.c:1.6 Wed Sep 15 01:51:43 2010
+++ src/lib/libperfuse/perfuse.c Mon Sep 20 07:00:21 2010
@@ -1,4 +1,4 @@
-/* $NetBSD: perfuse.c,v 1.6 2010/09/15 01:51:43 manu Exp $ */
+/* $NetBSD: perfuse.c,v 1.7 2010/09/20 07:00:21 manu Exp $ */
/*-
* Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -58,6 +58,7 @@
(void)memset(ps, 0, sizeof(*ps));
ps->ps_max_write = UINT_MAX;
ps->ps_max_readahead = UINT_MAX;
+ TAILQ_INIT(&ps->ps_pnd);
return ps;
}
@@ -220,7 +221,7 @@
struct perfuse_mount_out *pmo;
#if (PERFUSE_SOCKTYPE == SOCK_DGRAM)
struct sockaddr_storage ss;
- struct sockaddr_un sun;
+ struct sockaddr_un *sun;
struct sockaddr *sa;
socklen_t sa_len;
#endif
@@ -246,22 +247,22 @@
sock_len = 0;
#if (PERFUSE_SOCKTYPE == SOCK_DGRAM)
sa = (struct sockaddr *)(void *)&ss;
+ sun = (struct sockaddr_un *)(void *)&ss;
sa_len = sizeof(ss);
if ((getpeername(s, sa, &sa_len) == 0) &&
(sa->sa_family = AF_LOCAL) &&
- (strcmp(((struct sockaddr_un *)sa)->sun_path, _PATH_FUSE) == 0)) {
+ (strcmp(sun->sun_path, _PATH_FUSE) == 0)) {
- sa = (struct sockaddr *)(void *)&sun;
- sun.sun_len = sizeof(sun);
- sun.sun_family = AF_LOCAL;
- (void)sprintf(sun.sun_path, "%s/%s-%d",
+ sun->sun_len = sizeof(*sun);
+ sun->sun_family = AF_LOCAL;
+ (void)sprintf(sun->sun_path, "%s/%s-%d",
_PATH_TMP, getprogname(), getpid());
- if (bind(s, sa, sa->sa_len) != 0)
+ if (bind(s, sa, (socklen_t)sa->sa_len) != 0)
DERR(EX_OSERR, "%s:%d bind to \"%s\" failed",
- __func__, __LINE__, sun.sun_path);
+ __func__, __LINE__, sun->sun_path);
- sock_len = strlen(sun.sun_path) + 1;
+ sock_len = strlen(sun->sun_path) + 1;
}
#endif /* PERFUSE_SOCKTYPE */
@@ -317,7 +318,7 @@
}
if (sock_len != 0) {
- (void)strcpy(cp, sun.sun_path);
+ (void)strcpy(cp, sun->sun_path);
cp += pmo->pmo_sock_len;
}
Index: src/lib/libperfuse/perfuse_priv.h
diff -u src/lib/libperfuse/perfuse_priv.h:1.10 src/lib/libperfuse/perfuse_priv.h:1.11
--- src/lib/libperfuse/perfuse_priv.h:1.10 Wed Sep 15 01:51:43 2010
+++ src/lib/libperfuse/perfuse_priv.h Mon Sep 20 07:00:22 2010
@@ -1,4 +1,4 @@
-/* $NetBSD: perfuse_priv.h,v 1.10 2010/09/15 01:51:43 manu Exp $ */
+/* $NetBSD: perfuse_priv.h,v 1.11 2010/09/20 07:00:22 manu Exp $ */
/*-
* Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -37,6 +37,17 @@
#include "perfuse_if.h"
#include "fuse.h"
+/*
+ * When a file is created, it is open for the filesystem, but not
+ * for the kernel. We keep the file open to avoid re-open it, but
+ * once we open PERFUSE_OPENFS_MAXFILES files, we start closing
+ * on our own any file that has not been open for PERFUSE_OPENFS_TIMEOUT
+ * seconds. This is to avoid file leaks and getting "Too many open
+ * files in system"
+ */
+#define PERFUSE_OPENFS_TIMEOUT 3
+#define PERFUSE_OPENFS_MAXFILES 32
+
struct perfuse_state {
void *ps_private; /* Private field for libperfuse user */
struct puffs_usermount *ps_pu;
@@ -58,8 +69,6 @@
char *ps_filesystemtype;
int ps_mountflags;
uint64_t ps_unique;
- size_t ps_readahead;
- size_t ps_write;
perfuse_new_msg_fn ps_new_msg;
perfuse_xchg_msg_fn ps_xchg_msg;
perfuse_destroy_msg_fn ps_destroy_msg;
@@ -67,10 +76,13 @@
perfuse_get_inpayload_fn ps_get_inpayload;
perfuse_get_outhdr_fn ps_get_outhdr;
perfuse_get_outpayload_fn ps_get_outpayload;
+ TAILQ_HEAD(, perfuse_node_data) ps_pnd;
+ int ps_pnd_count;
};
-enum perfuse_qtype { PCQ_READDIR, PCQ_READ, PCQ_WRITE, PCQ_AFTERWRITE };
+enum perfuse_qtype { PCQ_READDIR, PCQ_READ, PCQ_WRITE, PCQ_AFTERWRITE };
+
#ifdef PERFUSE_DEBUG
extern const char *perfuse_qtypestr[];
#endif
@@ -81,7 +93,6 @@
TAILQ_ENTRY(perfuse_cc_queue) pcq_next;
};
-
struct perfuse_node_data {
uint64_t pnd_rfh;
uint64_t pnd_wfh;
@@ -95,18 +106,22 @@
size_t pnd_all_fd_len;
TAILQ_HEAD(,perfuse_cc_queue) pnd_pcq; /* queued requests */
int pnd_flags;
-#define PND_RECLAIMED 0x01 /* reclaim pending */
-#define PND_INREADDIR 0x02 /* readdir in progress */
-#define PND_DIRTY 0x04 /* There is some data to sync */
-#define PND_RFH 0x08 /* Read FH allocated */
-#define PND_WFH 0x10 /* Write FH allocated */
-#define PND_REMOVED 0x20 /* Node was removed */
-#define PND_INWRITE 0x40 /* write in progress */
+#define PND_RECLAIMED 0x001 /* reclaim pending */
+#define PND_INREADDIR 0x002 /* readdir in progress */
+#define PND_DIRTY 0x004 /* There is some data to sync */
+#define PND_RFH 0x008 /* Read FH allocated */
+#define PND_WFH 0x010 /* Write FH allocated */
+#define PND_REMOVED 0x020 /* Node was removed */
+#define PND_INWRITE 0x040 /* write in progress */
+#define PND_OPENFS 0x080 /* Open by fs but not by kernel */
#define PND_OPEN (PND_RFH|PND_WFH) /* File is open */
#define PND_BUSY (PND_INREADDIR|PND_INWRITE)
puffs_cookie_t pnd_parent;
int pnd_childcount;
+ time_t pnd_timestamp;
+ TAILQ_ENTRY(perfuse_node_data) pnd_next;
+ puffs_cookie_t pnd_pn;
};
#define PERFUSE_NODE_DATA(opc) \
@@ -125,19 +140,16 @@
(struct type *)(void *)ps->ps_get_outpayload(pm)
#define _GET_OUTPAYLOAD(ps, pm, type) (type)ps->ps_get_outpayload(pm)
-#define XCHG_MSG(ps, pu, opc, len) ps->ps_xchg_msg(pu, opc, len, wait_reply)
-#define XCHG_MSG_NOREPLY(ps, pu, opc, len) \
- ps->ps_xchg_msg(pu, opc, len, no_reply)
-
__BEGIN_DECLS
struct puffs_node *perfuse_new_pn(struct puffs_usermount *,
struct puffs_node *);
-void perfuse_destroy_pn(struct puffs_node *);
-void perfuse_new_fh(puffs_cookie_t, uint64_t, int);
+void perfuse_destroy_pn(struct puffs_usermount *, struct puffs_node *);
+void perfuse_new_fh(struct puffs_usermount *, puffs_cookie_t, uint64_t, int);
void perfuse_destroy_fh(puffs_cookie_t, uint64_t);
uint64_t perfuse_get_fh(puffs_cookie_t, int);
uint64_t perfuse_next_unique(struct puffs_usermount *);
+int perfuse_node_close_common(struct puffs_usermount *, puffs_cookie_t, int);
char *perfuse_fs_mount(int, ssize_t);
Index: src/lib/libperfuse/subr.c
diff -u src/lib/libperfuse/subr.c:1.4 src/lib/libperfuse/subr.c:1.5
--- src/lib/libperfuse/subr.c:1.4 Fri Sep 3 07:15:18 2010
+++ src/lib/libperfuse/subr.c Mon Sep 20 07:00:22 2010
@@ -1,4 +1,4 @@
-/* $NetBSD: subr.c,v 1.4 2010/09/03 07:15:18 manu Exp $ */
+/* $NetBSD: subr.c,v 1.5 2010/09/20 07:00:22 manu Exp $ */
/*-
* Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -41,9 +41,12 @@
struct puffs_usermount *pu;
struct puffs_node *parent;
{
+ struct perfuse_state *ps;
struct puffs_node *pn;
struct perfuse_node_data *pnd;
+ ps = puffs_getspecific(pu);
+
if ((pnd = malloc(sizeof(*pnd))) == NULL)
DERR(EX_OSERR, "malloc failed");
@@ -56,8 +59,13 @@
pnd->pnd_ino = PERFUSE_UNKNOWN_INO;
pnd->pnd_nlookup = 1;
pnd->pnd_parent = parent;
+ pnd->pnd_timestamp = time(NULL);
+ pnd->pnd_pn = (puffs_cookie_t)pn;
TAILQ_INIT(&pnd->pnd_pcq);
+ TAILQ_INSERT_TAIL(&ps->ps_pnd, pnd, pnd_next);
+ ps->ps_pnd_count++;
+
if (parent != NULL)
PERFUSE_NODE_DATA(parent)->pnd_childcount++;
@@ -65,11 +73,19 @@
}
void
-perfuse_destroy_pn(pn)
+perfuse_destroy_pn(pu, pn)
+ struct puffs_usermount *pu;
struct puffs_node *pn;
{
+ struct perfuse_state *ps;
struct perfuse_node_data *pnd;
+ ps = puffs_getspecific(pu);
+ pnd = PERFUSE_NODE_DATA(pn);
+
+ TAILQ_REMOVE(&ps->ps_pnd, pnd, pnd_next);
+ ps->ps_pnd_count--;
+
if ((pnd = puffs_pn_getpriv(pn)) != NULL) {
if (pnd->pnd_parent != NULL)
PERFUSE_NODE_DATA(pnd->pnd_parent)->pnd_childcount--;
@@ -97,13 +113,40 @@
void
-perfuse_new_fh(opc, fh, mode)
+perfuse_new_fh(pu, opc, fh, mode)
+ struct puffs_usermount *pu;
puffs_cookie_t opc;
uint64_t fh;
int mode;
{
+ struct perfuse_state *ps;
struct perfuse_node_data *pnd;
+ ps = puffs_getspecific(pu);
+
+ /*
+ * Nodes file with PND_OPENFS are open by the filesystem but
+ * not by the kernel, because of a CREATE operation. If
+ * the kernel never opens them, we have a leak to fix.
+ * If we have enough open files, we start closing the
+ * one that had been open for too long.
+ */
+ if (ps->ps_pnd_count > PERFUSE_OPENFS_MAXFILES) {
+ time_t now;
+
+ now = time(NULL);
+
+ TAILQ_FOREACH(pnd, &ps->ps_pnd, pnd_next) {
+ if ((pnd->pnd_ino == FUSE_ROOT_ID) ||
+ !(pnd->pnd_flags & PND_OPENFS) ||
+ (now < pnd->pnd_timestamp + PERFUSE_OPENFS_TIMEOUT))
+ continue;
+
+ pnd->pnd_flags &= ~PND_OPENFS;
+ perfuse_node_close_common(pu, pnd->pnd_pn, FWRITE);
+ }
+ }
+
pnd = PERFUSE_NODE_DATA(opc);
if (mode & FWRITE) {