Module Name:    src
Committed By:   manu
Date:           Fri Sep  9 15:45:28 UTC 2011

Modified Files:
        src/lib/libperfuse: ops.c perfuse_if.h perfuse_priv.h
        src/usr.sbin/perfused: Makefile perfused.c

Log Message:
Serialize access to file size. We already have such a mechanism in the
kernel, where it fixes a race for PUFFS filesystems, but we need it again
in perfused since FUSE filesystems are allowed to reorder requests.

The huge issue is in the asynchronous SETATTR sent by fsync. It is
followed by a synchronous FSYNC, so if the filesystem does not reorder
requests, once the FSYNC returns, we are confident the SETATTR is done.
But since FUSE can reorder requests, we need to implement that
synchronization in perfused.


To generate a diff of this commit:
cvs rdiff -u -r1.39 -r1.40 src/lib/libperfuse/ops.c
cvs rdiff -u -r1.15 -r1.16 src/lib/libperfuse/perfuse_if.h
cvs rdiff -u -r1.21 -r1.22 src/lib/libperfuse/perfuse_priv.h
cvs rdiff -u -r1.3 -r1.4 src/usr.sbin/perfused/Makefile
cvs rdiff -u -r1.14 -r1.15 src/usr.sbin/perfused/perfused.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libperfuse/ops.c
diff -u src/lib/libperfuse/ops.c:1.39 src/lib/libperfuse/ops.c:1.40
--- src/lib/libperfuse/ops.c:1.39	Sat Aug 13 23:12:15 2011
+++ src/lib/libperfuse/ops.c	Fri Sep  9 15:45:28 2011
@@ -1,4 +1,4 @@
-/*  $NetBSD: ops.c,v 1.39 2011/08/13 23:12:15 christos Exp $ */
+/*  $NetBSD: ops.c,v 1.40 2011/09/09 15:45:28 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
@@ -1487,14 +1487,22 @@
 {
 	perfuse_msg_t *pm;
 	struct perfuse_state *ps;
+	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
 	struct fuse_getattr_in *fgi;
 	struct fuse_attr_out *fao;
 	u_quad_t va_size;
 	int error;
 	
-	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
+	if (pnd->pnd_flags & PND_REMOVED)
 		return ENOENT;
 
+	/* 
+	 * Serialize size access, see comment in perfuse_node_setattr().
+	 */
+	while (pnd->pnd_flags & PND_INRESIZE)
+		requeue_request(pu, opc, PCQ_RESIZE);
+	pnd->pnd_flags |= PND_INRESIZE;
+
 	ps = puffs_getspecific(pu);
 	va_size = vap->va_size;
 
@@ -1513,11 +1521,22 @@
 		fgi->getattr_flags |= FUSE_GETATTR_FH;
 	}
 
+#ifdef PERFUSE_DEBUG
+	if (perfuse_diagflags & PDF_RESIZE)
+		DPRINTF(">> %s %p %lld\n", __func__, (void *)opc, va_size);
+#endif
+
 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
 		goto out;
 
 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
 
+#ifdef PERFUSE_DEBUG
+	if (perfuse_diagflags & PDF_RESIZE)
+		DPRINTF("<< %s %p %lld -> %lld\n", __func__, (void *)opc, 
+			va_size, fao->attr.size);
+#endif
+
 	/* 
 	 * The message from filesystem has a cache timeout
 	 * XXX this is ignored yet, is that right?
@@ -1528,16 +1547,12 @@
 	 */
 	fuse_attr_to_vap(ps, vap, &fao->attr);
 
-	/*
-	 * If a write is in progress, do not trust filesystem opinion 
-	 * of file size, use the one from kernel.
-	 */
-	if ((PERFUSE_NODE_DATA(opc)->pnd_flags & PND_INWRITE) &&
-	    (va_size != (u_quad_t)PUFFS_VNOVAL))
-		vap->va_size = MAX(va_size, vap->va_size);;
 out:
 	ps->ps_destroy_msg(pm);
 
+	pnd->pnd_flags &= ~PND_INRESIZE;
+	(void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
+
 	return error;
 }
 
@@ -1555,8 +1570,11 @@
 	struct fuse_setattr_in *fsi;
 	struct fuse_attr_out *fao;
 	struct vattr *old_va;
-	u_quad_t va_size;
 	int error;
+#ifdef PERFUSE_DEBUG
+	struct vattr *old_vap;
+	int resize_debug = 0;
+#endif
 
 	ps = puffs_getspecific(pu);
 	pnd = PERFUSE_NODE_DATA(opc);
@@ -1611,16 +1629,6 @@
 				old_va->va_type, vap->va_mode, pcr)) != 0)
 		return EACCES;
 	
-	/*
-	 * If a write is in progress, set the highest
-	 * value in the filesystem, otherwise we break 
-	 * IO_APPEND.
-	 */
-	va_size = vap->va_size;
-	if ((pnd->pnd_flags & PND_INWRITE) &&
-	    (va_size != (u_quad_t)PUFFS_VNOVAL))
-		va_size = MAX(va_size, old_va->va_size);
-
 	pm = ps->ps_new_msg(pu, opc, FUSE_SETATTR, sizeof(*fsi), pcr);
 	fsi = GET_INPAYLOAD(ps, pm, fuse_setattr_in);
 	fsi->valid = 0;
@@ -1634,9 +1642,19 @@
 		fsi->valid |= FUSE_FATTR_FH;
 	}
 
-	if (va_size != (u_quad_t)PUFFS_VNOVAL) {
-		fsi->size = va_size;
+	if (vap->va_size != (u_quad_t)PUFFS_VNOVAL) {
+		fsi->size = vap->va_size;
 		fsi->valid |= FUSE_FATTR_SIZE;
+
+		/* 
+		 * Serialize anything that can touch file size
+		 * to avoid reordered GETATTR and SETATTR.
+		 * Out of order SETATTR can report stale size,
+		 * which will cause the kernel to truncate the file.
+		 */
+		while (pnd->pnd_flags & PND_INRESIZE)
+			requeue_request(pu, opc, PCQ_RESIZE);
+		pnd->pnd_flags |= PND_INRESIZE;
 	}
 
 	/*
@@ -1696,7 +1714,7 @@
 	 * Try to adapt and remove FATTR_ATIME|FATTR_MTIME
 	 * if we suspect a ftruncate().
 	 */ 
-	if ((va_size != (u_quad_t)PUFFS_VNOVAL) &&
+	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
 	    ((vap->va_mode == (mode_t)PUFFS_VNOVAL) &&
 	     (vap->va_uid == (uid_t)PUFFS_VNOVAL) &&
 	     (vap->va_gid == (gid_t)PUFFS_VNOVAL))) {
@@ -1716,6 +1734,19 @@
 		goto out;
 	}
 
+#ifdef PERFUSE_DEBUG
+	old_vap = puffs_pn_getvap((struct puffs_node *)opc);
+
+	if ((perfuse_diagflags & PDF_RESIZE) &&
+	    (old_vap->va_size != (u_quad_t)PUFFS_VNOVAL)) {
+		resize_debug = 1;
+
+		DPRINTF(">> %s %p %lld -> %lld\n", __func__, (void *)opc, 
+			puffs_pn_getvap((struct puffs_node *)opc)->va_size, 
+			fsi->size);
+	}
+#endif
+
 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
 		goto out;
 
@@ -1723,12 +1754,23 @@
 	 * Copy back the new values 
 	 */
 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
+
+#ifdef PERFUSE_DEBUG
+	if (resize_debug)
+		DPRINTF("<< %s %p %lld -> %lld\n", __func__, (void *)opc, 
+			old_vap->va_size, fao->attr.size);
+#endif
+
 	fuse_attr_to_vap(ps, old_va, &fao->attr);
 out:
-
 	if (pm != NULL)
 		ps->ps_destroy_msg(pm);
 
+	if (pnd->pnd_flags & PND_INRESIZE) {
+		pnd->pnd_flags &= ~PND_INRESIZE;
+		(void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
+	}
+
 	return error;
 }
 
@@ -2781,6 +2823,7 @@
 {
 	struct perfuse_state *ps;
 	struct perfuse_node_data *pnd;
+	const struct vattr *vap;
 	perfuse_msg_t *pm;
 	struct fuse_read_in *fri;
 	struct fuse_out_header *foh;
@@ -2789,8 +2832,13 @@
 	
 	ps = puffs_getspecific(pu);
 	pnd = PERFUSE_NODE_DATA(opc);
+	vap = puffs_pn_getvap((struct puffs_node *)opc);
 	pm = NULL;
 
+	if (offset + *resid > vap->va_size)
+		DWARNX("%s %p read %lld@%d beyond EOF %lld\n",
+		       __func__, (void *)opc, offset, *resid, vap->va_size);
+
 	do {
 		size_t max_read;
 
@@ -2869,12 +2917,14 @@
 	size_t data_len;
 	size_t payload_len;
 	size_t written;
+	int inresize;
 	int error;
 	
 	ps = puffs_getspecific(pu);
 	pnd = PERFUSE_NODE_DATA(opc);
 	vap = puffs_pn_getvap((struct puffs_node *)opc);
 	written = 0;
+	inresize = 0;
 	pm = NULL;
 
 	if (vap->va_type == VDIR) 
@@ -2888,11 +2938,23 @@
 		requeue_request(pu, opc, PCQ_WRITE);
 	pnd->pnd_flags |= PND_INWRITE;
 
+	/* 
+	 * Serialize size access, see comment in perfuse_node_setattr().
+	 */
+	if (offset + *resid > vap->va_size) {
+		while (pnd->pnd_flags & PND_INRESIZE)
+			requeue_request(pu, opc, PCQ_RESIZE);
+		pnd->pnd_flags |= PND_INRESIZE;
+		inresize = 1;
+	}
+
 	/*
 	 * append flag: re-read the file size so that 
 	 * we get the latest value.
 	 */
 	if (ioflag & PUFFS_IO_APPEND) {
+		DWARNX("%s: PUFFS_IO_APPEND set, untested code", __func__);
+
 		if ((error = perfuse_node_getattr(pu, opc, vap, pcr)) != 0)
 			goto out;
 
@@ -2901,6 +2963,12 @@
 
 	pm = NULL;
 
+#ifdef PERFUSE_DEBUG
+	if (perfuse_diagflags & PDF_RESIZE)
+		DPRINTF(">> %s %p %lld \n", __func__,
+			(void *)opc, vap->va_size);
+#endif
+
 	do {
 		size_t max_write;
 		/*
@@ -2967,12 +3035,32 @@
 	if (*resid != 0)
 		error = EFBIG;
 
+#ifdef PERFUSE_DEBUG
+	if (perfuse_diagflags & PDF_RESIZE) {
+		if (offset > (off_t)vap->va_size)
+			DPRINTF("<< %s %p %lld -> %lld\n", __func__, 
+				(void *)opc, vap->va_size, offset);
+		else
+			DPRINTF("<< %s %p \n", __func__, (void *)opc);
+	}
+#endif
+
 	/*
 	 * Update file size if we wrote beyond the end
 	 */
 	if (offset > (off_t)vap->va_size) 
 		vap->va_size = offset;
 
+	if (inresize) {
+#ifdef PERFUSE_DEBUG
+		if (!(pnd->pnd_flags & PND_INRESIZE))
+			DERRX(EX_SOFTWARE, "file write grow without resize");
+#endif
+		pnd->pnd_flags &= ~PND_INRESIZE;
+		(void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
+	}
+
+
 	/*
 	 * Statistics
 	 */

Index: src/lib/libperfuse/perfuse_if.h
diff -u src/lib/libperfuse/perfuse_if.h:1.15 src/lib/libperfuse/perfuse_if.h:1.16
--- src/lib/libperfuse/perfuse_if.h:1.15	Sun Aug 14 08:19:04 2011
+++ src/lib/libperfuse/perfuse_if.h	Fri Sep  9 15:45:28 2011
@@ -1,4 +1,4 @@
-/*  $NetBSD: perfuse_if.h,v 1.15 2011/08/14 08:19:04 christos Exp $ */
+/*  $NetBSD: perfuse_if.h,v 1.16 2011/09/09 15:45:28 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
@@ -40,18 +40,19 @@
  * Diagnostic flags. This global is used only for DPRINTF/DERR/DWARN
  */
 extern int perfuse_diagflags;
-#define PDF_FOREGROUND	0x001	/* we run in foreground */
-#define PDF_FUSE	0x002	/* Display FUSE reqeusts and reply */
-#define PDF_DUMP	0x004	/* Dump FUSE frames */
-#define PDF_PUFFS	0x008	/* Display PUFFS requets and reply */
-#define PDF_FH		0x010	/* File handles */
-#define PDF_RECLAIM	0x020	/* Reclaimed files */
-#define PDF_READDIR	0x040	/* readdir operations */
-#define PDF_REQUEUE	0x080	/* reueued messages */
-#define PDF_SYNC	0x100	/* fsync and dirty flags */
-#define PDF_MISC	0x200	/* Miscelaneous messages */
-#define PDF_SYSLOG	0x400	/* use syslog */
-#define PDF_FILENAME	0x800	/* File names */
+#define PDF_FOREGROUND	0x0001	/* we run in foreground */
+#define PDF_FUSE	0x0002	/* Display FUSE reqeusts and reply */
+#define PDF_DUMP	0x0004	/* Dump FUSE frames */
+#define PDF_PUFFS	0x0008	/* Display PUFFS requets and reply */
+#define PDF_FH		0x0010	/* File handles */
+#define PDF_RECLAIM	0x0020	/* Reclaimed files */
+#define PDF_READDIR	0x0040	/* readdir operations */
+#define PDF_REQUEUE	0x0080	/* reueued messages */
+#define PDF_SYNC	0x0100	/* fsync and dirty flags */
+#define PDF_MISC	0x0200	/* Miscelaneous messages */
+#define PDF_SYSLOG	0x0400	/* use syslog */
+#define PDF_FILENAME	0x0800	/* File names */
+#define PDF_RESIZE	0x1000	/* Resize operations */
 
 /*
  * Diagnostic functions

Index: src/lib/libperfuse/perfuse_priv.h
diff -u src/lib/libperfuse/perfuse_priv.h:1.21 src/lib/libperfuse/perfuse_priv.h:1.22
--- src/lib/libperfuse/perfuse_priv.h:1.21	Thu Jul 14 15:37:32 2011
+++ src/lib/libperfuse/perfuse_priv.h	Fri Sep  9 15:45:28 2011
@@ -1,4 +1,4 @@
-/*  $NetBSD: perfuse_priv.h,v 1.21 2011/07/14 15:37:32 manu Exp $ */
+/*  $NetBSD: perfuse_priv.h,v 1.22 2011/09/09 15:45:28 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
@@ -75,7 +75,8 @@
 	PCQ_WRITE,
 	PCQ_AFTERWRITE,
 	PCQ_OPEN,
-	PCQ_AFTERXCHG
+	PCQ_AFTERXCHG,
+	PCQ_RESIZE
 };
 
 #ifdef PERFUSE_DEBUG
@@ -110,6 +111,7 @@
 #define PND_INWRITE		0x040	/* write in progress */
 #define PND_INOPEN		0x100	/* open in progress */
 #define PND_INXCHG		0x400	/* FUSE exchange in progress */
+#define PND_INRESIZE		0x800	/* resize in progress */
 
 #define PND_OPEN		(PND_RFH|PND_WFH)	/* File is open */
 #define PND_BUSY		(PND_INREADDIR|PND_INWRITE|PND_INOPEN)

Index: src/usr.sbin/perfused/Makefile
diff -u src/usr.sbin/perfused/Makefile:1.3 src/usr.sbin/perfused/Makefile:1.4
--- src/usr.sbin/perfused/Makefile:1.3	Thu May 26 12:56:34 2011
+++ src/usr.sbin/perfused/Makefile	Fri Sep  9 15:45:28 2011
@@ -1,10 +1,8 @@
-# $NetBSD: Makefile,v 1.3 2011/05/26 12:56:34 joerg Exp $
+# $NetBSD: Makefile,v 1.4 2011/09/09 15:45:28 manu Exp $
 
 PROG=            perfused
 
-.ifdef DEBUG
 PERFUSE_OPT_DEBUG_FLAGS=   -g -DPERFUSE_DEBUG
-.endif
 
 CFLAGS+=        ${PERFUSE_OPT_DEBUG_FLAGS}
 SRCS=           perfused.c msg.c debug.c

Index: src/usr.sbin/perfused/perfused.c
diff -u src/usr.sbin/perfused/perfused.c:1.14 src/usr.sbin/perfused/perfused.c:1.15
--- src/usr.sbin/perfused/perfused.c:1.14	Tue Aug 30 20:17:01 2011
+++ src/usr.sbin/perfused/perfused.c	Fri Sep  9 15:45:28 2011
@@ -1,4 +1,4 @@
-/*  $NetBSD: perfused.c,v 1.14 2011/08/30 20:17:01 joerg Exp $ */
+/*  $NetBSD: perfused.c,v 1.15 2011/09/09 15:45:28 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -308,6 +308,8 @@
 			retval |= PDF_MISC;
 		else if (strcmp(opt, "filename") == 0)
 			retval |= PDF_FILENAME;
+		else if (strcmp(opt, "reize") == 0)
+			retval |= PDF_RESIZE;
 		else
 			DWARNX("unknown debug flag \"%s\"", opt);
 	}

Reply via email to