Module Name:    src
Committed By:   drochner
Date:           Fri Oct 19 17:09:08 UTC 2012

Modified Files:
        src/include: mntopts.h
        src/sbin/atactl: atactl.c
        src/sbin/mount: mount.8
        src/sbin/mount_ffs: mount_ffs.c
        src/sys/dev/ata: atareg.h wd.c
        src/sys/sys: dkio.h fstypes.h
        src/sys/ufs/ffs: ffs_alloc.c ffs_extern.h ffs_vfsops.c
        src/sys/ufs/ufs: ufsmount.h

Log Message:
Implement experimental support for notifying the disk driver when a
file is deleted from the filesystem, commonly known as "discard" or
"trim".
Filesystem/driver support is in ffs and the ata wd driver for now.
This is what was posted here:
http://mail-index.netbsd.org/tech-kern/2012/02/28/msg012813.html
with minor cleanup, and the global switch replaced by a mount option.


To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 src/include/mntopts.h
cvs rdiff -u -r1.66 -r1.67 src/sbin/atactl/atactl.c
cvs rdiff -u -r1.77 -r1.78 src/sbin/mount/mount.8
cvs rdiff -u -r1.27 -r1.28 src/sbin/mount_ffs/mount_ffs.c
cvs rdiff -u -r1.40 -r1.41 src/sys/dev/ata/atareg.h
cvs rdiff -u -r1.400 -r1.401 src/sys/dev/ata/wd.c
cvs rdiff -u -r1.17 -r1.18 src/sys/sys/dkio.h
cvs rdiff -u -r1.30 -r1.31 src/sys/sys/fstypes.h
cvs rdiff -u -r1.130 -r1.131 src/sys/ufs/ffs/ffs_alloc.c
cvs rdiff -u -r1.78 -r1.79 src/sys/ufs/ffs/ffs_extern.h
cvs rdiff -u -r1.278 -r1.279 src/sys/ufs/ffs/ffs_vfsops.c
cvs rdiff -u -r1.38 -r1.39 src/sys/ufs/ufs/ufsmount.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/include/mntopts.h
diff -u src/include/mntopts.h:1.14 src/include/mntopts.h:1.15
--- src/include/mntopts.h:1.14	Fri Jun 17 14:23:50 2011
+++ src/include/mntopts.h	Fri Oct 19 17:09:06 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: mntopts.h,v 1.14 2011/06/17 14:23:50 manu Exp $	*/
+/*	$NetBSD: mntopts.h,v 1.15 2012/10/19 17:09:06 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1994
@@ -58,6 +58,7 @@ struct mntopt {
 #define MOPT_LOG		{ "log",	0, MNT_LOG, 0 }
 #define MOPT_IGNORE		{ "hidden",	0, MNT_IGNORE, 0 }
 #define MOPT_EXTATTR		{ "extattr",	0, MNT_EXTATTR, 0 }
+#define MOPT_DISCARD		{ "discard",	0, MNT_DISCARD, 0 }
 
 /* Control flags. */
 #define MOPT_FORCE		{ "force",	0, MNT_FORCE, 0 }

Index: src/sbin/atactl/atactl.c
diff -u src/sbin/atactl/atactl.c:1.66 src/sbin/atactl/atactl.c:1.67
--- src/sbin/atactl/atactl.c:1.66	Mon Oct 31 15:26:11 2011
+++ src/sbin/atactl/atactl.c	Fri Oct 19 17:09:07 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: atactl.c,v 1.66 2011/10/31 15:26:11 jakllsch Exp $	*/
+/*	$NetBSD: atactl.c,v 1.67 2012/10/19 17:09:07 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
 #include <sys/cdefs.h>
 
 #ifndef lint
-__RCSID("$NetBSD: atactl.c,v 1.66 2011/10/31 15:26:11 jakllsch Exp $");
+__RCSID("$NetBSD: atactl.c,v 1.67 2012/10/19 17:09:07 drochner Exp $");
 #endif
 
 
@@ -177,6 +177,7 @@ static const struct bitinfo ata_vers[] =
 	{ WDC_VER_ATA5,	"ATA-5" },
 	{ WDC_VER_ATA6,	"ATA-6" },
 	{ WDC_VER_ATA7,	"ATA-7" },
+	{ WDC_VER_ATA8, "ATA-8" },
 	{ 0, NULL },
 };
 
@@ -1041,6 +1042,10 @@ device_identify(int argc, char *argv[])
 			    inqbuf->atap_sata_features_supp, ata_sata_feat);
 	}
 
+	if ((inqbuf->atap_ata_major & WDC_VER_ATA8) &&
+	    (inqbuf->support_dsm & ATA_SUPPORT_DSM_TRIM))
+		printf("TRIM supported\n");
+
 	return;
 }
 

Index: src/sbin/mount/mount.8
diff -u src/sbin/mount/mount.8:1.77 src/sbin/mount/mount.8:1.78
--- src/sbin/mount/mount.8:1.77	Wed Oct  3 19:36:11 2012
+++ src/sbin/mount/mount.8	Fri Oct 19 17:09:07 2012
@@ -1,4 +1,4 @@
-.\"	$NetBSD: mount.8,v 1.77 2012/10/03 19:36:11 wiz Exp $
+.\"	$NetBSD: mount.8,v 1.78 2012/10/19 17:09:07 drochner Exp $
 .\"
 .\" Copyright (c) 1980, 1989, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
@@ -190,6 +190,9 @@ recovery mechanism, or are willing to re
 Clear
 .Cm async
 mode.
+.It Cm discard
+Use DISCARD/TRIM commands if the disk and driver support them.
+EXPERIMENTAL!
 .It Cm extattr
 Enable extended attributes, if the filesystem supports them and
 does not enable them by default.

Index: src/sbin/mount_ffs/mount_ffs.c
diff -u src/sbin/mount_ffs/mount_ffs.c:1.27 src/sbin/mount_ffs/mount_ffs.c:1.28
--- src/sbin/mount_ffs/mount_ffs.c:1.27	Mon Aug 29 14:35:01 2011
+++ src/sbin/mount_ffs/mount_ffs.c	Fri Oct 19 17:09:07 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: mount_ffs.c,v 1.27 2011/08/29 14:35:01 joerg Exp $	*/
+/*	$NetBSD: mount_ffs.c,v 1.28 2012/10/19 17:09:07 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1993, 1994
@@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1993, 19
 #if 0
 static char sccsid[] = "@(#)mount_ufs.c	8.4 (Berkeley) 4/26/95";
 #else
-__RCSID("$NetBSD: mount_ffs.c,v 1.27 2011/08/29 14:35:01 joerg Exp $");
+__RCSID("$NetBSD: mount_ffs.c,v 1.28 2012/10/19 17:09:07 drochner Exp $");
 #endif
 #endif /* not lint */
 
@@ -75,6 +75,7 @@ static const struct mntopt mopts[] = {
 	MOPT_LOG,
 	MOPT_GETARGS,
 	MOPT_EXTATTR,
+	MOPT_DISCARD,
 	MOPT_NULL,
 };
 

Index: src/sys/dev/ata/atareg.h
diff -u src/sys/dev/ata/atareg.h:1.40 src/sys/dev/ata/atareg.h:1.41
--- src/sys/dev/ata/atareg.h:1.40	Mon Oct 24 20:52:34 2011
+++ src/sys/dev/ata/atareg.h	Fri Oct 19 17:09:07 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: atareg.h,v 1.40 2011/10/24 20:52:34 jakllsch Exp $	*/
+/*	$NetBSD: atareg.h,v 1.41 2012/10/19 17:09:07 drochner Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.
@@ -90,6 +90,7 @@
 
 /* Commands for Disk Controller. */
 #define	WDCC_NOP		0x00	/* Always fail with "aborted command" */
+#define ATA_DATA_SET_MANAGEMENT	0x06
 #define	WDCC_RECAL		0x10	/* disk restore code -- resets cntlr */
 
 #define	WDCC_READ		0x20	/* disk read code */
@@ -387,6 +388,7 @@ struct ataparams {
 #define	WDC_VER_ATA5	0x0020
 #define	WDC_VER_ATA6	0x0040
 #define	WDC_VER_ATA7	0x0080
+#define	WDC_VER_ATA8	0x0100
     uint16_t	atap_ata_minor;		/* 81: Minor version number */
     uint16_t	atap_cmd_set1;		/* 82: command set supported */
 #define	WDC_CMD1_NOP	0x4000		/*	NOP */
@@ -451,7 +453,8 @@ struct ataparams {
     uint16_t	atap_apm_val;		/* 91: current APM value */
     uint16_t	__reserved5[8];		/* 92-99: reserved */
     uint16_t	atap_max_lba[4];	/* 100-103: Max. user LBA addr */
-    uint16_t	__reserved6[2];		/* 104-105: reserved */
+    uint16_t	__reserved6;		/* 104: reserved */
+    uint16_t	max_dsm_blocks;		/* 105: DSM (ATA-8/ACS-2) */
     uint16_t	atap_secsz;		/* 106: physical/logical sector size */
 #define ATA_SECSZ_VALID_MASK 0xc000
 #define ATA_SECSZ_VALID      0x4000
@@ -480,7 +483,10 @@ struct ataparams {
 #define ATA_CFA_MODE1_DIS 0x1000	/* CFA Mode 1 Disabled */
 #define ATA_CFA_MODE1_REQ 0x2000	/* CFA Mode 1 Required */
 #define ATA_CFA_WORD160   0x8000	/* Word 160 supported */
-    uint16_t	__reserved10[15];	/* 161-175: reserved for CFA */
+    uint16_t	__reserved10[8];	/* 161-168: reserved for CFA */
+    uint16_t	support_dsm;		/* 169: DSM (ATA-8/ACS-2) */
+#define ATA_SUPPORT_DSM_TRIM	0x0001
+    uint16_t	__reserved10a[6];	/* 170-175: reserved for CFA */
     uint8_t	atap_media_serial[60];	/* 176-205: media serial number */
     uint16_t	__reserved11[3];	/* 206-208: */
     uint16_t	atap_logical_align;	/* 209: logical/physical alignment */

Index: src/sys/dev/ata/wd.c
diff -u src/sys/dev/ata/wd.c:1.400 src/sys/dev/ata/wd.c:1.401
--- src/sys/dev/ata/wd.c:1.400	Tue Jul 31 15:50:34 2012
+++ src/sys/dev/ata/wd.c	Fri Oct 19 17:09:07 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: wd.c,v 1.400 2012/07/31 15:50:34 bouyer Exp $ */
+/*	$NetBSD: wd.c,v 1.401 2012/10/19 17:09:07 drochner Exp $ */
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
@@ -54,7 +54,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.400 2012/07/31 15:50:34 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.401 2012/10/19 17:09:07 drochner Exp $");
 
 #include "opt_ata.h"
 
@@ -178,6 +178,7 @@ void  wdrestart(void *);
 void  wddone(void *);
 int   wd_get_params(struct wd_softc *, u_int8_t, struct ataparams *);
 int   wd_flushcache(struct wd_softc *, int);
+int   wd_trim(struct wd_softc *, int, struct disk_discard_range *);
 bool  wd_shutdown(device_t, int);
 
 int   wd_getcache(struct wd_softc *, int *);
@@ -1526,6 +1527,20 @@ wdioctl(dev_t dev, u_long xfer, void *ad
 		return 0;
 	    }
 
+	case DIOCGDISCARDPARAMS: {
+		struct disk_discard_params * tp;
+
+		if (!(wd->sc_params.atap_ata_major & WDC_VER_ATA8)
+		    || !(wd->sc_params.support_dsm & ATA_SUPPORT_DSM_TRIM))
+			return ENOTTY;
+		tp = (struct disk_discard_params *)addr;
+		tp->maxsize = 0xffff; /*wd->sc_params.max_dsm_blocks*/
+		printf("wd: maxtrimsize %ld\n", tp->maxsize);
+		return 0;
+	}
+	case DIOCDISCARD:
+		return wd_trim(wd, WDPART(dev), (struct disk_discard_range *)addr);
+
 	default:
 		return ENOTTY;
 	}
@@ -1934,6 +1949,57 @@ wd_flushcache(struct wd_softc *wd, int f
 	return 0;
 }
 
+int
+wd_trim(struct wd_softc *wd, int part, struct disk_discard_range *tr)
+{	/* Send one range as a DATA SET MANAGEMENT command; returns 0, ENODEV or EIO. */
+	struct ata_command ata_c;
+	unsigned char *req;	/* 512-byte, zero-filled payload holding the single range */
+	daddr_t bno = tr->bno;
+
+	if (part != RAW_PART)	/* NOTE(review): range is not checked against the partition size */
+		bno += wd->sc_dk.dk_label->d_partitions[part].p_offset;
+
+	req = kmem_zalloc(512, KM_SLEEP);
+	req[0] = bno & 0xff;		/* bytes 0-5: low 48 bits of bno, LSB first */
+	req[1] = (bno >> 8) & 0xff;
+	req[2] = (bno >> 16) & 0xff;
+	req[3] = (bno >> 24) & 0xff;
+	req[4] = (bno >> 32) & 0xff;
+	req[5] = (bno >> 40) & 0xff;
+	req[6] = tr->size & 0xff;	/* bytes 6-7: low 16 bits of size */
+	req[7] = (tr->size >> 8) & 0xff; /* NOTE(review): sizes > 0xffff are silently truncated */
+
+	memset(&ata_c, 0, sizeof(struct ata_command));
+	ata_c.r_command = ATA_DATA_SET_MANAGEMENT;
+	ata_c.r_count = 1;
+	ata_c.r_features = ATA_SUPPORT_DSM_TRIM; /* reuses the IDENTIFY bit value (0x0001) */
+	ata_c.r_st_bmask = WDCS_DRDY;
+	ata_c.r_st_pmask = WDCS_DRDY;
+	ata_c.timeout = 30000; /* 30s timeout */
+	ata_c.data = req;
+	ata_c.bcount = 512;
+	ata_c.flags |= AT_WRITE | AT_WAIT;
+	if (wd->atabus->ata_exec_command(wd->drvp, &ata_c) != ATACMD_COMPLETE) {
+		aprint_error_dev(wd->sc_dev,
+		    "trim command didn't complete\n");
+		kmem_free(req, 512);
+		return EIO;
+	}
+	kmem_free(req, 512);	/* payload is not needed past this point */
+	if (ata_c.flags & AT_ERROR) {
+		if (ata_c.r_error == WDCE_ABRT) /* command not supported */
+			return ENODEV;
+	}
+	if (ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {	/* any other failure is reported as EIO */
+		char sbuf[sizeof(at_errbits) + 64];
+		snprintb(sbuf, sizeof(sbuf), at_errbits, ata_c.flags);
+		aprint_error_dev(wd->sc_dev, "wd_trim: status=%s\n",
+		    sbuf);
+		return EIO;
+	}
+	return 0;
+}
+
 bool
 wd_shutdown(device_t dev, int how)
 {

Index: src/sys/sys/dkio.h
diff -u src/sys/sys/dkio.h:1.17 src/sys/sys/dkio.h:1.18
--- src/sys/sys/dkio.h:1.17	Tue Jan 18 19:52:24 2011
+++ src/sys/sys/dkio.h	Fri Oct 19 17:09:07 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: dkio.h,v 1.17 2011/01/18 19:52:24 matt Exp $	*/
+/*	$NetBSD: dkio.h,v 1.18 2012/10/19 17:09:07 drochner Exp $	*/
 
 /*
  * Copyright (c) 1987, 1988, 1993
@@ -109,4 +109,15 @@
 
 #define	DIOCTUR		_IOR('d', 128, int)	/* test unit ready */
 
+struct disk_discard_params {	/* result of DIOCGDISCARDPARAMS */
+	long maxsize; /* largest single discard, in DEV_BSIZE units */
+};
+#define DIOCGDISCARDPARAMS _IOR('d', 129, struct disk_discard_params)
+
+struct disk_discard_range {	/* argument of DIOCDISCARD: one contiguous range */
+	daddr_t bno;	/* start of range; ffs passes the fsbtodb() result */
+	long size;	/* length of range; ffs passes bytes >> DEV_BSHIFT */
+};
+#define DIOCDISCARD	_IOW('d', 130, struct disk_discard_range)
+
 #endif /* _SYS_DKIO_H_ */

Index: src/sys/sys/fstypes.h
diff -u src/sys/sys/fstypes.h:1.30 src/sys/sys/fstypes.h:1.31
--- src/sys/sys/fstypes.h:1.30	Fri Nov 18 21:17:45 2011
+++ src/sys/sys/fstypes.h	Fri Oct 19 17:09:07 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: fstypes.h,v 1.30 2011/11/18 21:17:45 christos Exp $	*/
+/*	$NetBSD: fstypes.h,v 1.31 2012/10/19 17:09:07 drochner Exp $	*/
 
 /*
  * Copyright (c) 1989, 1991, 1993
@@ -84,7 +84,6 @@ typedef struct fhandle	fhandle_t;
  */
 
 #define	__MNT_UNUSED1	0x00200000
-#define	__MNT_UNUSED2	0x00800000
 
 #define	MNT_RDONLY	0x00000001	/* read only filesystem */
 #define	MNT_SYNCHRONOUS	0x00000002	/* file system written synchronously */
@@ -96,6 +95,7 @@ typedef struct fhandle	fhandle_t;
 #define	MNT_NOCOREDUMP	0x00008000	/* don't write core dumps to this FS */
 #define	MNT_RELATIME	0x00020000	/* only update access time if mod/ch */
 #define	MNT_IGNORE	0x00100000	/* don't show entry in df */
+#define	MNT_DISCARD	0x00800000	/* use DISCARD/TRIM if supported */
 #define	MNT_EXTATTR	0x01000000	/* enable extended attributes */
 #define	MNT_LOG		0x02000000	/* Use logging */
 #define	MNT_NOATIME	0x04000000	/* Never update access times in fs */
@@ -105,6 +105,7 @@ typedef struct fhandle	fhandle_t;
 
 #define	__MNT_BASIC_FLAGS \
 	{ MNT_ASYNC,		0,	"asynchronous" }, \
+	{ MNT_DISCARD,		0,	"discard" }, \
 	{ MNT_EXTATTR,		0,	"extattr" }, \
 	{ MNT_IGNORE,		0,	"hidden" }, \
 	{ MNT_LOG,		0,	"log" }, \
@@ -121,9 +122,9 @@ typedef struct fhandle	fhandle_t;
 	{ MNT_SYNCHRONOUS,	0,	"synchronous" }, \
 	{ MNT_UNION,		0,	"union" }, \
 
-#define MNT_BASIC_FLAGS (MNT_ASYNC | MNT_EXTATTR | MNT_LOG | MNT_NOATIME | \
-    MNT_NOCOREDUMP | MNT_NODEV | MNT_NODEVMTIME | MNT_NOEXEC | MNT_NOSUID | \
-    MNT_RDONLY | MNT_RELATIME | MNT_SOFTDEP | MNT_SYMPERM | \
+#define MNT_BASIC_FLAGS (MNT_ASYNC | MNT_DISCARD | MNT_EXTATTR | MNT_LOG | \
+    MNT_NOATIME | MNT_NOCOREDUMP | MNT_NODEV | MNT_NODEVMTIME | MNT_NOEXEC | \
+    MNT_NOSUID | MNT_RDONLY | MNT_RELATIME | MNT_SOFTDEP | MNT_SYMPERM | \
     MNT_SYNCHRONOUS | MNT_UNION)
 /*
  * exported mount flags.
@@ -235,7 +236,7 @@ typedef struct fhandle	fhandle_t;
 	"\33MNT_NOATIME" \
 	"\32MNT_LOG" \
 	"\31MNT_EXTATTR" \
-	"\30MNT_UNUSED" \
+	"\30MNT_DISCARD" \
 	"\27MNT_GETARGS" \
 	"\26MNT_UNUSED" \
 	"\25MNT_IGNORE" \

Index: src/sys/ufs/ffs/ffs_alloc.c
diff -u src/sys/ufs/ffs/ffs_alloc.c:1.130 src/sys/ufs/ffs/ffs_alloc.c:1.131
--- src/sys/ufs/ffs/ffs_alloc.c:1.130	Mon Nov 28 08:05:07 2011
+++ src/sys/ufs/ffs/ffs_alloc.c	Fri Oct 19 17:09:08 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: ffs_alloc.c,v 1.130 2011/11/28 08:05:07 tls Exp $	*/
+/*	$NetBSD: ffs_alloc.c,v 1.131 2012/10/19 17:09:08 drochner Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -70,7 +70,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.130 2011/11/28 08:05:07 tls Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.131 2012/10/19 17:09:08 drochner Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -1552,9 +1552,8 @@ ffs_blkalloc_ump(struct ufsmount *ump, d
  *
  * => um_lock not held on entry or exit
  */
-void
-ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
-    ino_t inum)
+static void
+ffs_blkfree_cg(struct fs *fs, struct vnode *devvp, daddr_t bno, long size)
 {
 	struct cg *cgp;
 	struct buf *bp;
@@ -1574,12 +1573,6 @@ ffs_blkfree(struct fs *fs, struct vnode 
 	ump = VFSTOUFS(devvp->v_specmountpoint);
 	KASSERT(fs == ump->um_fs);
 	cgblkno = fsbtodb(fs, cgtod(fs, cg));
-	if (ffs_snapblkfree(fs, devvp, bno, size, inum))
-		return;
-
-	error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum);
-	if (error)
-		return;
 
 	error = bread(devvp, cgblkno, (int)fs->fs_cgsize,
 	    NOCRED, B_MODIFY, &bp);
@@ -1598,6 +1591,225 @@ ffs_blkfree(struct fs *fs, struct vnode 
 	bdwrite(bp);
 }
 
+struct discardopdata {	/* one pending or queued range to discard and free */
+	struct work wk; /* must be first: ffs_discardcb() casts the work pointer back */
+	struct vnode *devvp;	/* device vnode that receives the DIOCDISCARD ioctl */
+	daddr_t bno;	/* start of range, in fs fragments */
+	long size;	/* length of range, in bytes */
+};
+
+struct discarddata {	/* per-mount discard state, stored in um_discarddata */
+	struct fs *fs;
+	struct discardopdata *entry;	/* range still open for merging, or NULL */
+	long maxsize;	/* byte cap on a merged range */
+	kmutex_t entrylk;	/* held while entry is examined or changed */
+	struct workqueue *wq;	/* runs ffs_discardcb() */
+	int wqcnt, wqdraining;	/* items in flight; set while finish waits */
+	kmutex_t wqlk;	/* held while wqcnt/wqdraining are touched */
+	kcondvar_t wqcv;	/* signalled when wqcnt hits 0 during draining */
+	/* timer for flush? */
+};
+
+static void
+ffs_blkfree_td(struct fs *fs, struct discardopdata *td)
+{	/* Release td's range piecewise via ffs_blkfree_cg(); td->size is 0 on return. */
+	long todo;	/* bytes released in this iteration */
+
+	while (td->size) {
+		todo = min(td->size,	/* presumably caps each piece at the end of bno's block */
+		  lfragtosize(fs, (fs->fs_frag - fragnum(fs, td->bno))));
+		ffs_blkfree_cg(fs, td->devvp, td->bno, todo);
+		td->bno += numfrags(fs, todo);	/* bno advances in fragments, size in bytes */
+		td->size -= todo;
+	}
+}
+
+static void
+ffs_discardcb(struct work *wk, void *arg)
+{	/* Workqueue handler: discard one range on the device, then free it in the fs. */
+	struct discardopdata *td = (void *)wk;	/* valid because wk is the first member */
+	struct discarddata *ts = arg;
+	struct fs *fs = ts->fs;
+	struct disk_discard_range ta;
+	int error;	/* only reported under TRIMDEBUG; otherwise ignored */
+
+	ta.bno = fsbtodb(fs, td->bno);
+	ta.size = td->size >> DEV_BSHIFT;	/* bytes -> DEV_BSIZE units */
+	error = VOP_IOCTL(td->devvp, DIOCDISCARD, &ta, FWRITE, FSCRED);
+#ifdef TRIMDEBUG
+	printf("trim(%" PRId64 ",%ld):%d\n", td->bno, td->size, error);
+#endif
+
+	ffs_blkfree_td(fs, td);	/* blocks are freed whether or not the ioctl succeeded */
+	kmem_free(td, sizeof(*td));
+	mutex_enter(&ts->wqlk);
+	ts->wqcnt--;
+	if (ts->wqdraining && !ts->wqcnt)	/* last item done while finish is waiting */
+		cv_signal(&ts->wqcv);
+	mutex_exit(&ts->wqlk);
+}
+
+void *
+ffs_discard_init(struct vnode *devvp, struct fs *fs)
+{	/* Set up discard state for a mount; returns it, or NULL if discard is unusable. */
+	struct disk_discard_params tp;
+	struct discarddata *ts;
+	int error;
+
+	error = VOP_IOCTL(devvp, DIOCGDISCARDPARAMS, &tp, FREAD, FSCRED);
+	if (error) {	/* device/driver does not answer the query */
+		printf("DIOCGDISCARDPARAMS: %d\n", error);
+		return NULL;
+	}
+	if (tp.maxsize * DEV_BSIZE < fs->fs_bsize) {	/* device limit below one fs block */
+		printf("tp.maxsize=%ld, fs_bsize=%d\n", tp.maxsize, fs->fs_bsize);
+		return NULL;
+	}
+
+	ts = kmem_zalloc(sizeof (*ts), KM_SLEEP);	/* zeroed: entry, wqcnt, wqdraining start at 0 */
+	error = workqueue_create(&ts->wq, "trimwq", ffs_discardcb, ts,
+				 0, 0, 0);
+	if (error) {
+		kmem_free(ts, sizeof (*ts));
+		return NULL;
+	}
+	mutex_init(&ts->entrylk, MUTEX_DEFAULT, IPL_NONE);
+	mutex_init(&ts->wqlk, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&ts->wqcv, "trimwqcv");
+	ts->maxsize = max(tp.maxsize * DEV_BSIZE, 100*1024); /* XXX NOTE(review): max() can exceed the device limit */
+	ts->fs = fs;
+	return ts;
+}
+
+void
+ffs_discard_finish(void *vts, int flags)
+{	/* Tear down the state from ffs_discard_init(); flags is currently unused. */
+	struct discarddata *ts = vts;
+	struct discardopdata *td = NULL;
+	int res = 0;
+
+	/* wait for workqueue to drain */
+	mutex_enter(&ts->wqlk);
+	if (ts->wqcnt) {	/* NOTE(review): single wait, no re-check of wqcnt after wakeup */
+		ts->wqdraining = 1;
+		res = cv_timedwait(&ts->wqcv, &ts->wqlk, mstohz(5000));
+	}
+	mutex_exit(&ts->wqlk);
+	if (res)	/* teardown continues even if the wait failed */
+		printf("ffs_discarddata drain timeout\n");
+
+	mutex_enter(&ts->entrylk);
+	if (ts->entry) {	/* detach the range that was still waiting to be merged */
+		td = ts->entry;
+		ts->entry = NULL;
+	}
+	mutex_exit(&ts->entrylk);
+	if (td) {
+		/* XXX don't tell disk, it's optional */
+		ffs_blkfree_td(ts->fs, td);
+#ifdef TRIMDEBUG
+		printf("finish(%" PRId64 ",%ld)\n", td->bno, td->size);
+#endif
+		kmem_free(td, sizeof(*td));
+	}
+
+	cv_destroy(&ts->wqcv);
+	mutex_destroy(&ts->entrylk);
+	mutex_destroy(&ts->wqlk);
+	workqueue_destroy(ts->wq);
+	kmem_free(ts, sizeof(*ts));
+}
+
+void
+ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
+    ino_t inum)
+{	/* Free size bytes at fragment bno; with discard state present, batch and queue instead. */
+	struct ufsmount *ump;
+	int error;
+	dev_t dev;
+	struct discarddata *ts;
+	struct discardopdata *td;
+
+	dev = devvp->v_rdev;
+	ump = VFSTOUFS(devvp->v_specmountpoint);
+	if (ffs_snapblkfree(fs, devvp, bno, size, inum))	/* nonzero: nothing more to do here */
+		return;
+
+	error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum);
+	if (error)
+		return;
+
+	if (!ump->um_discarddata) {	/* no discard state: free immediately */
+		ffs_blkfree_cg(fs, devvp, bno, size);
+		return;
+	}
+
+#ifdef TRIMDEBUG
+	printf("blkfree(%" PRId64 ",%ld)\n", bno, size);
+#endif
+	ts = ump->um_discarddata;
+	td = NULL;	/* stays NULL when there is no pending entry */
+
+	mutex_enter(&ts->entrylk);
+	if (ts->entry) {
+		td = ts->entry;
+		/* ffs deallocs backwards, check for prepend only */
+		if (td->bno == bno + numfrags(fs, size)
+		    && td->size + size <= ts->maxsize) {
+			td->bno = bno;	/* new range ends where the pending one starts */
+			td->size += size;
+			if (td->size < ts->maxsize) {	/* still room: keep collecting */
+#ifdef TRIMDEBUG
+				printf("defer(%" PRId64 ",%ld)\n", td->bno, td->size);
+#endif
+				mutex_exit(&ts->entrylk);
+				return;
+			}
+			size = 0; /* mark done */
+		}
+		ts->entry = NULL;	/* full or not mergeable: detach it for queueing */
+	}
+	mutex_exit(&ts->entrylk);
+
+	if (td) {	/* queue the detached entry; ffs_discardcb() frees it */
+#ifdef TRIMDEBUG
+		printf("enq old(%" PRId64 ",%ld)\n", td->bno, td->size);
+#endif
+		mutex_enter(&ts->wqlk);
+		ts->wqcnt++;
+		mutex_exit(&ts->wqlk);
+		workqueue_enqueue(ts->wq, &td->wk, NULL);
+	}
+	if (!size)	/* the new range was merged into the queued entry */
+		return;
+
+	td = kmem_alloc(sizeof(*td), KM_SLEEP);	/* start a new entry for this range */
+	td->devvp = devvp;
+	td->bno = bno;
+	td->size = size;
+
+	if (td->size < ts->maxsize) { /* XXX always the case */
+		mutex_enter(&ts->entrylk);
+		if (!ts->entry) { /* possible race? */
+#ifdef TRIMDEBUG
+			printf("defer(%" PRId64 ",%ld)\n", td->bno, td->size);
+#endif
+			ts->entry = td;	/* becomes the pending entry */
+			td = NULL;
+		}
+		mutex_exit(&ts->entrylk);
+	}
+	if (td) {	/* not stored as pending: queue it right away */
+#ifdef TRIMDEBUG
+		printf("enq new(%" PRId64 ",%ld)\n", td->bno, td->size);
+#endif
+		mutex_enter(&ts->wqlk);
+		ts->wqcnt++;
+		mutex_exit(&ts->wqlk);
+		workqueue_enqueue(ts->wq, &td->wk, NULL);
+	}
+}
+
 /*
  * Free a block or fragment from a snapshot cg copy.
  *

Index: src/sys/ufs/ffs/ffs_extern.h
diff -u src/sys/ufs/ffs/ffs_extern.h:1.78 src/sys/ufs/ffs/ffs_extern.h:1.79
--- src/sys/ufs/ffs/ffs_extern.h:1.78	Fri Jun 17 14:23:52 2011
+++ src/sys/ufs/ffs/ffs_extern.h	Fri Oct 19 17:09:08 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: ffs_extern.h,v 1.78 2011/06/17 14:23:52 manu Exp $	*/
+/*	$NetBSD: ffs_extern.h,v 1.79 2012/10/19 17:09:08 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993, 1994
@@ -98,6 +98,8 @@ daddr_t	ffs_blkpref_ufs2(struct inode *,
 int	ffs_blkalloc(struct inode *, daddr_t, long);
 int	ffs_blkalloc_ump(struct ufsmount *, daddr_t, long);
 void	ffs_blkfree(struct fs *, struct vnode *, daddr_t, long, ino_t);
+void	*ffs_discard_init(struct vnode *, struct fs *);
+void	ffs_discard_finish(void *, int);
 void	ffs_blkfree_snap(struct fs *, struct vnode *, daddr_t, long, ino_t);
 int	ffs_vfree(struct vnode *, ino_t, int);
 int	ffs_checkfreefile(struct fs *, struct vnode *, ino_t);

Index: src/sys/ufs/ffs/ffs_vfsops.c
diff -u src/sys/ufs/ffs/ffs_vfsops.c:1.278 src/sys/ufs/ffs/ffs_vfsops.c:1.279
--- src/sys/ufs/ffs/ffs_vfsops.c:1.278	Mon Sep 10 07:57:50 2012
+++ src/sys/ufs/ffs/ffs_vfsops.c	Fri Oct 19 17:09:08 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: ffs_vfsops.c,v 1.278 2012/09/10 07:57:50 manu Exp $	*/
+/*	$NetBSD: ffs_vfsops.c,v 1.279 2012/10/19 17:09:08 drochner Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.278 2012/09/10 07:57:50 manu Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.279 2012/10/19 17:09:08 drochner Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -1306,6 +1306,9 @@ ffs_mountfs(struct vnode *devvp, struct 
 		ufs_extattr_uepm_init(&ump->um_extattr);	
 #endif /* UFS_EXTATTR */
 
+	if (mp->mnt_flag & MNT_DISCARD)
+		ump->um_discarddata = ffs_discard_init(devvp, fs);
+
 	return (0);
 out:
 #ifdef WAPBL
@@ -1462,6 +1465,11 @@ ffs_unmount(struct mount *mp, int mntfla
 	extern int doforce;
 #endif
 
+	if (ump->um_discarddata) {
+		ffs_discard_finish(ump->um_discarddata, mntflags);
+		ump->um_discarddata = NULL;
+	}
+
 	flags = 0;
 	if (mntflags & MNT_FORCE)
 		flags |= FORCECLOSE;

Index: src/sys/ufs/ufs/ufsmount.h
diff -u src/sys/ufs/ufs/ufsmount.h:1.38 src/sys/ufs/ufs/ufsmount.h:1.39
--- src/sys/ufs/ufs/ufsmount.h:1.38	Wed May  9 00:21:18 2012
+++ src/sys/ufs/ufs/ufsmount.h	Fri Oct 19 17:09:08 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: ufsmount.h,v 1.38 2012/05/09 00:21:18 riastradh Exp $	*/
+/*	$NetBSD: ufsmount.h,v 1.39 2012/10/19 17:09:08 drochner Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -124,6 +124,8 @@ struct ufsmount {
 	void	*um_snapinfo;			/* snapshot private data */
 
 	const struct ufs_ops *um_ops;
+
+	void *um_discarddata;
 };
 
 struct ufs_ops {

Reply via email to