Module Name:    src
Committed By:   jdolecek
Date:           Wed Apr  5 20:15:50 UTC 2017

Modified Files:
        src/sys/dev/ic: ld_nvme.c nvme.c nvmevar.h
        src/sys/dev/scsipi: scsipi_disk.h sd.c
        src/sys/kern: vfs_bio.c
        src/sys/sys: buf.h dkio.h

Log Message:
expose disk device FUA/DPO support via DIOCGCACHE, and allow the flags
to be set for I/O; implement support in sd(4) and nvme(4)

discussed on tech-kern


To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 src/sys/dev/ic/ld_nvme.c
cvs rdiff -u -r1.25 -r1.26 src/sys/dev/ic/nvme.c
cvs rdiff -u -r1.12 -r1.13 src/sys/dev/ic/nvmevar.h
cvs rdiff -u -r1.21 -r1.22 src/sys/dev/scsipi/scsipi_disk.h
cvs rdiff -u -r1.322 -r1.323 src/sys/dev/scsipi/sd.c
cvs rdiff -u -r1.271 -r1.272 src/sys/kern/vfs_bio.c
cvs rdiff -u -r1.126 -r1.127 src/sys/sys/buf.h
cvs rdiff -u -r1.23 -r1.24 src/sys/sys/dkio.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/ic/ld_nvme.c
diff -u src/sys/dev/ic/ld_nvme.c:1.14 src/sys/dev/ic/ld_nvme.c:1.15
--- src/sys/dev/ic/ld_nvme.c:1.14	Tue Feb 28 20:55:09 2017
+++ src/sys/dev/ic/ld_nvme.c	Wed Apr  5 20:15:49 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: ld_nvme.c,v 1.14 2017/02/28 20:55:09 jdolecek Exp $	*/
+/*	$NetBSD: ld_nvme.c,v 1.15 2017/04/05 20:15:49 jdolecek Exp $	*/
 
 /*-
  * Copyright (C) 2016 NONAKA Kimihiro <non...@netbsd.org>
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.14 2017/02/28 20:55:09 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.15 2017/04/05 20:15:49 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -152,11 +152,15 @@ static int
 ld_nvme_start(struct ld_softc *ld, struct buf *bp)
 {
 	struct ld_nvme_softc *sc = device_private(ld->sc_dv);
+	int flags = BUF_ISWRITE(bp) ? 0 : NVME_NS_CTX_F_READ;
+
+	if (bp->b_flags & B_MEDIA_FUA)
+		flags |= NVME_NS_CTX_F_FUA;
 
 	return nvme_ns_dobio(sc->sc_nvme, sc->sc_nsid, sc,
 	    bp, bp->b_data, bp->b_bcount,
 	    sc->sc_ld.sc_secsize, bp->b_rawblkno,
-	    BUF_ISWRITE(bp) ? 0 : NVME_NS_CTX_F_READ,
+	    flags,
 	    ld_nvme_biodone);
 }
 
@@ -221,7 +225,11 @@ ld_nvme_getcache(struct ld_softc *ld, in
 	int error;
 	struct ld_nvme_softc *sc = device_private(ld->sc_dv);
 
-	*addr = 0;
+	/*
+	 * DPO is not supported; the Dataset Management (DSM) field doesn't
+	 * specify the same semantics.
+	 */
+	*addr = DKCACHE_FUA;
 
 	if (!nvme_has_volatile_write_cache(sc->sc_nvme)) {
 		/* cache simply not present */

Index: src/sys/dev/ic/nvme.c
diff -u src/sys/dev/ic/nvme.c:1.25 src/sys/dev/ic/nvme.c:1.26
--- src/sys/dev/ic/nvme.c:1.25	Tue Feb 28 20:53:50 2017
+++ src/sys/dev/ic/nvme.c	Wed Apr  5 20:15:49 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvme.c,v 1.25 2017/02/28 20:53:50 jdolecek Exp $	*/
+/*	$NetBSD: nvme.c,v 1.26 2017/04/05 20:15:49 jdolecek Exp $	*/
 /*	$OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */
 
 /*
@@ -18,7 +18,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.25 2017/02/28 20:53:50 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.26 2017/04/05 20:15:49 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -727,6 +727,9 @@ nvme_ns_io_fill(struct nvme_queue *q, st
 
 	htolem64(&sqe->slba, ccb->nnc_blkno);
 
+	if (ISSET(ccb->nnc_flags, NVME_NS_CTX_F_FUA))
+		htolem16(&sqe->ioflags, NVM_SQE_IO_FUA);
+
 	/* guaranteed by upper layers, but check just in case */
 	KASSERT((ccb->nnc_datasize % ccb->nnc_secsize) == 0);
 	htolem16(&sqe->nlb, (ccb->nnc_datasize / ccb->nnc_secsize) - 1);

Index: src/sys/dev/ic/nvmevar.h
diff -u src/sys/dev/ic/nvmevar.h:1.12 src/sys/dev/ic/nvmevar.h:1.13
--- src/sys/dev/ic/nvmevar.h:1.12	Tue Feb 28 20:53:50 2017
+++ src/sys/dev/ic/nvmevar.h	Wed Apr  5 20:15:49 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmevar.h,v 1.12 2017/02/28 20:53:50 jdolecek Exp $	*/
+/*	$NetBSD: nvmevar.h,v 1.13 2017/04/05 20:15:49 jdolecek Exp $	*/
 /*	$OpenBSD: nvmevar.h,v 1.8 2016/04/14 11:18:32 dlg Exp $ */
 
 /*
@@ -64,6 +64,7 @@ struct nvme_ccb {
 	uint16_t	nnc_flags;
 #define	NVME_NS_CTX_F_READ	__BIT(0)
 #define	NVME_NS_CTX_F_POLL	__BIT(1)
+#define	NVME_NS_CTX_F_FUA	__BIT(2)
 
 	struct buf	*nnc_buf;
 	daddr_t		nnc_blkno;

Index: src/sys/dev/scsipi/scsipi_disk.h
diff -u src/sys/dev/scsipi/scsipi_disk.h:1.21 src/sys/dev/scsipi/scsipi_disk.h:1.22
--- src/sys/dev/scsipi/scsipi_disk.h:1.21	Tue Dec 25 18:33:42 2007
+++ src/sys/dev/scsipi/scsipi_disk.h	Wed Apr  5 20:15:49 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: scsipi_disk.h,v 1.21 2007/12/25 18:33:42 perry Exp $	*/
+/*	$NetBSD: scsipi_disk.h,v 1.22 2017/04/05 20:15:49 jdolecek Exp $	*/
 
 /*
  * SCSI and SCSI-like interfaces description
@@ -62,9 +62,10 @@ struct scsipi_rw_10 {
 	u_int8_t opcode;
 	u_int8_t byte2;
 #define	SRWB_RELADDR	0x01	/* obsolete */
-#define	SRWB_FUA_NV	0x02	/* force unit access non-volatile cache */
-#define	SRWB_FUA	0x08	/* force unit access */
-#define	SRWB_DPO	0x10	/* disable page out */
+#define	SRWB_FUA_NV	0x02	/* force unit access non-volatile cache (SCSI-3) */
+#define	SRWB_RESV2	0x04	/* reserved (SCSI-2) */
+#define	SRWB_FUA	0x08	/* force unit access volatile cache (SCSI-2) */
+#define	SRWB_DPO	0x10	/* disable page out (SCSI-2) */
 #define	SRWB_PROTECT(x) ((x) << 5)
 	u_int8_t addr[4];
 	u_int8_t reserved;
@@ -159,4 +160,7 @@ struct scsipi_capacity_descriptor {
 #define	SCSIPI_CAP_DESC_CODE_FORMATTED		0x2
 #define	SCSIPI_CAP_DESC_CODE_NONE		0x3
 
+/* defines for the device specific byte in the mode select/sense header */
+#define	SMH_DSP_DPOFUA		0x10
+
 #endif /* _DEV_SCSIPI_SCSIPI_DISK_H_ */

Index: src/sys/dev/scsipi/sd.c
diff -u src/sys/dev/scsipi/sd.c:1.322 src/sys/dev/scsipi/sd.c:1.323
--- src/sys/dev/scsipi/sd.c:1.322	Wed Dec 21 21:28:30 2016
+++ src/sys/dev/scsipi/sd.c	Wed Apr  5 20:15:49 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: sd.c,v 1.322 2016/12/21 21:28:30 mlelstv Exp $	*/
+/*	$NetBSD: sd.c,v 1.323 2017/04/05 20:15:49 jdolecek Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2003, 2004 The NetBSD Foundation, Inc.
@@ -47,7 +47,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sd.c,v 1.322 2016/12/21 21:28:30 mlelstv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sd.c,v 1.323 2017/04/05 20:15:49 jdolecek Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_scsi.h"
@@ -654,6 +654,7 @@ sd_diskstart(device_t dev, struct buf *b
 	struct scsipi_generic *cmdp;
 	struct scsipi_xfer *xs;
 	int error, flags, nblks, cmdlen;
+	int cdb_flags;
 
 	mutex_enter(chan_mtx(chan));
 
@@ -698,12 +699,27 @@ sd_diskstart(device_t dev, struct buf *b
 		nblks = howmany(bp->b_bcount, sd->params.blksize);
 
 	/*
+	 * Pass FUA and/or DPO if requested. Must be done before CDB
+	 * selection, as 6-byte CDB doesn't support the flags.
+	 */
+	cdb_flags = 0;
+
+	if (bp->b_flags & B_MEDIA_FUA)
+		cdb_flags |= SRWB_FUA;
+
+	if (bp->b_flags & B_MEDIA_DPO)
+		cdb_flags |= SRWB_DPO;
+
+	/*
 	 * Fill out the scsi command.  Use the smallest CDB possible
-	 * (6-byte, 10-byte, or 16-byte).
+	 * (6-byte, 10-byte, or 16-byte). If FUA or DPO is requested,
+	 * we must use a 10-byte or bigger CDB, as the 6-byte CDB doesn't
+	 * support the flags.
 	 */
 	if (((bp->b_rawblkno & 0x1fffff) == bp->b_rawblkno) &&
 	    ((nblks & 0xff) == nblks) &&
-	    !(periph->periph_quirks & PQUIRK_ONLYBIG)) {
+	    !(periph->periph_quirks & PQUIRK_ONLYBIG) &&
+	    !cdb_flags) {
 		/* 6-byte CDB */
 		memset(&cmd_small, 0, sizeof(cmd_small));
 		cmd_small.opcode = (bp->b_flags & B_READ) ?
@@ -732,6 +748,9 @@ sd_diskstart(device_t dev, struct buf *b
 		cmdp = (struct scsipi_generic *)&cmd16;
 	}
 
+	if (cdb_flags)
+		cmdp->bytes[0] = cdb_flags;
+
 	/*
 	 * Figure out what flags to use.
 	 */
@@ -1796,7 +1815,9 @@ sd_getcache(struct sd_softc *sd, int *bi
 	int error, bits = 0;
 	int big;
 	union scsi_disk_pages *pages;
+	uint8_t dev_spec;
 
+	/* only SCSI-2 and later supported */
 	if (periph->periph_version < 2)
 		return (EOPNOTSUPP);
 
@@ -1806,10 +1827,13 @@ sd_getcache(struct sd_softc *sd, int *bi
 	if (error)
 		return (error);
 
-	if (big)
+	if (big) {
 		pages = (void *)(&scsipi_sense.header.big + 1);
-	else
+		dev_spec = scsipi_sense.header.big.dev_spec;
+	} else {
 		pages = (void *)(&scsipi_sense.header.small + 1);
+		dev_spec = scsipi_sense.header.small.dev_spec;
+	}
 
 	if ((pages->caching_params.flags & CACHING_RCD) == 0)
 		bits |= DKCACHE_READ;
@@ -1818,6 +1842,13 @@ sd_getcache(struct sd_softc *sd, int *bi
 	if (pages->caching_params.pg_code & PGCODE_PS)
 		bits |= DKCACHE_SAVE;
 
+	/*
+	 * FUA/DPO are defined starting with SCSI-2. Report them only if the
+	 * device claims support via the DPOFUA bit of the MODE SENSE header.
+	 */
+	if (ISSET(dev_spec, SMH_DSP_DPOFUA))
+		bits |= DKCACHE_FUA | DKCACHE_DPO;
+
 	memset(&scsipi_sense, 0, sizeof(scsipi_sense));
 	error = sd_mode_sense(sd, SMS_DBD, &scsipi_sense,
 	    sizeof(scsipi_sense.pages.caching_params),

Index: src/sys/kern/vfs_bio.c
diff -u src/sys/kern/vfs_bio.c:1.271 src/sys/kern/vfs_bio.c:1.272
--- src/sys/kern/vfs_bio.c:1.271	Tue Mar 21 10:46:49 2017
+++ src/sys/kern/vfs_bio.c	Wed Apr  5 20:15:49 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_bio.c,v 1.271 2017/03/21 10:46:49 skrll Exp $	*/
+/*	$NetBSD: vfs_bio.c,v 1.272 2017/04/05 20:15:49 jdolecek Exp $	*/
 
 /*-
  * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -123,7 +123,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.271 2017/03/21 10:46:49 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.272 2017/04/05 20:15:49 jdolecek Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_bufcache.h"
@@ -2027,7 +2027,7 @@ nestiobuf_iodone(buf_t *bp)
 void
 nestiobuf_setup(buf_t *mbp, buf_t *bp, int offset, size_t size)
 {
-	const int b_read = mbp->b_flags & B_READ;
+	const int b_pass = mbp->b_flags & (B_READ|B_MEDIA_FLAGS);
 	struct vnode *vp = mbp->b_vp;
 
 	KASSERT(mbp->b_bcount >= offset + size);
@@ -2035,14 +2035,14 @@ nestiobuf_setup(buf_t *mbp, buf_t *bp, i
 	bp->b_dev = mbp->b_dev;
 	bp->b_objlock = mbp->b_objlock;
 	bp->b_cflags = BC_BUSY;
-	bp->b_flags = B_ASYNC | b_read;
+	bp->b_flags = B_ASYNC | b_pass;
 	bp->b_iodone = nestiobuf_iodone;
 	bp->b_data = (char *)mbp->b_data + offset;
 	bp->b_resid = bp->b_bcount = size;
 	bp->b_bufsize = bp->b_bcount;
 	bp->b_private = mbp;
 	BIO_COPYPRIO(bp, mbp);
-	if (!b_read && vp != NULL) {
+	if (BUF_ISWRITE(bp) && vp != NULL) {
 		mutex_enter(vp->v_interlock);
 		vp->v_numoutput++;
 		mutex_exit(vp->v_interlock);

Index: src/sys/sys/buf.h
diff -u src/sys/sys/buf.h:1.126 src/sys/sys/buf.h:1.127
--- src/sys/sys/buf.h:1.126	Mon Dec 26 23:12:33 2016
+++ src/sys/sys/buf.h	Wed Apr  5 20:15:49 2017
@@ -1,4 +1,4 @@
-/*     $NetBSD: buf.h,v 1.126 2016/12/26 23:12:33 pgoyette Exp $ */
+/*     $NetBSD: buf.h,v 1.127 2017/04/05 20:15:49 jdolecek Exp $ */
 
 /*-
  * Copyright (c) 1999, 2000, 2007, 2008 The NetBSD Foundation, Inc.
@@ -198,16 +198,21 @@ struct buf {
 #define	B_RAW		0x00080000	/* Set by physio for raw transfers. */
 #define	B_READ		0x00100000	/* Read buffer. */
 #define	B_DEVPRIVATE	0x02000000	/* Device driver private flag. */
+#define	B_MEDIA_FUA	0x08000000	/* Set Force Unit Access for media. */
+#define	B_MEDIA_DPO	0x10000000	/* Set Disable Page Out for media. */
 
 #define BUF_FLAGBITS \
     "\20\1AGE\3ASYNC\4BAD\5BUSY\10DELWRI" \
     "\12DONE\13COWDONE\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \
-    "\23PHYS\24RAW\25READ\32DEVPRIVATE\33VFLUSH"
+    "\23PHYS\24RAW\25READ\32DEVPRIVATE\33VFLUSH\34MEDIA_FUA\35MEDIA_DPO"
 
 /* Avoid weird code due to B_WRITE being a "pseudo flag" */
 #define BUF_ISREAD(bp)	(((bp)->b_flags & B_READ) == B_READ)
 #define BUF_ISWRITE(bp)	(((bp)->b_flags & B_READ) == B_WRITE)
 
+/* Media flags, to be passed for nested I/O */
+#define B_MEDIA_FLAGS	(B_MEDIA_FUA|B_MEDIA_DPO)
+
 /*
  * This structure describes a clustered I/O.  It is stored in the b_saveaddr
  * field of the buffer on which I/O is done.  At I/O completion, cluster

Index: src/sys/sys/dkio.h
diff -u src/sys/sys/dkio.h:1.23 src/sys/sys/dkio.h:1.24
--- src/sys/sys/dkio.h:1.23	Wed Apr  5 18:34:56 2017
+++ src/sys/sys/dkio.h	Wed Apr  5 20:15:49 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: dkio.h,v 1.23 2017/04/05 18:34:56 jdolecek Exp $	*/
+/*	$NetBSD: dkio.h,v 1.24 2017/04/05 20:15:49 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1987, 1988, 1993
@@ -85,6 +85,8 @@
 #define	DKCACHE_RCHANGE	0x000100 /* read enable is changeable */
 #define	DKCACHE_WCHANGE	0x000200 /* write enable is changeable */
 #define	DKCACHE_SAVE	0x010000 /* cache parameters are savable/save them */
+#define	DKCACHE_FUA	0x020000 /* Force Unit Access supported */
+#define	DKCACHE_DPO	0x040000 /* Disable Page Out supported */
 
 /*
  * Combine disk cache flags of two drives to get common cache capabilities.

Reply via email to