Module Name: src
Committed By: jdolecek
Date: Wed Apr 5 20:15:50 UTC 2017
Modified Files:
src/sys/dev/ic: ld_nvme.c nvme.c nvmevar.h
src/sys/dev/scsipi: scsipi_disk.h sd.c
src/sys/kern: vfs_bio.c
src/sys/sys: buf.h dkio.h
Log Message:
Expose disk device FUA/DPO support via DIOCGCACHE, and allow the flags
to be set for I/O; implement support in sd(4) and nvme(4).
Discussed on tech-kern.
To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 src/sys/dev/ic/ld_nvme.c
cvs rdiff -u -r1.25 -r1.26 src/sys/dev/ic/nvme.c
cvs rdiff -u -r1.12 -r1.13 src/sys/dev/ic/nvmevar.h
cvs rdiff -u -r1.21 -r1.22 src/sys/dev/scsipi/scsipi_disk.h
cvs rdiff -u -r1.322 -r1.323 src/sys/dev/scsipi/sd.c
cvs rdiff -u -r1.271 -r1.272 src/sys/kern/vfs_bio.c
cvs rdiff -u -r1.126 -r1.127 src/sys/sys/buf.h
cvs rdiff -u -r1.23 -r1.24 src/sys/sys/dkio.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/dev/ic/ld_nvme.c
diff -u src/sys/dev/ic/ld_nvme.c:1.14 src/sys/dev/ic/ld_nvme.c:1.15
--- src/sys/dev/ic/ld_nvme.c:1.14 Tue Feb 28 20:55:09 2017
+++ src/sys/dev/ic/ld_nvme.c Wed Apr 5 20:15:49 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: ld_nvme.c,v 1.14 2017/02/28 20:55:09 jdolecek Exp $ */
+/* $NetBSD: ld_nvme.c,v 1.15 2017/04/05 20:15:49 jdolecek Exp $ */
/*-
* Copyright (C) 2016 NONAKA Kimihiro <nonaka@netbsd.org>
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.14 2017/02/28 20:55:09 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.15 2017/04/05 20:15:49 jdolecek Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -152,11 +152,15 @@ static int
ld_nvme_start(struct ld_softc *ld, struct buf *bp)
{
struct ld_nvme_softc *sc = device_private(ld->sc_dv);
+ int flags = BUF_ISWRITE(bp) ? 0 : NVME_NS_CTX_F_READ;
+
+ if (bp->b_flags & B_MEDIA_FUA)
+ flags |= NVME_NS_CTX_F_FUA;
return nvme_ns_dobio(sc->sc_nvme, sc->sc_nsid, sc,
bp, bp->b_data, bp->b_bcount,
sc->sc_ld.sc_secsize, bp->b_rawblkno,
- BUF_ISWRITE(bp) ? 0 : NVME_NS_CTX_F_READ,
+ flags,
ld_nvme_biodone);
}
@@ -221,7 +225,11 @@ ld_nvme_getcache(struct ld_softc *ld, in
int error;
struct ld_nvme_softc *sc = device_private(ld->sc_dv);
- *addr = 0;
+ /*
+ * DPO not supported, Dataset Management (DSM) field doesn't specify
+ * the same semantics.
+ */
+ *addr = DKCACHE_FUA;
if (!nvme_has_volatile_write_cache(sc->sc_nvme)) {
/* cache simply not present */
Index: src/sys/dev/ic/nvme.c
diff -u src/sys/dev/ic/nvme.c:1.25 src/sys/dev/ic/nvme.c:1.26
--- src/sys/dev/ic/nvme.c:1.25 Tue Feb 28 20:53:50 2017
+++ src/sys/dev/ic/nvme.c Wed Apr 5 20:15:49 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: nvme.c,v 1.25 2017/02/28 20:53:50 jdolecek Exp $ */
+/* $NetBSD: nvme.c,v 1.26 2017/04/05 20:15:49 jdolecek Exp $ */
/* $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */
/*
@@ -18,7 +18,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.25 2017/02/28 20:53:50 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.26 2017/04/05 20:15:49 jdolecek Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -727,6 +727,9 @@ nvme_ns_io_fill(struct nvme_queue *q, st
htolem64(&sqe->slba, ccb->nnc_blkno);
+ if (ISSET(ccb->nnc_flags, NVME_NS_CTX_F_FUA))
+ htolem16(&sqe->ioflags, NVM_SQE_IO_FUA);
+
/* guaranteed by upper layers, but check just in case */
KASSERT((ccb->nnc_datasize % ccb->nnc_secsize) == 0);
htolem16(&sqe->nlb, (ccb->nnc_datasize / ccb->nnc_secsize) - 1);
Index: src/sys/dev/ic/nvmevar.h
diff -u src/sys/dev/ic/nvmevar.h:1.12 src/sys/dev/ic/nvmevar.h:1.13
--- src/sys/dev/ic/nvmevar.h:1.12 Tue Feb 28 20:53:50 2017
+++ src/sys/dev/ic/nvmevar.h Wed Apr 5 20:15:49 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: nvmevar.h,v 1.12 2017/02/28 20:53:50 jdolecek Exp $ */
+/* $NetBSD: nvmevar.h,v 1.13 2017/04/05 20:15:49 jdolecek Exp $ */
/* $OpenBSD: nvmevar.h,v 1.8 2016/04/14 11:18:32 dlg Exp $ */
/*
@@ -64,6 +64,7 @@ struct nvme_ccb {
uint16_t nnc_flags;
#define NVME_NS_CTX_F_READ __BIT(0)
#define NVME_NS_CTX_F_POLL __BIT(1)
+#define NVME_NS_CTX_F_FUA __BIT(2)
struct buf *nnc_buf;
daddr_t nnc_blkno;
Index: src/sys/dev/scsipi/scsipi_disk.h
diff -u src/sys/dev/scsipi/scsipi_disk.h:1.21 src/sys/dev/scsipi/scsipi_disk.h:1.22
--- src/sys/dev/scsipi/scsipi_disk.h:1.21 Tue Dec 25 18:33:42 2007
+++ src/sys/dev/scsipi/scsipi_disk.h Wed Apr 5 20:15:49 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: scsipi_disk.h,v 1.21 2007/12/25 18:33:42 perry Exp $ */
+/* $NetBSD: scsipi_disk.h,v 1.22 2017/04/05 20:15:49 jdolecek Exp $ */
/*
* SCSI and SCSI-like interfaces description
@@ -62,9 +62,10 @@ struct scsipi_rw_10 {
u_int8_t opcode;
u_int8_t byte2;
#define SRWB_RELADDR 0x01 /* obsolete */
-#define SRWB_FUA_NV 0x02 /* force unit access non-volatile cache */
-#define SRWB_FUA 0x08 /* force unit access */
-#define SRWB_DPO 0x10 /* disable page out */
+#define SRWB_FUA_NV 0x02 /* force unit access non-volatile cache (SCSI-3) */
+#define SRWB_RESV2 0x04 /* reserved (SCSI-2) */
+#define SRWB_FUA 0x08 /* force unit access volatile cache (SCSI-2) */
+#define SRWB_DPO 0x10 /* disable page out (SCSI-2) */
#define SRWB_PROTECT(x) ((x) << 5)
u_int8_t addr[4];
u_int8_t reserved;
@@ -159,4 +160,7 @@ struct scsipi_capacity_descriptor {
#define SCSIPI_CAP_DESC_CODE_FORMATTED 0x2
#define SCSIPI_CAP_DESC_CODE_NONE 0x3
+/* defines for the device specific byte in the mode select/sense header */
+#define SMH_DSP_DPOFUA 0x10
+
#endif /* _DEV_SCSIPI_SCSIPI_DISK_H_ */
Index: src/sys/dev/scsipi/sd.c
diff -u src/sys/dev/scsipi/sd.c:1.322 src/sys/dev/scsipi/sd.c:1.323
--- src/sys/dev/scsipi/sd.c:1.322 Wed Dec 21 21:28:30 2016
+++ src/sys/dev/scsipi/sd.c Wed Apr 5 20:15:49 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: sd.c,v 1.322 2016/12/21 21:28:30 mlelstv Exp $ */
+/* $NetBSD: sd.c,v 1.323 2017/04/05 20:15:49 jdolecek Exp $ */
/*-
* Copyright (c) 1998, 2003, 2004 The NetBSD Foundation, Inc.
@@ -47,7 +47,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sd.c,v 1.322 2016/12/21 21:28:30 mlelstv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sd.c,v 1.323 2017/04/05 20:15:49 jdolecek Exp $");
#ifdef _KERNEL_OPT
#include "opt_scsi.h"
@@ -654,6 +654,7 @@ sd_diskstart(device_t dev, struct buf *b
struct scsipi_generic *cmdp;
struct scsipi_xfer *xs;
int error, flags, nblks, cmdlen;
+ int cdb_flags;
mutex_enter(chan_mtx(chan));
@@ -698,12 +699,27 @@ sd_diskstart(device_t dev, struct buf *b
nblks = howmany(bp->b_bcount, sd->params.blksize);
/*
+ * Pass FUA and/or DPO if requested. Must be done before CDB
+ * selection, as 6-byte CDB doesn't support the flags.
+ */
+ cdb_flags = 0;
+
+ if (bp->b_flags & B_MEDIA_FUA)
+ cdb_flags |= SRWB_FUA;
+
+ if (bp->b_flags & B_MEDIA_DPO)
+ cdb_flags |= SRWB_DPO;
+
+ /*
* Fill out the scsi command. Use the smallest CDB possible
- * (6-byte, 10-byte, or 16-byte).
+ * (6-byte, 10-byte, or 16-byte). If we need FUA or DPO,
+ * need to use 10-byte or bigger, as the 6-byte doesn't support
+ * the flags.
*/
if (((bp->b_rawblkno & 0x1fffff) == bp->b_rawblkno) &&
((nblks & 0xff) == nblks) &&
- !(periph->periph_quirks & PQUIRK_ONLYBIG)) {
+ !(periph->periph_quirks & PQUIRK_ONLYBIG) &&
+ !cdb_flags) {
/* 6-byte CDB */
memset(&cmd_small, 0, sizeof(cmd_small));
cmd_small.opcode = (bp->b_flags & B_READ) ?
@@ -732,6 +748,9 @@ sd_diskstart(device_t dev, struct buf *b
cmdp = (struct scsipi_generic *)&cmd16;
}
+ if (cdb_flags)
+ cmdp->bytes[0] = cdb_flags;
+
/*
* Figure out what flags to use.
*/
@@ -1796,7 +1815,9 @@ sd_getcache(struct sd_softc *sd, int *bi
int error, bits = 0;
int big;
union scsi_disk_pages *pages;
+ uint8_t dev_spec;
+ /* only SCSI-2 and later supported */
if (periph->periph_version < 2)
return (EOPNOTSUPP);
@@ -1806,10 +1827,13 @@ sd_getcache(struct sd_softc *sd, int *bi
if (error)
return (error);
- if (big)
+ if (big) {
pages = (void *)(&scsipi_sense.header.big + 1);
- else
+ dev_spec = scsipi_sense.header.big.dev_spec;
+ } else {
pages = (void *)(&scsipi_sense.header.small + 1);
+ dev_spec = scsipi_sense.header.small.dev_spec;
+ }
if ((pages->caching_params.flags & CACHING_RCD) == 0)
bits |= DKCACHE_READ;
@@ -1818,6 +1842,13 @@ sd_getcache(struct sd_softc *sd, int *bi
if (pages->caching_params.pg_code & PGCODE_PS)
bits |= DKCACHE_SAVE;
+ /*
+ * Support for FUA/DPO, defined starting with SCSI-2. Use only
+ * if device claims to support it, according to the MODE SENSE.
+ */
+ if (ISSET(dev_spec, SMH_DSP_DPOFUA))
+ bits |= DKCACHE_FUA | DKCACHE_DPO;
+
memset(&scsipi_sense, 0, sizeof(scsipi_sense));
error = sd_mode_sense(sd, SMS_DBD, &scsipi_sense,
sizeof(scsipi_sense.pages.caching_params),
Index: src/sys/kern/vfs_bio.c
diff -u src/sys/kern/vfs_bio.c:1.271 src/sys/kern/vfs_bio.c:1.272
--- src/sys/kern/vfs_bio.c:1.271 Tue Mar 21 10:46:49 2017
+++ src/sys/kern/vfs_bio.c Wed Apr 5 20:15:49 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_bio.c,v 1.271 2017/03/21 10:46:49 skrll Exp $ */
+/* $NetBSD: vfs_bio.c,v 1.272 2017/04/05 20:15:49 jdolecek Exp $ */
/*-
* Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -123,7 +123,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.271 2017/03/21 10:46:49 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.272 2017/04/05 20:15:49 jdolecek Exp $");
#ifdef _KERNEL_OPT
#include "opt_bufcache.h"
@@ -2027,7 +2027,7 @@ nestiobuf_iodone(buf_t *bp)
void
nestiobuf_setup(buf_t *mbp, buf_t *bp, int offset, size_t size)
{
- const int b_read = mbp->b_flags & B_READ;
+ const int b_pass = mbp->b_flags & (B_READ|B_MEDIA_FLAGS);
struct vnode *vp = mbp->b_vp;
KASSERT(mbp->b_bcount >= offset + size);
@@ -2035,14 +2035,14 @@ nestiobuf_setup(buf_t *mbp, buf_t *bp, i
bp->b_dev = mbp->b_dev;
bp->b_objlock = mbp->b_objlock;
bp->b_cflags = BC_BUSY;
- bp->b_flags = B_ASYNC | b_read;
+ bp->b_flags = B_ASYNC | b_pass;
bp->b_iodone = nestiobuf_iodone;
bp->b_data = (char *)mbp->b_data + offset;
bp->b_resid = bp->b_bcount = size;
bp->b_bufsize = bp->b_bcount;
bp->b_private = mbp;
BIO_COPYPRIO(bp, mbp);
- if (!b_read && vp != NULL) {
+ if (BUF_ISWRITE(bp) && vp != NULL) {
mutex_enter(vp->v_interlock);
vp->v_numoutput++;
mutex_exit(vp->v_interlock);
Index: src/sys/sys/buf.h
diff -u src/sys/sys/buf.h:1.126 src/sys/sys/buf.h:1.127
--- src/sys/sys/buf.h:1.126 Mon Dec 26 23:12:33 2016
+++ src/sys/sys/buf.h Wed Apr 5 20:15:49 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: buf.h,v 1.126 2016/12/26 23:12:33 pgoyette Exp $ */
+/* $NetBSD: buf.h,v 1.127 2017/04/05 20:15:49 jdolecek Exp $ */
/*-
* Copyright (c) 1999, 2000, 2007, 2008 The NetBSD Foundation, Inc.
@@ -198,16 +198,21 @@ struct buf {
#define B_RAW 0x00080000 /* Set by physio for raw transfers. */
#define B_READ 0x00100000 /* Read buffer. */
#define B_DEVPRIVATE 0x02000000 /* Device driver private flag. */
+#define B_MEDIA_FUA 0x08000000 /* Set Force Unit Access for media. */
+#define B_MEDIA_DPO 0x10000000 /* Set Disable Page Out for media. */
#define BUF_FLAGBITS \
"\20\1AGE\3ASYNC\4BAD\5BUSY\10DELWRI" \
"\12DONE\13COWDONE\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \
- "\23PHYS\24RAW\25READ\32DEVPRIVATE\33VFLUSH"
+ "\23PHYS\24RAW\25READ\32DEVPRIVATE\33VFLUSH\34MEDIA_FUA\35MEDIA_DPO"
/* Avoid weird code due to B_WRITE being a "pseudo flag" */
#define BUF_ISREAD(bp) (((bp)->b_flags & B_READ) == B_READ)
#define BUF_ISWRITE(bp) (((bp)->b_flags & B_READ) == B_WRITE)
+/* Media flags, to be passed for nested I/O */
+#define B_MEDIA_FLAGS (B_MEDIA_FUA|B_MEDIA_DPO)
+
/*
* This structure describes a clustered I/O. It is stored in the b_saveaddr
* field of the buffer on which I/O is done. At I/O completion, cluster
Index: src/sys/sys/dkio.h
diff -u src/sys/sys/dkio.h:1.23 src/sys/sys/dkio.h:1.24
--- src/sys/sys/dkio.h:1.23 Wed Apr 5 18:34:56 2017
+++ src/sys/sys/dkio.h Wed Apr 5 20:15:49 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: dkio.h,v 1.23 2017/04/05 18:34:56 jdolecek Exp $ */
+/* $NetBSD: dkio.h,v 1.24 2017/04/05 20:15:49 jdolecek Exp $ */
/*
* Copyright (c) 1987, 1988, 1993
@@ -85,6 +85,8 @@
#define DKCACHE_RCHANGE 0x000100 /* read enable is changeable */
#define DKCACHE_WCHANGE 0x000200 /* write enable is changeable */
#define DKCACHE_SAVE 0x010000 /* cache parameters are savable/save them */
+#define DKCACHE_FUA 0x020000 /* Force Unit Access supported */
+#define DKCACHE_DPO 0x040000 /* Disable Page Out supported */
/*
* Combine disk cache flags of two drives to get common cache capabilities.