Module Name:    src
Committed By:   jdolecek
Date:           Wed Apr 5 20:15:50 UTC 2017

Modified Files:
        src/sys/dev/ic: ld_nvme.c nvme.c nvmevar.h
        src/sys/dev/scsipi: scsipi_disk.h sd.c
        src/sys/kern: vfs_bio.c
        src/sys/sys: buf.h dkio.h

Log Message:
expose disk device FUA/DPO support via DIOCGCACHE, and allow the flags to be
set for I/O; implement support in sd(4) and nvme(4)

discussed on tech-kern

To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 src/sys/dev/ic/ld_nvme.c
cvs rdiff -u -r1.25 -r1.26 src/sys/dev/ic/nvme.c
cvs rdiff -u -r1.12 -r1.13 src/sys/dev/ic/nvmevar.h
cvs rdiff -u -r1.21 -r1.22 src/sys/dev/scsipi/scsipi_disk.h
cvs rdiff -u -r1.322 -r1.323 src/sys/dev/scsipi/sd.c
cvs rdiff -u -r1.271 -r1.272 src/sys/kern/vfs_bio.c
cvs rdiff -u -r1.126 -r1.127 src/sys/sys/buf.h
cvs rdiff -u -r1.23 -r1.24 src/sys/sys/dkio.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/dev/ic/ld_nvme.c diff -u src/sys/dev/ic/ld_nvme.c:1.14 src/sys/dev/ic/ld_nvme.c:1.15 --- src/sys/dev/ic/ld_nvme.c:1.14 Tue Feb 28 20:55:09 2017 +++ src/sys/dev/ic/ld_nvme.c Wed Apr 5 20:15:49 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: ld_nvme.c,v 1.14 2017/02/28 20:55:09 jdolecek Exp $ */ +/* $NetBSD: ld_nvme.c,v 1.15 2017/04/05 20:15:49 jdolecek Exp $ */ /*- * Copyright (C) 2016 NONAKA Kimihiro <non...@netbsd.org> @@ -26,7 +26,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.14 2017/02/28 20:55:09 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.15 2017/04/05 20:15:49 jdolecek Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -152,11 +152,15 @@ static int ld_nvme_start(struct ld_softc *ld, struct buf *bp) { struct ld_nvme_softc *sc = device_private(ld->sc_dv); + int flags = BUF_ISWRITE(bp) ? 0 : NVME_NS_CTX_F_READ; + + if (bp->b_flags & B_MEDIA_FUA) + flags |= NVME_NS_CTX_F_FUA; return nvme_ns_dobio(sc->sc_nvme, sc->sc_nsid, sc, bp, bp->b_data, bp->b_bcount, sc->sc_ld.sc_secsize, bp->b_rawblkno, - BUF_ISWRITE(bp) ? 0 : NVME_NS_CTX_F_READ, + flags, ld_nvme_biodone); } @@ -221,7 +225,11 @@ ld_nvme_getcache(struct ld_softc *ld, in int error; struct ld_nvme_softc *sc = device_private(ld->sc_dv); - *addr = 0; + /* + * DPO not supported, Dataset Management (DSM) field doesn't specify + * the same semantics. 
+ */ + *addr = DKCACHE_FUA; if (!nvme_has_volatile_write_cache(sc->sc_nvme)) { /* cache simply not present */ Index: src/sys/dev/ic/nvme.c diff -u src/sys/dev/ic/nvme.c:1.25 src/sys/dev/ic/nvme.c:1.26 --- src/sys/dev/ic/nvme.c:1.25 Tue Feb 28 20:53:50 2017 +++ src/sys/dev/ic/nvme.c Wed Apr 5 20:15:49 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: nvme.c,v 1.25 2017/02/28 20:53:50 jdolecek Exp $ */ +/* $NetBSD: nvme.c,v 1.26 2017/04/05 20:15:49 jdolecek Exp $ */ /* $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */ /* @@ -18,7 +18,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.25 2017/02/28 20:53:50 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.26 2017/04/05 20:15:49 jdolecek Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -727,6 +727,9 @@ nvme_ns_io_fill(struct nvme_queue *q, st htolem64(&sqe->slba, ccb->nnc_blkno); + if (ISSET(ccb->nnc_flags, NVME_NS_CTX_F_FUA)) + htolem16(&sqe->ioflags, NVM_SQE_IO_FUA); + /* guaranteed by upper layers, but check just in case */ KASSERT((ccb->nnc_datasize % ccb->nnc_secsize) == 0); htolem16(&sqe->nlb, (ccb->nnc_datasize / ccb->nnc_secsize) - 1); Index: src/sys/dev/ic/nvmevar.h diff -u src/sys/dev/ic/nvmevar.h:1.12 src/sys/dev/ic/nvmevar.h:1.13 --- src/sys/dev/ic/nvmevar.h:1.12 Tue Feb 28 20:53:50 2017 +++ src/sys/dev/ic/nvmevar.h Wed Apr 5 20:15:49 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: nvmevar.h,v 1.12 2017/02/28 20:53:50 jdolecek Exp $ */ +/* $NetBSD: nvmevar.h,v 1.13 2017/04/05 20:15:49 jdolecek Exp $ */ /* $OpenBSD: nvmevar.h,v 1.8 2016/04/14 11:18:32 dlg Exp $ */ /* @@ -64,6 +64,7 @@ struct nvme_ccb { uint16_t nnc_flags; #define NVME_NS_CTX_F_READ __BIT(0) #define NVME_NS_CTX_F_POLL __BIT(1) +#define NVME_NS_CTX_F_FUA __BIT(2) struct buf *nnc_buf; daddr_t nnc_blkno; Index: src/sys/dev/scsipi/scsipi_disk.h diff -u src/sys/dev/scsipi/scsipi_disk.h:1.21 src/sys/dev/scsipi/scsipi_disk.h:1.22 --- src/sys/dev/scsipi/scsipi_disk.h:1.21 Tue Dec 25 18:33:42 2007 +++ src/sys/dev/scsipi/scsipi_disk.h 
Wed Apr 5 20:15:49 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: scsipi_disk.h,v 1.21 2007/12/25 18:33:42 perry Exp $ */ +/* $NetBSD: scsipi_disk.h,v 1.22 2017/04/05 20:15:49 jdolecek Exp $ */ /* * SCSI and SCSI-like interfaces description @@ -62,9 +62,10 @@ struct scsipi_rw_10 { u_int8_t opcode; u_int8_t byte2; #define SRWB_RELADDR 0x01 /* obsolete */ -#define SRWB_FUA_NV 0x02 /* force unit access non-volatile cache */ -#define SRWB_FUA 0x08 /* force unit access */ -#define SRWB_DPO 0x10 /* disable page out */ +#define SRWB_FUA_NV 0x02 /* force unit access non-volatile cache (SCSI-3) */ +#define SRWB_RESV2 0x04 /* reserved (SCSI-2) */ +#define SRWB_FUA 0x08 /* force unit access volatile cache (SCSI-2) */ +#define SRWB_DPO 0x10 /* disable page out (SCSI-2) */ #define SRWB_PROTECT(x) ((x) << 5) u_int8_t addr[4]; u_int8_t reserved; @@ -159,4 +160,7 @@ struct scsipi_capacity_descriptor { #define SCSIPI_CAP_DESC_CODE_FORMATTED 0x2 #define SCSIPI_CAP_DESC_CODE_NONE 0x3 +/* defines for the device specific byte in the mode select/sense header */ +#define SMH_DSP_DPOFUA 0x10 + #endif /* _DEV_SCSIPI_SCSIPI_DISK_H_ */ Index: src/sys/dev/scsipi/sd.c diff -u src/sys/dev/scsipi/sd.c:1.322 src/sys/dev/scsipi/sd.c:1.323 --- src/sys/dev/scsipi/sd.c:1.322 Wed Dec 21 21:28:30 2016 +++ src/sys/dev/scsipi/sd.c Wed Apr 5 20:15:49 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: sd.c,v 1.322 2016/12/21 21:28:30 mlelstv Exp $ */ +/* $NetBSD: sd.c,v 1.323 2017/04/05 20:15:49 jdolecek Exp $ */ /*- * Copyright (c) 1998, 2003, 2004 The NetBSD Foundation, Inc. 
@@ -47,7 +47,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: sd.c,v 1.322 2016/12/21 21:28:30 mlelstv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sd.c,v 1.323 2017/04/05 20:15:49 jdolecek Exp $"); #ifdef _KERNEL_OPT #include "opt_scsi.h" @@ -654,6 +654,7 @@ sd_diskstart(device_t dev, struct buf *b struct scsipi_generic *cmdp; struct scsipi_xfer *xs; int error, flags, nblks, cmdlen; + int cdb_flags; mutex_enter(chan_mtx(chan)); @@ -698,12 +699,27 @@ sd_diskstart(device_t dev, struct buf *b nblks = howmany(bp->b_bcount, sd->params.blksize); /* + * Pass FUA and/or DPO if requested. Must be done before CDB + * selection, as 6-byte CDB doesn't support the flags. + */ + cdb_flags = 0; + + if (bp->b_flags & B_MEDIA_FUA) + cdb_flags |= SRWB_FUA; + + if (bp->b_flags & B_MEDIA_DPO) + cdb_flags |= SRWB_DPO; + + /* * Fill out the scsi command. Use the smallest CDB possible - * (6-byte, 10-byte, or 16-byte). + * (6-byte, 10-byte, or 16-byte). If we need FUA or DPO, + * need to use 10-byte or bigger, as the 6-byte doesn't support + * the flags. */ if (((bp->b_rawblkno & 0x1fffff) == bp->b_rawblkno) && ((nblks & 0xff) == nblks) && - !(periph->periph_quirks & PQUIRK_ONLYBIG)) { + !(periph->periph_quirks & PQUIRK_ONLYBIG) && + !cdb_flags) { /* 6-byte CDB */ memset(&cmd_small, 0, sizeof(cmd_small)); cmd_small.opcode = (bp->b_flags & B_READ) ? @@ -732,6 +748,9 @@ sd_diskstart(device_t dev, struct buf *b cmdp = (struct scsipi_generic *)&cmd16; } + if (cdb_flags) + cmdp->bytes[0] = cdb_flags; + /* * Figure out what flags to use. 
*/ @@ -1796,7 +1815,9 @@ sd_getcache(struct sd_softc *sd, int *bi int error, bits = 0; int big; union scsi_disk_pages *pages; + uint8_t dev_spec; + /* only SCSI-2 and later supported */ if (periph->periph_version < 2) return (EOPNOTSUPP); @@ -1806,10 +1827,13 @@ sd_getcache(struct sd_softc *sd, int *bi if (error) return (error); - if (big) + if (big) { pages = (void *)(&scsipi_sense.header.big + 1); - else + dev_spec = scsipi_sense.header.big.dev_spec; + } else { pages = (void *)(&scsipi_sense.header.small + 1); + dev_spec = scsipi_sense.header.small.dev_spec; + } if ((pages->caching_params.flags & CACHING_RCD) == 0) bits |= DKCACHE_READ; @@ -1818,6 +1842,13 @@ sd_getcache(struct sd_softc *sd, int *bi if (pages->caching_params.pg_code & PGCODE_PS) bits |= DKCACHE_SAVE; + /* + * Support for FUA/DPO, defined starting with SCSI-2. Use only + * if device claims to support it, according to the MODE SENSE. + */ + if (ISSET(dev_spec, SMH_DSP_DPOFUA)) + bits |= DKCACHE_FUA | DKCACHE_DPO; + memset(&scsipi_sense, 0, sizeof(scsipi_sense)); error = sd_mode_sense(sd, SMS_DBD, &scsipi_sense, sizeof(scsipi_sense.pages.caching_params), Index: src/sys/kern/vfs_bio.c diff -u src/sys/kern/vfs_bio.c:1.271 src/sys/kern/vfs_bio.c:1.272 --- src/sys/kern/vfs_bio.c:1.271 Tue Mar 21 10:46:49 2017 +++ src/sys/kern/vfs_bio.c Wed Apr 5 20:15:49 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_bio.c,v 1.271 2017/03/21 10:46:49 skrll Exp $ */ +/* $NetBSD: vfs_bio.c,v 1.272 2017/04/05 20:15:49 jdolecek Exp $ */ /*- * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. 
@@ -123,7 +123,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.271 2017/03/21 10:46:49 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.272 2017/04/05 20:15:49 jdolecek Exp $"); #ifdef _KERNEL_OPT #include "opt_bufcache.h" @@ -2027,7 +2027,7 @@ nestiobuf_iodone(buf_t *bp) void nestiobuf_setup(buf_t *mbp, buf_t *bp, int offset, size_t size) { - const int b_read = mbp->b_flags & B_READ; + const int b_pass = mbp->b_flags & (B_READ|B_MEDIA_FLAGS); struct vnode *vp = mbp->b_vp; KASSERT(mbp->b_bcount >= offset + size); @@ -2035,14 +2035,14 @@ nestiobuf_setup(buf_t *mbp, buf_t *bp, i bp->b_dev = mbp->b_dev; bp->b_objlock = mbp->b_objlock; bp->b_cflags = BC_BUSY; - bp->b_flags = B_ASYNC | b_read; + bp->b_flags = B_ASYNC | b_pass; bp->b_iodone = nestiobuf_iodone; bp->b_data = (char *)mbp->b_data + offset; bp->b_resid = bp->b_bcount = size; bp->b_bufsize = bp->b_bcount; bp->b_private = mbp; BIO_COPYPRIO(bp, mbp); - if (!b_read && vp != NULL) { + if (BUF_ISWRITE(bp) && vp != NULL) { mutex_enter(vp->v_interlock); vp->v_numoutput++; mutex_exit(vp->v_interlock); Index: src/sys/sys/buf.h diff -u src/sys/sys/buf.h:1.126 src/sys/sys/buf.h:1.127 --- src/sys/sys/buf.h:1.126 Mon Dec 26 23:12:33 2016 +++ src/sys/sys/buf.h Wed Apr 5 20:15:49 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: buf.h,v 1.126 2016/12/26 23:12:33 pgoyette Exp $ */ +/* $NetBSD: buf.h,v 1.127 2017/04/05 20:15:49 jdolecek Exp $ */ /*- * Copyright (c) 1999, 2000, 2007, 2008 The NetBSD Foundation, Inc. @@ -198,16 +198,21 @@ struct buf { #define B_RAW 0x00080000 /* Set by physio for raw transfers. */ #define B_READ 0x00100000 /* Read buffer. */ #define B_DEVPRIVATE 0x02000000 /* Device driver private flag. */ +#define B_MEDIA_FUA 0x08000000 /* Set Force Unit Access for media. */ +#define B_MEDIA_DPO 0x10000000 /* Set Disable Page Out for media. 
*/ #define BUF_FLAGBITS \ "\20\1AGE\3ASYNC\4BAD\5BUSY\10DELWRI" \ "\12DONE\13COWDONE\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \ - "\23PHYS\24RAW\25READ\32DEVPRIVATE\33VFLUSH" + "\23PHYS\24RAW\25READ\32DEVPRIVATE\33VFLUSH\34MEDIA_FUA\35MEDIA_DPO" /* Avoid weird code due to B_WRITE being a "pseudo flag" */ #define BUF_ISREAD(bp) (((bp)->b_flags & B_READ) == B_READ) #define BUF_ISWRITE(bp) (((bp)->b_flags & B_READ) == B_WRITE) +/* Media flags, to be passed for nested I/O */ +#define B_MEDIA_FLAGS (B_MEDIA_FUA|B_MEDIA_DPO) + /* * This structure describes a clustered I/O. It is stored in the b_saveaddr * field of the buffer on which I/O is done. At I/O completion, cluster Index: src/sys/sys/dkio.h diff -u src/sys/sys/dkio.h:1.23 src/sys/sys/dkio.h:1.24 --- src/sys/sys/dkio.h:1.23 Wed Apr 5 18:34:56 2017 +++ src/sys/sys/dkio.h Wed Apr 5 20:15:49 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: dkio.h,v 1.23 2017/04/05 18:34:56 jdolecek Exp $ */ +/* $NetBSD: dkio.h,v 1.24 2017/04/05 20:15:49 jdolecek Exp $ */ /* * Copyright (c) 1987, 1988, 1993 @@ -85,6 +85,8 @@ #define DKCACHE_RCHANGE 0x000100 /* read enable is changeable */ #define DKCACHE_WCHANGE 0x000200 /* write enable is changeable */ #define DKCACHE_SAVE 0x010000 /* cache parameters are savable/save them */ +#define DKCACHE_FUA 0x020000 /* Force Unit Access supported */ +#define DKCACHE_DPO 0x040000 /* Disable Page Out supported */ /* * Combine disk cache flags of two drives to get common cache capabilities.