Module Name:    src
Committed By:   nonaka
Date:           Sun May  1 10:21:02 UTC 2016

Modified Files:
        src/distrib/sets/lists/man: mi
        src/share/man/man4: Makefile ld.4 pci.4
        src/sys/arch/amd64/conf: ALL GENERIC XEN3_DOM0
        src/sys/conf: files
        src/sys/dev: DEVNAMES
        src/sys/dev/pci: files.pci
Added Files:
        src/share/man/man4: nvme.4
        src/sys/dev/ic: ld_nvme.c nvme.c nvmereg.h nvmevar.h
        src/sys/dev/pci: nvme_pci.c

Log Message:
Added nvme(4) for Non-Volatile Memory Host Controller Interface devices.
Ported from OpenBSD.


To generate a diff of this commit:
cvs rdiff -u -r1.1523 -r1.1524 src/distrib/sets/lists/man/mi
cvs rdiff -u -r1.627 -r1.628 src/share/man/man4/Makefile
cvs rdiff -u -r1.19 -r1.20 src/share/man/man4/ld.4
cvs rdiff -u -r0 -r1.1 src/share/man/man4/nvme.4
cvs rdiff -u -r1.95 -r1.96 src/share/man/man4/pci.4
cvs rdiff -u -r1.33 -r1.34 src/sys/arch/amd64/conf/ALL
cvs rdiff -u -r1.431 -r1.432 src/sys/arch/amd64/conf/GENERIC
cvs rdiff -u -r1.117 -r1.118 src/sys/arch/amd64/conf/XEN3_DOM0
cvs rdiff -u -r1.1157 -r1.1158 src/sys/conf/files
cvs rdiff -u -r1.297 -r1.298 src/sys/dev/DEVNAMES
cvs rdiff -u -r0 -r1.1 src/sys/dev/ic/ld_nvme.c src/sys/dev/ic/nvme.c \
    src/sys/dev/ic/nvmereg.h src/sys/dev/ic/nvmevar.h
cvs rdiff -u -r1.380 -r1.381 src/sys/dev/pci/files.pci
cvs rdiff -u -r0 -r1.1 src/sys/dev/pci/nvme_pci.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/distrib/sets/lists/man/mi
diff -u src/distrib/sets/lists/man/mi:1.1523 src/distrib/sets/lists/man/mi:1.1524
--- src/distrib/sets/lists/man/mi:1.1523	Thu Apr 28 15:55:15 2016
+++ src/distrib/sets/lists/man/mi	Sun May  1 10:21:01 2016
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.1523 2016/04/28 15:55:15 christos Exp $
+# $NetBSD: mi,v 1.1524 2016/05/01 10:21:01 nonaka Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 #
@@ -1468,6 +1468,7 @@
 ./usr/share/man/cat4/ntwo.0			man-sys-catman		.cat
 ./usr/share/man/cat4/ntwoc.0			man-sys-catman		.cat
 ./usr/share/man/cat4/null.0			man-sys-catman		.cat
+./usr/share/man/cat4/nvme.0			man-sys-catman		.cat
 ./usr/share/man/cat4/nxt2k.0			man-sys-catman		.cat
 ./usr/share/man/cat4/oak.0			man-sys-catman		.cat
 ./usr/share/man/cat4/oboe.0			man-sys-catman		.cat
@@ -4500,6 +4501,7 @@
 ./usr/share/man/html4/ntwo.html			man-sys-htmlman		html
 ./usr/share/man/html4/ntwoc.html		man-sys-htmlman		html
 ./usr/share/man/html4/null.html			man-sys-htmlman		html
+./usr/share/man/html4/nvme.html			man-sys-htmlman		html
 ./usr/share/man/html4/nxt2k.html		man-sys-htmlman		html
 ./usr/share/man/html4/oak.html			man-sys-htmlman		html
 ./usr/share/man/html4/oboe.html			man-sys-htmlman		html
@@ -7390,6 +7392,7 @@
 ./usr/share/man/man4/ntwo.4			man-sys-man		.man
 ./usr/share/man/man4/ntwoc.4			man-sys-man		.man
 ./usr/share/man/man4/null.4			man-sys-man		.man
+./usr/share/man/man4/nvme.4			man-sys-man		.man
 ./usr/share/man/man4/nxt2k.4			man-sys-man		.man
 ./usr/share/man/man4/oak.4			man-sys-man		.man
 ./usr/share/man/man4/oboe.4			man-sys-man		.man

Index: src/share/man/man4/Makefile
diff -u src/share/man/man4/Makefile:1.627 src/share/man/man4/Makefile:1.628
--- src/share/man/man4/Makefile:1.627	Mon Jan 18 00:34:43 2016
+++ src/share/man/man4/Makefile	Sun May  1 10:21:01 2016
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile,v 1.627 2016/01/18 00:34:43 kamil Exp $
+#	$NetBSD: Makefile,v 1.628 2016/05/01 10:21:01 nonaka Exp $
 #	@(#)Makefile	8.1 (Berkeley) 6/18/93
 
 MAN=	aac.4 ac97.4 acardide.4 aceride.4 acphy.4 \
@@ -45,6 +45,7 @@ MAN=	aac.4 ac97.4 acardide.4 aceride.4 a
 	mtio.4 msm6242b.4 multicast.4 mvsata.4 \
 	nadb.4 ne.4 neo.4 netintro.4 nfe.4 nfsmb.4 njata.4 njs.4 \
 	nsclpcsio.4 nside.4 nsp.4 nsphy.4 nsphyter.4 ntwoc.4 null.4 nsmb.4 \
+	nvme.4 \
 	oak.4 oosiop.4 opl.4 options.4 optiide.4 osiop.4 otus.4 \
 	pad.4 pas.4 pcdisplay.4 pcf8563rtc.4 pciide.4 pckbc.4 pckbd.4 pcn.4 \
 	pcppi.4 pcscp.4 pcweasel.4 pdcide.4 pdcsata.4 piixide.4 piixpcib.4 \

Index: src/share/man/man4/ld.4
diff -u src/share/man/man4/ld.4:1.19 src/share/man/man4/ld.4:1.20
--- src/share/man/man4/ld.4:1.19	Sat Nov  5 09:22:44 2011
+++ src/share/man/man4/ld.4	Sun May  1 10:21:01 2016
@@ -1,4 +1,4 @@
-.\"	$NetBSD: ld.4,v 1.19 2011/11/05 09:22:44 hannken Exp $
+.\"	$NetBSD: ld.4,v 1.20 2016/05/01 10:21:01 nonaka Exp $
 .\"
 .\" Copyright (c) 2000 The NetBSD Foundation, Inc.
 .\" All rights reserved.
@@ -41,6 +41,7 @@
 .Cd "ld* at icp? unit ?"
 .Cd "ld* at iop? tid ?"
 .Cd "ld* at mlx? unit ?"
+.Cd "ld* at nvme? nsid ?"
 .Cd "ld* at sdmmc?"
 .Cd "ld* at twa? unit ?"
 .Cd "ld* at twe? unit ?"
@@ -71,6 +72,7 @@ partition
 .Xr intro 4 ,
 .Xr iop 4 ,
 .Xr mlx 4 ,
+.Xr nvme 4 ,
 .Xr sdmmc 4 ,
 .Xr twa 4 ,
 .Xr twe 4 ,

Index: src/share/man/man4/pci.4
diff -u src/share/man/man4/pci.4:1.95 src/share/man/man4/pci.4:1.96
--- src/share/man/man4/pci.4:1.95	Fri Aug 28 08:01:15 2015
+++ src/share/man/man4/pci.4	Sun May  1 10:21:01 2016
@@ -1,4 +1,4 @@
-.\"	$NetBSD: pci.4,v 1.95 2015/08/28 08:01:15 wiz Exp $
+.\"	$NetBSD: pci.4,v 1.96 2016/05/01 10:21:01 nonaka Exp $
 .\"
 .\" Copyright (c) 1997 Jason R. Thorpe.  All rights reserved.
 .\" Copyright (c) 1997 Jonathan Stone
@@ -404,6 +404,8 @@ USB EHCI host controllers.
 I2O I/O processors.
 .It mr
 Guillemot Maxi Radio FM 2000 FM radio device.
+.It nvme
+Non-Volatile Memory Host controllers.
 .It oboe
 Toshiba OBOE IrDA SIR/FIR controller.
 .It ohci
@@ -483,6 +485,7 @@ VGA graphics boards.
 .Xr neo 4 ,
 .Xr nfe 4 ,
 .Xr ntwoc 4 ,
+.Xr nvme 4 ,
 .Xr oboe 4 ,
 .Xr ohci 4 ,
 .Xr pcic 4 ,

Index: src/sys/arch/amd64/conf/ALL
diff -u src/sys/arch/amd64/conf/ALL:1.33 src/sys/arch/amd64/conf/ALL:1.34
--- src/sys/arch/amd64/conf/ALL:1.33	Tue Nov 10 13:01:41 2015
+++ src/sys/arch/amd64/conf/ALL	Sun May  1 10:21:01 2016
@@ -1,4 +1,4 @@
-# $NetBSD: ALL,v 1.33 2015/11/10 13:01:41 tnn Exp $
+# $NetBSD: ALL,v 1.34 2016/05/01 10:21:01 nonaka Exp $
 # From NetBSD: GENERIC,v 1.787 2006/10/01 18:37:54 bouyer Exp
 #
 # ALL machine description file
@@ -17,7 +17,7 @@ include 	"arch/amd64/conf/std.amd64"
 
 options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
 
-#ident 		"ALL-$Revision: 1.33 $"
+#ident 		"ALL-$Revision: 1.34 $"
 
 maxusers	64		# estimated number of users
 
@@ -865,6 +865,11 @@ st*	at atapibus? drive ? flags 0x0000	# 
 uk*	at atapibus? drive ? flags 0x0000	# ATAPI unknown
 
 
+# NVM Express controllers and devices
+nvme*	at pci? dev ? function ?
+ld*	at nvme? nsid ?
+
+
 # Miscellaneous mass storage devices
 
 # ISA floppy

Index: src/sys/arch/amd64/conf/GENERIC
diff -u src/sys/arch/amd64/conf/GENERIC:1.431 src/sys/arch/amd64/conf/GENERIC:1.432
--- src/sys/arch/amd64/conf/GENERIC:1.431	Sat Apr 23 10:15:27 2016
+++ src/sys/arch/amd64/conf/GENERIC	Sun May  1 10:21:01 2016
@@ -1,4 +1,4 @@
-# $NetBSD: GENERIC,v 1.431 2016/04/23 10:15:27 skrll Exp $
+# $NetBSD: GENERIC,v 1.432 2016/05/01 10:21:01 nonaka Exp $
 #
 # GENERIC machine description file
 #
@@ -22,7 +22,7 @@ include	"arch/amd64/conf/std.amd64"
 
 options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
 
-#ident 		"GENERIC-$Revision: 1.431 $"
+#ident 		"GENERIC-$Revision: 1.432 $"
 
 maxusers	64		# estimated number of users
 
@@ -703,6 +703,11 @@ st*	at atapibus? drive ? flags 0x0000	# 
 uk*	at atapibus? drive ? flags 0x0000	# ATAPI unknown
 
 
+# NVM Express controllers and devices
+nvme*	at pci? dev ? function ?
+ld*	at nvme? nsid ?
+
+
 # Miscellaneous mass storage devices
 
 # ISA floppy

Index: src/sys/arch/amd64/conf/XEN3_DOM0
diff -u src/sys/arch/amd64/conf/XEN3_DOM0:1.117 src/sys/arch/amd64/conf/XEN3_DOM0:1.118
--- src/sys/arch/amd64/conf/XEN3_DOM0:1.117	Sat Mar 19 23:21:02 2016
+++ src/sys/arch/amd64/conf/XEN3_DOM0	Sun May  1 10:21:01 2016
@@ -1,4 +1,4 @@
-# $NetBSD: XEN3_DOM0,v 1.117 2016/03/19 23:21:02 gdt Exp $
+# $NetBSD: XEN3_DOM0,v 1.118 2016/05/01 10:21:01 nonaka Exp $
 
 include 	"arch/amd64/conf/std.xen"
 
@@ -529,6 +529,10 @@ ld*	at mlx? unit ?
 
 icpsp*	at icp? unit ?			# SCSI pass-through
 
+# NVM Express controllers and devices
+nvme*	at pci? dev ? function ?
+ld*	at nvme? nsid ?
+
 # wscons
 pckbc0		at isa?			# pc keyboard controller
 pckbd*		at pckbc?		# PC keyboard

Index: src/sys/conf/files
diff -u src/sys/conf/files:1.1157 src/sys/conf/files:1.1158
--- src/sys/conf/files:1.1157	Wed Apr 27 19:46:11 2016
+++ src/sys/conf/files	Sun May  1 10:21:02 2016
@@ -1,4 +1,4 @@
-#	$NetBSD: files,v 1.1157 2016/04/27 19:46:11 christos Exp $
+#	$NetBSD: files,v 1.1158 2016/05/01 10:21:02 nonaka Exp $
 #	@(#)files.newconf	7.5 (Berkeley) 5/10/93
 
 version 	20150846
@@ -1374,6 +1374,14 @@ file	dev/ic/bwi.c			bwi
 device	dme: arp, ether, ifnet
 file	dev/ic/dm9000.c			dme
 
+# NVM Express Controller
+#
+device	nvme {nsid = -1}
+file	dev/ic/nvme.c			nvme
+
+attach	ld at nvme with ld_nvme
+file	dev/ic/ld_nvme.c		ld_nvme
+
 # legitimate pseudo-devices
 #
 defpseudodev vnd:	disk

Index: src/sys/dev/DEVNAMES
diff -u src/sys/dev/DEVNAMES:1.297 src/sys/dev/DEVNAMES:1.298
--- src/sys/dev/DEVNAMES:1.297	Tue Jan  5 13:16:37 2016
+++ src/sys/dev/DEVNAMES	Sun May  1 10:21:02 2016
@@ -1,4 +1,4 @@
-#	$NetBSD: DEVNAMES,v 1.297 2016/01/05 13:16:37 msaitoh Exp $
+#	$NetBSD: DEVNAMES,v 1.298 2016/05/01 10:21:02 nonaka Exp $
 #
 # This file contains all used device names and defined attributes in
 # alphabetical order. New devices added to the system somewhere should first
@@ -970,6 +970,7 @@ nsphy			MI
 nsphyter		MI
 ntwoc			MI
 nubus			mac68k
+nvme			MI
 nvr			atari
 nvram			macppc
 oak			MI

Index: src/sys/dev/pci/files.pci
diff -u src/sys/dev/pci/files.pci:1.380 src/sys/dev/pci/files.pci:1.381
--- src/sys/dev/pci/files.pci:1.380	Tue Jan  5 12:18:42 2016
+++ src/sys/dev/pci/files.pci	Sun May  1 10:21:02 2016
@@ -1,4 +1,4 @@
-#	$NetBSD: files.pci,v 1.380 2016/01/05 12:18:42 msaitoh Exp $
+#	$NetBSD: files.pci,v 1.381 2016/05/01 10:21:02 nonaka Exp $
 #
 # Config file and device description for machine-independent PCI code.
 # Included by ports that need it.  Requires that the SCSI files be
@@ -1175,6 +1175,10 @@ defflag	opt_gffb.h	GFFB_DEBUG
 attach	rtsx at pci with rtsx_pci
 file	dev/pci/rtsx_pci.c	rtsx_pci
 
+# NVM Express Controller
+attach	nvme at pci with nvme_pci
+file	dev/pci/nvme_pci.c	nvme_pci
+
 # PCI graphics devices with DRM/KMS
 include "external/bsd/drm2/pci/files.drmkms_pci"
 

Added files:

Index: src/share/man/man4/nvme.4
diff -u /dev/null src/share/man/man4/nvme.4:1.1
--- /dev/null	Sun May  1 10:21:02 2016
+++ src/share/man/man4/nvme.4	Sun May  1 10:21:01 2016
@@ -0,0 +1,56 @@
+.\"	$NetBSD: nvme.4,v 1.1 2016/05/01 10:21:01 nonaka Exp $
+.\"	$OpenBSD: nvme.4,v 1.2 2016/04/14 11:53:37 jmc Exp $
+.\"
+.\" Copyright (c) 2016 David Gwynne <[email protected]>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd May 1, 2016
+.Dt NVME 4
+.Os
+.Sh NAME
+.Nm nvme
+.Nd Non-Volatile Memory Host Controller Interface
+.Sh SYNOPSIS
+.Cd "nvme* at pci? dev ? function ?"
+.Sh DESCRIPTION
+The
+.Nm
+driver provides support for NVMe, or NVM Express,
+storage controllers conforming to the
+Non-Volatile Memory Host Controller Interface specification.
+.Sh SEE ALSO
+.Xr intro 4 ,
+.Xr ld 4 ,
+.Xr pci 4
+.Sh HISTORY
+The
+.Nm
+driver first appeared in
+.Ox 6.0
+and in
+.Nx 8.0 .
+.Sh AUTHORS
+.An -nosplit
+The
+.Nm
+driver was written by
+.An David Gwynne
+.Aq [email protected]
+for
+.Ox
+and ported to
+.Nx
+by
+.An NONAKA Kimihiro
+.Aq [email protected] .

Index: src/sys/dev/ic/ld_nvme.c
diff -u /dev/null src/sys/dev/ic/ld_nvme.c:1.1
--- /dev/null	Sun May  1 10:21:02 2016
+++ src/sys/dev/ic/ld_nvme.c	Sun May  1 10:21:02 2016
@@ -0,0 +1,236 @@
+/*	$NetBSD: ld_nvme.c,v 1.1 2016/05/01 10:21:02 nonaka Exp $	*/
+
+/*-
+ * Copyright (C) 2016 NONAKA Kimihiro <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.1 2016/05/01 10:21:02 nonaka Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/device.h>
+#include <sys/buf.h>
+#include <sys/disk.h>
+#include <sys/kmem.h>
+
+#include <dev/ldvar.h>
+#include <dev/ic/nvmereg.h>
+#include <dev/ic/nvmevar.h>
+
+/*
+ * Per-device state of an ld(4) disk that sits on top of one NVMe namespace.
+ * Filled in by ld_nvme_attach().
+ */
+struct ld_nvme_softc {
+	struct ld_softc		sc_ld;		/* generic ld(4) state */
+	struct nvme_softc	*sc_nvme;	/* softc of the parent device */
+
+	uint16_t		sc_nsid;	/* copied from naa_nsid */
+};
+
+/* autoconf entry points, registered through CFATTACH_DECL_NEW below */
+static int	ld_nvme_match(device_t, cfdata_t, void *);
+static void	ld_nvme_attach(device_t, device_t, void *);
+static int	ld_nvme_detach(device_t, int);
+
+CFATTACH_DECL_NEW(ld_nvme, sizeof(struct ld_nvme_softc),
+    ld_nvme_match, ld_nvme_attach, ld_nvme_detach, NULL);
+
+/* callbacks that ld_nvme_attach() installs into struct ld_softc */
+static int	ld_nvme_start(struct ld_softc *, struct buf *);
+static int	ld_nvme_dump(struct ld_softc *, void *, int, int);
+static int	ld_nvme_flush(struct ld_softc *, int);
+
+/* common read/write submission path and the completion handlers */
+static int	ld_nvme_dobio(struct ld_nvme_softc *, void *, int, daddr_t,
+		    int, struct buf *);
+static void	ld_nvme_biodone(struct nvme_ns_context *);
+static void	ld_nvme_syncdone(struct nvme_ns_context *);
+
+
+/*
+ * Accept every namespace id offered by the parent except 0.
+ * Returns 1 on match, 0 otherwise.
+ */
+static int
+ld_nvme_match(device_t parent, cfdata_t match, void *aux)
+{
+	const struct nvme_attach_args *args = aux;
+
+	return (args->naa_nsid != 0) ? 1 : 0;
+}
+
+/*
+ * Attach an ld(4) disk to the namespace named in the attach arguments.
+ *
+ * The namespace is identified first; its size (nsze) and the LBA format
+ * selected by flbas provide the sector count and sector size.  If the
+ * identify step fails an error is printed and ldattach() is not called.
+ */
+static void
+ld_nvme_attach(device_t parent, device_t self, void *aux)
+{
+	struct ld_nvme_softc *lsc = device_private(self);
+	struct ld_softc *ldsc = &lsc->sc_ld;
+	struct nvme_attach_args *args = aux;
+	struct nvme_namespace *ns;
+	struct nvm_namespace_format *fmt;
+	uint64_t nblocks;
+
+	ldsc->sc_dv = self;
+	lsc->sc_nvme = device_private(parent);
+	lsc->sc_nsid = args->naa_nsid;
+
+	aprint_naive("\n");
+	aprint_normal("\n");
+
+	if (nvme_ns_identify(lsc->sc_nvme, lsc->sc_nsid) != 0) {
+		aprint_error_dev(self, "couldn't identify namespace\n");
+		return;
+	}
+
+	ns = nvme_ns_get(lsc->sc_nvme, lsc->sc_nsid);
+	KASSERT(ns);
+	nblocks = lemtoh64(&ns->ident->nsze);
+	fmt = &ns->ident->lbaf[NVME_ID_NS_FLBAS(ns->ident->flbas)];
+
+	/* geometry and limits */
+	ldsc->sc_secsize = 1 << fmt->lbads;
+	ldsc->sc_secperunit = nblocks;
+	ldsc->sc_maxxfer = MAXPHYS;
+	ldsc->sc_maxqueuecnt = args->naa_qentries;
+
+	/* callbacks */
+	ldsc->sc_start = ld_nvme_start;
+	ldsc->sc_dump = ld_nvme_dump;
+	ldsc->sc_flush = ld_nvme_flush;
+
+	ldsc->sc_flags = LDF_ENABLED;
+	ldattach(ldsc);
+}
+
+/*
+ * Detach the disk.  If ldbegindetach() refuses, its error is returned and
+ * nothing is torn down; otherwise the ld(4) layer finishes detaching and
+ * the namespace is released via nvme_ns_free().
+ */
+static int
+ld_nvme_detach(device_t self, int flags)
+{
+	struct ld_nvme_softc *lsc = device_private(self);
+	int error;
+
+	error = ldbegindetach(&lsc->sc_ld, flags);
+	if (error != 0)
+		return error;
+
+	ldenddetach(&lsc->sc_ld);
+	nvme_ns_free(lsc->sc_nvme, lsc->sc_nsid);
+
+	return 0;
+}
+
+/*
+ * ld(4) start callback: submit the transfer described by bp.
+ */
+static int
+ld_nvme_start(struct ld_softc *ld, struct buf *bp)
+{
+	struct ld_nvme_softc *lsc = device_private(ld->sc_dv);
+	int is_write = BUF_ISWRITE(bp);
+
+	return ld_nvme_dobio(lsc, bp->b_data, bp->b_bcount, bp->b_rawblkno,
+	    is_write, bp);
+}
+
+/*
+ * ld(4) dump callback: write blkcnt sectors starting at blkno.  No buf is
+ * passed down, which makes ld_nvme_dobio() take its polled path.
+ */
+static int
+ld_nvme_dump(struct ld_softc *ld, void *data, int blkno, int blkcnt)
+{
+	struct ld_nvme_softc *lsc = device_private(ld->sc_dv);
+	int nbytes = blkcnt * ld->sc_secsize;
+
+	return ld_nvme_dobio(lsc, data, nbytes, blkno, 1, NULL);
+}
+
+/*
+ * Fill in a namespace I/O context for a read or write and hand it to
+ * nvme_ns_dobio().
+ *
+ *   data/datasize  buffer and its length in bytes
+ *   blkno          starting block
+ *   dowrite        non-zero for a write, zero for a read
+ *   bp             originating buf, or NULL for a polled request (dump)
+ *
+ * With a buf the context is allocated with PR_WAITOK and completion is
+ * reported through ld_nvme_biodone().  Without one the allocation uses
+ * PR_NOWAIT, the request is flagged NVME_NS_CTX_F_POLL and submitted at
+ * splbio().
+ *
+ * Returns ENOMEM if no context could be allocated, otherwise whatever
+ * nvme_ns_dobio() returns.
+ */
+static int
+ld_nvme_dobio(struct ld_nvme_softc *sc, void *data, int datasize, daddr_t blkno,
+    int dowrite, struct buf *bp)
+{
+	struct nvme_ns_context *ctx;
+	int error;
+	int s = 0;
+
+	ctx = nvme_ns_get_ctx(bp != NULL ? PR_WAITOK : PR_NOWAIT);
+	if (ctx == NULL) {
+		/* Only reachable with PR_NOWAIT, i.e. on the polled path. */
+		return ENOMEM;
+	}
+	ctx->nnc_cookie = sc;
+	ctx->nnc_nsid = sc->sc_nsid;
+	ctx->nnc_done = ld_nvme_biodone;
+	ctx->nnc_buf = bp;
+	ctx->nnc_data = data;
+	ctx->nnc_datasize = datasize;
+	ctx->nnc_secsize = sc->sc_ld.sc_secsize;
+	ctx->nnc_blkno = blkno;
+	ctx->nnc_flags = dowrite ? 0 : NVME_NS_CTX_F_READ;
+	if (bp == NULL) {
+		SET(ctx->nnc_flags, NVME_NS_CTX_F_POLL);
+		s = splbio();
+	}
+	error = nvme_ns_dobio(sc->sc_nvme, ctx);
+	if (bp == NULL) {
+		splx(s);
+	}
+
+	return error;
+}
+
+/*
+ * Completion handler for requests issued by ld_nvme_dobio().
+ *
+ * A status code other than NVME_CQE_SC_SUCCESS is logged; if a buf is
+ * attached it is additionally failed with EIO and the whole transfer is
+ * marked as residual.  The buf (if any) is handed back via lddone() and
+ * the context is returned with nvme_ns_put_ctx().
+ */
+static void
+ld_nvme_biodone(struct nvme_ns_context *ctx)
+{
+	struct ld_nvme_softc *lsc = ctx->nnc_cookie;
+	struct buf *bp = ctx->nnc_buf;
+	int status = NVME_CQE_SC(ctx->nnc_status);
+	int failed = (status != NVME_CQE_SC_SUCCESS);
+
+	if (bp != NULL) {
+		if (failed) {
+			bp->b_error = EIO;
+			bp->b_resid = bp->b_bcount;
+		} else {
+			bp->b_resid = 0;
+		}
+	}
+
+	if (failed)
+		aprint_error_dev(lsc->sc_ld.sc_dv, "I/O error\n");
+
+	if (bp != NULL)
+		lddone(&lsc->sc_ld, bp);
+
+	nvme_ns_put_ctx(ctx);
+}
+
+/*
+ * ld(4) flush callback: issue a sync request for this namespace through
+ * nvme_ns_sync().
+ *
+ * When LDFL_POLL is set the context is allocated with PR_NOWAIT, the
+ * request is flagged NVME_NS_CTX_F_POLL and submitted at splbio();
+ * otherwise the allocation may sleep.  The context is released by
+ * ld_nvme_syncdone().
+ *
+ * Returns ENOMEM if no context could be allocated, otherwise whatever
+ * nvme_ns_sync() returns.
+ */
+static int
+ld_nvme_flush(struct ld_softc *ld, int flags)
+{
+	struct ld_nvme_softc *sc = device_private(ld->sc_dv);
+	struct nvme_ns_context *ctx;
+	int error;
+	int s = 0;
+
+	ctx = nvme_ns_get_ctx((flags & LDFL_POLL) ? PR_NOWAIT : PR_WAITOK);
+	if (ctx == NULL) {
+		/* Only reachable with PR_NOWAIT, i.e. when polling. */
+		return ENOMEM;
+	}
+	ctx->nnc_cookie = sc;
+	ctx->nnc_nsid = sc->sc_nsid;
+	ctx->nnc_done = ld_nvme_syncdone;
+	ctx->nnc_flags = 0;
+	if (flags & LDFL_POLL) {
+		SET(ctx->nnc_flags, NVME_NS_CTX_F_POLL);
+		s = splbio();
+	}
+	error = nvme_ns_sync(sc->sc_nvme, ctx);
+	if (flags & LDFL_POLL) {
+		splx(s);
+	}
+
+	return error;
+}
+
+/*
+ * Completion handler for ld_nvme_flush(): the only thing left to do is to
+ * give the context back.
+ */
+static void
+ld_nvme_syncdone(struct nvme_ns_context *ctx)
+{
+	nvme_ns_put_ctx(ctx);
+}
Index: src/sys/dev/ic/nvme.c
diff -u /dev/null src/sys/dev/ic/nvme.c:1.1
--- /dev/null	Sun May  1 10:21:02 2016
+++ src/sys/dev/ic/nvme.c	Sun May  1 10:21:02 2016
@@ -0,0 +1,1333 @@
+/*	$NetBSD: nvme.c,v 1.1 2016/05/01 10:21:02 nonaka Exp $	*/
+/*	$OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */
+
+/*
+ * Copyright (c) 2014 David Gwynne <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.1 2016/05/01 10:21:02 nonaka Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/atomic.h>
+#include <sys/bus.h>
+#include <sys/buf.h>
+#include <sys/device.h>
+#include <sys/kmem.h>
+#include <sys/once.h>
+#include <sys/queue.h>
+#include <sys/mutex.h>
+
+#include <dev/ic/nvmereg.h>
+#include <dev/ic/nvmevar.h>
+
+/*
+ * Entry counts that nvme_attach() passes to nvme_q_alloc() for the admin
+ * queue and for each I/O queue.
+ */
+int nvme_adminq_size = 128;
+int nvme_ioq_size = 128;
+
+/* autoconf print routine handed to config_found() */
+static int	nvme_print(void *, const char *);
+
+/* controller state transitions */
+static int	nvme_ready(struct nvme_softc *, uint32_t);
+static int	nvme_enable(struct nvme_softc *, u_int);
+static int	nvme_disable(struct nvme_softc *);
+static int	nvme_shutdown(struct nvme_softc *);
+
+/* version reporting, debug register dump, controller identify */
+static void	nvme_version(struct nvme_softc *, uint32_t);
+#ifdef NVME_DEBUG
+static void	nvme_dumpregs(struct nvme_softc *);
+#endif
+static int	nvme_identify(struct nvme_softc *, u_int);
+static void	nvme_fill_identify(struct nvme_queue *, struct nvme_ccb *,
+		    void *);
+
+/* per-queue ccb management */
+static int	nvme_ccbs_alloc(struct nvme_queue *, u_int);
+static void	nvme_ccbs_free(struct nvme_queue *);
+
+static struct nvme_ccb *
+		nvme_ccb_get(struct nvme_queue *);
+static void	nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *);
+
+/* polled command submission and its fill/done callbacks */
+static int	nvme_poll(struct nvme_softc *, struct nvme_queue *,
+		    struct nvme_ccb *, void (*)(struct nvme_queue *,
+		    struct nvme_ccb *, void *));
+static void	nvme_poll_fill(struct nvme_queue *, struct nvme_ccb *, void *);
+static void	nvme_poll_done(struct nvme_queue *, struct nvme_ccb *,
+		    struct nvme_cqe *);
+static void	nvme_sqe_fill(struct nvme_queue *, struct nvme_ccb *, void *);
+static void	nvme_empty_done(struct nvme_queue *, struct nvme_ccb *,
+		    struct nvme_cqe *);
+
+/* queue lifecycle */
+static struct nvme_queue *
+		nvme_q_alloc(struct nvme_softc *, uint16_t, u_int, u_int);
+static int	nvme_q_create(struct nvme_softc *, struct nvme_queue *);
+static int	nvme_q_delete(struct nvme_softc *, struct nvme_queue *);
+static void	nvme_q_submit(struct nvme_softc *, struct nvme_queue *,
+		    struct nvme_ccb *, void (*)(struct nvme_queue *,
+		    struct nvme_ccb *, void *));
+static int	nvme_q_complete(struct nvme_softc *, struct nvme_queue *q);
+static void	nvme_q_free(struct nvme_softc *, struct nvme_queue *);
+
+/* DMA memory helpers */
+static struct nvme_dmamem *
+		nvme_dmamem_alloc(struct nvme_softc *, size_t);
+static void	nvme_dmamem_free(struct nvme_softc *, struct nvme_dmamem *);
+static void	nvme_dmamem_sync(struct nvme_softc *, struct nvme_dmamem *,
+		    int);
+
+/* fill/done callbacks for namespace I/O and sync requests */
+static void	nvme_ns_io_fill(struct nvme_queue *, struct nvme_ccb *,
+		    void *);
+static void	nvme_ns_io_done(struct nvme_queue *, struct nvme_ccb *,
+		    struct nvme_cqe *);
+static void	nvme_ns_sync_fill(struct nvme_queue *, struct nvme_ccb *,
+		    void *);
+static void	nvme_ns_sync_done(struct nvme_queue *, struct nvme_ccb *,
+		    struct nvme_cqe *);
+
+static void	nvme_strvis(u_char *, int, const u_char *, int);
+
+/*
+ * Register accessors.  All of them go through the bus_space tag/handle
+ * stored in the softc (sc_iot/sc_ioh).
+ */
+#define nvme_read4(_s, _r) \
+	bus_space_read_4((_s)->sc_iot, (_s)->sc_ioh, (_r))
+#define nvme_write4(_s, _r, _v) \
+	bus_space_write_4((_s)->sc_iot, (_s)->sc_ioh, (_r), (_v))
+#ifdef __LP64__
+#define nvme_read8(_s, _r) \
+	bus_space_read_8((_s)->sc_iot, (_s)->sc_ioh, (_r))
+#define nvme_write8(_s, _r, _v) \
+	bus_space_write_8((_s)->sc_iot, (_s)->sc_ioh, (_r), (_v))
+#else /* __LP64__ */
+/*
+ * Without __LP64__ a 64-bit register is accessed as two 32-bit halves:
+ * the low 32 bits live at offset r, the high 32 bits at r + 4, and the
+ * half at r is always accessed first.  The value is assembled with
+ * shifts, so the result does not depend on host byte order and no
+ * object is accessed through a pointer of a different type.
+ */
+static inline uint64_t
+nvme_read8(struct nvme_softc *sc, bus_size_t r)
+{
+	uint64_t lo, hi;
+
+	lo = nvme_read4(sc, r);
+	hi = nvme_read4(sc, r + 4);
+
+	return (hi << 32) | lo;
+}
+
+static inline void
+nvme_write8(struct nvme_softc *sc, bus_size_t r, uint64_t v)
+{
+
+	nvme_write4(sc, r, (uint32_t)v);
+	nvme_write4(sc, r + 4, (uint32_t)(v >> 32));
+}
+#endif /* __LP64__ */
+#define nvme_barrier(_s, _r, _l, _f) \
+	bus_space_barrier((_s)->sc_iot, (_s)->sc_ioh, (_r), (_l), (_f))
+
+/* Pool cache for struct nvme_ns_context, created once by nvme_init(). */
+pool_cache_t nvme_ns_ctx_cache;
+ONCE_DECL(nvme_init_once);
+
+/*
+ * One-time initialisation, run from nvme_attach() through RUN_ONCE():
+ * create the pool cache for namespace I/O contexts.  Always returns 0.
+ */
+static int
+nvme_init(void)
+{
+	const size_t ctxsize = sizeof(struct nvme_ns_context);
+
+	nvme_ns_ctx_cache = pool_cache_init(ctxsize, 0, 0, 0, "nvme_ns_ctx",
+	    NULL, IPL_BIO, NULL, NULL, NULL);
+	KASSERT(nvme_ns_ctx_cache != NULL);
+
+	return 0;
+}
+
+/*
+ * Print the NVMe version that corresponds to the raw version register
+ * value, or an error message if the value is not one of the known ones.
+ */
+static void
+nvme_version(struct nvme_softc *sc, uint32_t ver)
+{
+	static const struct {
+		uint32_t	vs;
+		const char	*name;
+	} known[] = {
+		{ NVME_VS_1_0, "1.0" },
+		{ NVME_VS_1_1, "1.1" },
+		{ NVME_VS_1_2, "1.2" },
+	};
+	size_t n;
+
+	for (n = 0; n < sizeof(known) / sizeof(known[0]); n++) {
+		if (known[n].vs == ver) {
+			aprint_normal_dev(sc->sc_dev, "NVMe %s\n",
+			    known[n].name);
+			return;
+		}
+	}
+
+	aprint_error_dev(sc->sc_dev, "unknown version 0x%08x\n", ver);
+}
+
+#ifdef NVME_DEBUG
+/*
+ * Debug aid: print the controller registers (CAP and its fields, VS, CC
+ * and its fields, CSTS, AQA, ASQ, ACQ) with printf().
+ *
+ * Values printed with %llx/%llu are cast to unsigned long long so the
+ * arguments match the conversion regardless of how uint64_t is defined.
+ */
+static void
+nvme_dumpregs(struct nvme_softc *sc)
+{
+	uint64_t r8;
+	uint32_t r4;
+
+#define	DEVNAME(_sc) device_xname((_sc)->sc_dev)
+	r8 = nvme_read8(sc, NVME_CAP);
+	printf("%s: cap  0x%016llx\n", DEVNAME(sc), (unsigned long long)r8);
+	printf("%s:  mpsmax %u (%u)\n", DEVNAME(sc),
+	    (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8)));
+	printf("%s:  mpsmin %u (%u)\n", DEVNAME(sc),
+	    (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8)));
+	printf("%s:  css %llu\n", DEVNAME(sc),
+	    (unsigned long long)NVME_CAP_CSS(r8));
+	printf("%s:  nssrs %llu\n", DEVNAME(sc),
+	    (unsigned long long)NVME_CAP_NSSRS(r8));
+	printf("%s:  dstrd %u\n", DEVNAME(sc), (u_int)NVME_CAP_DSTRD(r8));
+	printf("%s:  to %llu msec\n", DEVNAME(sc),
+	    (unsigned long long)NVME_CAP_TO(r8));
+	printf("%s:  ams %llu\n", DEVNAME(sc),
+	    (unsigned long long)NVME_CAP_AMS(r8));
+	printf("%s:  cqr %llu\n", DEVNAME(sc),
+	    (unsigned long long)NVME_CAP_CQR(r8));
+	printf("%s:  mqes %llu\n", DEVNAME(sc),
+	    (unsigned long long)NVME_CAP_MQES(r8));
+
+	printf("%s: vs   0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS));
+
+	r4 = nvme_read4(sc, NVME_CC);
+	printf("%s: cc   0x%04x\n", DEVNAME(sc), r4);
+	printf("%s:  iocqes %u\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4));
+	printf("%s:  iosqes %u\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4));
+	printf("%s:  shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4));
+	printf("%s:  ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4));
+	printf("%s:  mps %u\n", DEVNAME(sc), NVME_CC_MPS_R(r4));
+	printf("%s:  css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4));
+	printf("%s:  en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN));
+
+	printf("%s: csts 0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_CSTS));
+	printf("%s: aqa  0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_AQA));
+	printf("%s: asq  0x%016llx\n", DEVNAME(sc),
+	    (unsigned long long)nvme_read8(sc, NVME_ASQ));
+	printf("%s: acq  0x%016llx\n", DEVNAME(sc),
+	    (unsigned long long)nvme_read8(sc, NVME_ACQ));
+#undef	DEVNAME
+}
+#endif	/* NVME_DEBUG */
+
+/*
+ * Poll CSTS until its RDY bit equals rdy (either NVME_CSTS_RDY or 0).
+ *
+ * Between polls the code delays 1000us and issues a read barrier on
+ * CSTS.  Returns 0 once the bit matches, or 1 after the poll counter has
+ * exceeded sc_rdy_to.
+ */
+static int
+nvme_ready(struct nvme_softc *sc, uint32_t rdy)
+{
+	u_int polls;
+
+	for (polls = 0; ; polls++) {
+		if ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) == rdy)
+			return 0;
+		if (polls > sc->sc_rdy_to)
+			return 1;
+
+		delay(1000);
+		nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
+	}
+}
+
+/*
+ * Program the admin queue registers and the controller configuration,
+ * set CC.EN and wait for the controller to report ready.
+ *
+ * mps is the value placed in the CC MPS field.  Returns the result of
+ * nvme_ready(): 0 when RDY was seen, 1 on timeout.
+ */
+static int
+nvme_enable(struct nvme_softc *sc, u_int mps)
+{
+	uint32_t cc;
+
+	cc = nvme_read4(sc, NVME_CC);
+	/* Already enabled: nothing to program, just wait for RDY. */
+	if (ISSET(cc, NVME_CC_EN))
+		return nvme_ready(sc, NVME_CSTS_RDY);
+
+	/* Admin queue sizes, both taken from the admin queue's q_entries. */
+	nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) |
+	    NVME_AQA_ASQS(sc->sc_admin_q->q_entries));
+	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
+
+	/* Device addresses of the admin submission and completion rings. */
+	nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem));
+	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
+	nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem));
+	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
+
+	/* Rebuild every configurable CC field from scratch. */
+	CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK |
+	    NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK);
+	/* ffs(64) - 1 == 6 and ffs(16) - 1 == 4, i.e. log2 of 64 and 16. */
+	SET(cc, NVME_CC_IOSQES(ffs(64) - 1) | NVME_CC_IOCQES(ffs(16) - 1));
+	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE));
+	SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM));
+	SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR));
+	SET(cc, NVME_CC_MPS(mps));
+	SET(cc, NVME_CC_EN);
+
+	nvme_write4(sc, NVME_CC, cc);
+	nvme_barrier(sc, 0, sc->sc_ios,
+	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
+
+	return nvme_ready(sc, NVME_CSTS_RDY);
+}
+
+/*
+ * Clear CC.EN and wait for the RDY bit in CSTS to drop.
+ *
+ * Returns 0 when RDY cleared, 1 if one of the waits timed out.
+ */
+static int
+nvme_disable(struct nvme_softc *sc)
+{
+	uint32_t cc, csts;
+
+	cc = nvme_read4(sc, NVME_CC);
+	if (ISSET(cc, NVME_CC_EN)) {
+		csts = nvme_read4(sc, NVME_CSTS);
+		/*
+		 * Currently enabled: unless the CFS bit is set in CSTS,
+		 * wait for RDY to be asserted before clearing EN, and
+		 * give up if that never happens.
+		 */
+		if (!ISSET(csts, NVME_CSTS_CFS) &&
+		    nvme_ready(sc, NVME_CSTS_RDY) != 0)
+			return 1;
+	}
+
+	CLR(cc, NVME_CC_EN);
+
+	nvme_write4(sc, NVME_CC, cc);
+	nvme_barrier(sc, 0, sc->sc_ios,
+	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
+
+	return nvme_ready(sc, 0);
+}
+
+/*
+ * Bring up the controller and attach one child per namespace.
+ *
+ * Steps, in order: run the one-time initialisation, read and print the
+ * version, derive the memory page size from CAP, disable the controller,
+ * allocate the admin queue and establish its interrupt, enable the
+ * controller, identify it, re-create the admin queue's ccbs now that
+ * sc_max_sgl is known, allocate and create sc_nq I/O queues, and finally
+ * call config_found() for namespace ids 1..sc_nn.
+ *
+ * Returns 0 on success and 1 on failure.  On failure everything that was
+ * set up by this function is torn down again, including the sc_q pointer
+ * array.
+ */
+int
+nvme_attach(struct nvme_softc *sc)
+{
+	struct nvme_attach_args naa;
+	uint64_t cap;
+	uint32_t reg;
+	u_int dstrd;
+	u_int mps = PAGE_SHIFT;
+	int adminq_entries = nvme_adminq_size;
+	int ioq_entries = nvme_ioq_size;
+	int i;
+
+	RUN_ONCE(&nvme_init_once, nvme_init);
+
+	/* All-ones from the version register means the mapping is bad. */
+	reg = nvme_read4(sc, NVME_VS);
+	if (reg == 0xffffffff) {
+		aprint_error_dev(sc->sc_dev, "invalid mapping\n");
+		return 1;
+	}
+
+	nvme_version(sc, reg);
+
+	cap = nvme_read8(sc, NVME_CAP);
+	dstrd = NVME_CAP_DSTRD(cap);
+	if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) {
+		aprint_error_dev(sc->sc_dev, "NVMe minimum page size %u "
+		    "is greater than CPU page size %u\n",
+		    1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT);
+		return 1;
+	}
+	/* Use the CPU page size unless the controller's maximum is smaller. */
+	if (NVME_CAP_MPSMAX(cap) < mps)
+		mps = NVME_CAP_MPSMAX(cap);
+
+	sc->sc_rdy_to = NVME_CAP_TO(cap);
+	sc->sc_mps = 1 << mps;
+	sc->sc_mdts = MAXPHYS;
+	sc->sc_max_sgl = 2;
+
+	if (nvme_disable(sc) != 0) {
+		aprint_error_dev(sc->sc_dev, "unable to disable controller\n");
+		return 1;
+	}
+
+	sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, adminq_entries, dstrd);
+	if (sc->sc_admin_q == NULL) {
+		aprint_error_dev(sc->sc_dev,
+		    "unable to allocate admin queue\n");
+		return 1;
+	}
+	if (sc->sc_intr_establish(sc, NVME_ADMIN_Q, sc->sc_admin_q))
+		goto free_admin_q;
+
+	if (nvme_enable(sc, mps) != 0) {
+		aprint_error_dev(sc->sc_dev, "unable to enable controller\n");
+		goto disestablish_admin_q;
+	}
+
+	if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) {
+		aprint_error_dev(sc->sc_dev, "unable to identify controller\n");
+		goto disable;
+	}
+
+	/* we know how big things are now */
+	sc->sc_max_sgl = sc->sc_mdts / sc->sc_mps;
+
+	/*
+	 * reallocate ccbs of admin queue with new max sgl.
+	 * NOTE(review): the result of nvme_ccbs_alloc() is not checked
+	 * here; confirm what a failure leaves behind before adding
+	 * error handling.
+	 */
+	nvme_ccbs_free(sc->sc_admin_q);
+	nvme_ccbs_alloc(sc->sc_admin_q, sc->sc_admin_q->q_entries);
+
+	sc->sc_q = kmem_zalloc(sizeof(*sc->sc_q) * sc->sc_nq, KM_SLEEP);
+	if (sc->sc_q == NULL) {
+		aprint_error_dev(sc->sc_dev, "unable to allocate io queue\n");
+		goto disable;
+	}
+	/* I/O queue ids start at 1; they are stored at index id - 1. */
+	for (i = 0; i < sc->sc_nq; i++) {
+		sc->sc_q[i] = nvme_q_alloc(sc, i + 1, ioq_entries, dstrd);
+		if (sc->sc_q[i] == NULL) {
+			aprint_error_dev(sc->sc_dev,
+			    "unable to allocate io queue\n");
+			goto free_q;
+		}
+		if (nvme_q_create(sc, sc->sc_q[i]) != 0) {
+			aprint_error_dev(sc->sc_dev,
+			    "unable to create io queue\n");
+			nvme_q_free(sc, sc->sc_q[i]);
+			goto free_q;
+		}
+	}
+
+	if (!sc->sc_use_mq)
+		nvme_write4(sc, NVME_INTMC, 1);
+
+	sc->sc_namespaces = kmem_zalloc(sizeof(*sc->sc_namespaces) * sc->sc_nn,
+	    KM_SLEEP);
+	/* Namespace ids start at 1; slot i describes nsid i + 1. */
+	for (i = 0; i < sc->sc_nn; i++) {
+		memset(&naa, 0, sizeof(naa));
+		naa.naa_nsid = i + 1;
+		naa.naa_qentries = ioq_entries;
+		sc->sc_namespaces[i].dev = config_found(sc->sc_dev, &naa,
+		    nvme_print);
+	}
+
+	return 0;
+
+free_q:
+	/* Queue i itself was never fully set up; unwind queues 0..i-1. */
+	while (--i >= 0) {
+		nvme_q_delete(sc, sc->sc_q[i]);
+		nvme_q_free(sc, sc->sc_q[i]);
+	}
+	/* Release the pointer array as well, mirroring nvme_detach(). */
+	kmem_free(sc->sc_q, sizeof(*sc->sc_q) * sc->sc_nq);
+	sc->sc_q = NULL;
+disable:
+	nvme_disable(sc);
+disestablish_admin_q:
+	sc->sc_intr_disestablish(sc, NVME_ADMIN_Q);
+free_admin_q:
+	nvme_q_free(sc, sc->sc_admin_q);
+
+	return 1;
+}
+
+/*
+ * Print routine passed to config_found(): emits the parent name when one
+ * is supplied and the namespace id when it is greater than zero.
+ * Always returns UNCONF.
+ */
+static int
+nvme_print(void *aux, const char *pnp)
+{
+	const struct nvme_attach_args *args = aux;
+
+	if (pnp != NULL)
+		aprint_normal("at %s", pnp);
+
+	if (args->naa_nsid > 0)
+		aprint_normal(" nsid %d", args->naa_nsid);
+
+	return UNCONF;
+}
+
+/*
+ * Detach the controller: detach all children, shut the controller down,
+ * then free the I/O queues, the sc_q pointer array and the admin queue.
+ * An error from either of the first two steps is returned unchanged and
+ * nothing is freed.
+ *
+ * NOTE(review): sc_namespaces is not freed here; confirm whether it is
+ * released elsewhere.
+ */
+int
+nvme_detach(struct nvme_softc *sc, int flags)
+{
+	int error, qidx;
+
+	if ((error = config_detach_children(sc->sc_dev, flags)) != 0)
+		return error;
+
+	if ((error = nvme_shutdown(sc)) != 0)
+		return error;
+
+	for (qidx = 0; qidx < sc->sc_nq; qidx++) {
+		nvme_q_free(sc, sc->sc_q[qidx]);
+	}
+	kmem_free(sc->sc_q, sizeof(*sc->sc_q) * sc->sc_nq);
+
+	nvme_q_free(sc, sc->sc_admin_q);
+
+	return 0;
+}
+
+/*
+ * Quiesce the controller for detach.
+ *
+ * Sets the interrupt mask when not running multi-queue, deletes every I/O
+ * queue, disestablishes the admin queue interrupt and then requests a
+ * normal shutdown through CC.SHN, polling CSTS up to 4000 times with a
+ * 1000us delay between reads.  If a queue cannot be deleted, or the
+ * shutdown does not complete in time, the controller is disabled with
+ * nvme_disable() instead.
+ *
+ * Always returns 0.
+ */
+static int
+nvme_shutdown(struct nvme_softc *sc)
+{
+	uint32_t cc, csts;
+	bool disabled = false;
+	int i;
+
+	if (!sc->sc_use_mq)
+		nvme_write4(sc, NVME_INTMS, 1);
+
+	/* keep going after a failure so every queue gets a delete attempt */
+	for (i = 0; i < sc->sc_nq; i++) {
+		if (nvme_q_delete(sc, sc->sc_q[i]) != 0) {
+			aprint_error_dev(sc->sc_dev,
+			    "unable to delete io queue %d, disabling\n", i + 1);
+			disabled = true;
+		}
+	}
+	sc->sc_intr_disestablish(sc, NVME_ADMIN_Q);
+	if (disabled)
+		goto disable;
+
+	/* request a normal shutdown */
+	cc = nvme_read4(sc, NVME_CC);
+	CLR(cc, NVME_CC_SHN_MASK);
+	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL));
+	nvme_write4(sc, NVME_CC, cc);
+
+	/* wait for the controller to report shutdown processing complete */
+	for (i = 0; i < 4000; i++) {
+		nvme_barrier(sc, 0, sc->sc_ios,
+		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
+		csts = nvme_read4(sc, NVME_CSTS);
+		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE)
+			return 0;
+
+		delay(1000);
+	}
+
+	aprint_error_dev(sc->sc_dev, "unable to shutdown, disabling\n");
+
+disable:
+	nvme_disable(sc);
+	return 0;
+}
+
+/*
+ * Child-detached notification: forget the device pointer of the namespace
+ * slot that referred to the departing child.  Only the first matching slot
+ * is cleared.
+ */
+void
+nvme_childdet(device_t self, device_t child)
+{
+	struct nvme_softc *sc = device_private(self);
+	int slot = 0;
+
+	while (slot < sc->sc_nn && sc->sc_namespaces[slot].dev != child)
+		slot++;
+
+	if (slot < sc->sc_nn) {
+		/* Already freed ns->ident. */
+		sc->sc_namespaces[slot].dev = NULL;
+	}
+}
+
+/*
+ * Fetch the identify data of namespace nsid and attach a private copy of
+ * it to the namespace (ns->ident).
+ *
+ * The command is an NVM_ADMIN_IDENTIFY with cdw10 = 0, issued on the admin
+ * queue and completed by polling.
+ *
+ * Returns 0 on success, ENOMEM when the DMA buffer cannot be allocated and
+ * EIO when the command completes with a non-zero status.
+ */
+int
+nvme_ns_identify(struct nvme_softc *sc, uint16_t nsid)
+{
+	struct nvme_sqe sqe;
+	struct nvm_identify_namespace *identify;
+	struct nvme_dmamem *mem;
+	struct nvme_ccb *ccb;
+	struct nvme_namespace *ns;
+	int rv;
+
+	KASSERT(nsid > 0);
+
+	ccb = nvme_ccb_get(sc->sc_admin_q);
+	KASSERT(ccb != NULL);
+
+	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
+	if (mem == NULL) {
+		/* hand the admin ccb back, otherwise it is lost for good */
+		nvme_ccb_put(sc->sc_admin_q, ccb);
+		return ENOMEM;
+	}
+
+	memset(&sqe, 0, sizeof(sqe));
+	sqe.opcode = NVM_ADMIN_IDENTIFY;
+	htolem32(&sqe.nsid, nsid);
+	htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
+	htolem32(&sqe.cdw10, 0);
+
+	/* nvme_sqe_fill() copies the prepared sqe found in the cookie */
+	ccb->ccb_done = nvme_empty_done;
+	ccb->ccb_cookie = &sqe;
+
+	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
+	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
+	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
+
+	nvme_ccb_put(sc->sc_admin_q, ccb);
+
+	if (rv != 0) {
+		rv = EIO;
+		goto done;
+	}
+
+	/* commit */
+
+	identify = kmem_zalloc(sizeof(*identify), KM_SLEEP);
+	memcpy(identify, NVME_DMA_KVA(mem), sizeof(*identify));
+
+	ns = nvme_ns_get(sc, nsid);
+	KASSERT(ns);
+	ns->ident = identify;
+
+done:
+	nvme_dmamem_free(sc, mem);
+
+	return rv;
+}
+
+/*
+ * Start the read or write described by ctx on an I/O queue.
+ *
+ * The data buffer is loaded into the ccb's DMA map.  When the mapping has
+ * more than two segments, the addresses of segments 1..n-1 are written to
+ * the ccb's PRP list, which nvme_ns_io_fill() then points prp[1] at.
+ *
+ * Returns EAGAIN when no ccb is free, the bus_dmamap_load() error when the
+ * buffer cannot be mapped, EIO when a polled command finishes with a
+ * non-zero status and 0 otherwise.  For non-polled requests 0 only means
+ * the command was queued; completion is delivered via nvme_ns_io_done().
+ */
+int
+nvme_ns_dobio(struct nvme_softc *sc, struct nvme_ns_context *ctx)
+{
+	struct nvme_queue *q = nvme_get_q(sc);
+	struct nvme_ccb *ccb;
+	bus_dmamap_t dmap;
+	int i, error;
+
+	ccb = nvme_ccb_get(q);
+	if (ccb == NULL)
+		return EAGAIN;
+
+	ccb->ccb_done = nvme_ns_io_done;
+	ccb->ccb_cookie = ctx;
+
+	dmap = ccb->ccb_dmamap;
+	error = bus_dmamap_load(sc->sc_dmat, dmap, ctx->nnc_data,
+	    ctx->nnc_datasize, NULL,
+	    (ISSET(ctx->nnc_flags, NVME_NS_CTX_F_POLL) ?
+	      BUS_DMA_NOWAIT : BUS_DMA_WAITOK) |
+	    (ISSET(ctx->nnc_flags, NVME_NS_CTX_F_READ) ?
+	      BUS_DMA_READ : BUS_DMA_WRITE));
+	if (error) {
+		nvme_ccb_put(q, ccb);
+		return error;
+	}
+
+	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
+	    ISSET(ctx->nnc_flags, NVME_NS_CTX_F_READ) ?
+	    BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
+
+	if (dmap->dm_nsegs > 2) {
+		/* segment 0 travels in the sqe itself, the rest go here */
+		for (i = 1; i < dmap->dm_nsegs; i++) {
+			htolem64(&ccb->ccb_prpl[i - 1],
+			    dmap->dm_segs[i].ds_addr);
+		}
+		/* sync exactly the (nsegs - 1) list entries written above */
+		bus_dmamap_sync(sc->sc_dmat,
+		    NVME_DMA_MAP(q->q_ccb_prpls),
+		    ccb->ccb_prpl_off,
+		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
+		    BUS_DMASYNC_PREWRITE);
+	}
+
+	if (ISSET(ctx->nnc_flags, NVME_NS_CTX_F_POLL)) {
+		if (nvme_poll(sc, q, ccb, nvme_ns_io_fill) != 0)
+			return EIO;
+		return 0;
+	}
+
+	nvme_q_submit(sc, q, ccb, nvme_ns_io_fill);
+	return 0;
+}
+
+/*
+ * Fill callback for read/write commands: builds the I/O submission queue
+ * entry in slot from the namespace context stored in the ccb cookie and
+ * the segments of the already loaded data DMA map.
+ *
+ * prp[0] always carries segment 0.  prp[1] is left untouched for a single
+ * segment, carries segment 1 for exactly two segments and otherwise
+ * carries the device address of the ccb's PRP list.
+ */
+static void
+nvme_ns_io_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
+{
+	struct nvme_ns_context *ctx = ccb->ccb_cookie;
+	bus_dmamap_t map = ccb->ccb_dmamap;
+	struct nvme_sqe_io *cmd = slot;
+
+	if (ISSET(ctx->nnc_flags, NVME_NS_CTX_F_READ))
+		cmd->opcode = NVM_CMD_READ;
+	else
+		cmd->opcode = NVM_CMD_WRITE;
+	htolem32(&cmd->nsid, ctx->nnc_nsid);
+
+	htolem64(&cmd->entry.prp[0], map->dm_segs[0].ds_addr);
+	if (map->dm_nsegs == 2) {
+		htolem64(&cmd->entry.prp[1], map->dm_segs[1].ds_addr);
+	} else if (map->dm_nsegs != 1) {
+		/* the prp list is already set up and synced */
+		htolem64(&cmd->entry.prp[1], ccb->ccb_prpl_dva);
+	}
+
+	htolem64(&cmd->slba, ctx->nnc_blkno);
+	/* nlb is written as the block count minus one */
+	htolem16(&cmd->nlb, (ctx->nnc_datasize / ctx->nnc_secsize) - 1);
+}
+
+/*
+ * Completion callback for read/write commands.
+ *
+ * Finishes the DMA syncs for the PRP list (when one was used) and for the
+ * data buffer, unloads the data map, returns the ccb to its queue and
+ * finally reports the completion flags to the context's nnc_done callback
+ * through nnc_status.
+ */
+static void
+nvme_ns_io_done(struct nvme_queue *q, struct nvme_ccb *ccb,
+    struct nvme_cqe *cqe)
+{
+	struct nvme_softc *sc = q->q_sc;
+	struct nvme_ns_context *ctx = ccb->ccb_cookie;
+	bus_dmamap_t dmap = ccb->ccb_dmamap;
+	uint16_t flags;
+
+	if (dmap->dm_nsegs > 2) {
+		/* the list held (nsegs - 1) entries, see nvme_ns_dobio() */
+		bus_dmamap_sync(sc->sc_dmat,
+		    NVME_DMA_MAP(q->q_ccb_prpls),
+		    ccb->ccb_prpl_off,
+		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
+		    BUS_DMASYNC_POSTWRITE);
+	}
+
+	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
+	    ISSET(ctx->nnc_flags, NVME_NS_CTX_F_READ) ?
+	    BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
+
+	bus_dmamap_unload(sc->sc_dmat, dmap);
+	nvme_ccb_put(q, ccb);
+
+	flags = lemtoh16(&cqe->flags);
+
+	ctx->nnc_status = flags;
+	(*ctx->nnc_done)(ctx);
+}
+
+/*
+ * Issue a flush (see nvme_ns_sync_fill()) for the namespace named in ctx.
+ *
+ * Returns EAGAIN when no ccb is available, EIO when a polled flush
+ * completes with a non-zero status and 0 otherwise.  Without
+ * NVME_NS_CTX_F_POLL the command is only queued and completion is reported
+ * through nvme_ns_sync_done().
+ */
+int
+nvme_ns_sync(struct nvme_softc *sc, struct nvme_ns_context *ctx)
+{
+	struct nvme_queue *ioq = nvme_get_q(sc);
+	struct nvme_ccb *ccb = nvme_ccb_get(ioq);
+
+	if (ccb == NULL)
+		return EAGAIN;
+
+	ccb->ccb_done = nvme_ns_sync_done;
+	ccb->ccb_cookie = ctx;
+
+	if (!ISSET(ctx->nnc_flags, NVME_NS_CTX_F_POLL)) {
+		nvme_q_submit(sc, ioq, ccb, nvme_ns_sync_fill);
+		return 0;
+	}
+
+	return (nvme_poll(sc, ioq, ccb, nvme_ns_sync_fill) != 0) ? EIO : 0;
+}
+
+/*
+ * Fill callback for flush commands: sets the NVM_CMD_FLUSH opcode and the
+ * namespace id taken from the context stored in the ccb cookie.
+ */
+static void
+nvme_ns_sync_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
+{
+	struct nvme_ns_context *ctx = ccb->ccb_cookie;
+	struct nvme_sqe *cmd = slot;
+
+	htolem32(&cmd->nsid, ctx->nnc_nsid);
+	cmd->opcode = NVM_CMD_FLUSH;
+}
+
+/*
+ * Completion callback for flush commands: returns the ccb to its queue,
+ * stores the completion flags in the context and invokes the context's
+ * nnc_done callback.
+ */
+static void
+nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb,
+    struct nvme_cqe *cqe)
+{
+	struct nvme_ns_context *ctx = ccb->ccb_cookie;
+	uint16_t status;
+
+	nvme_ccb_put(q, ccb);
+
+	status = lemtoh16(&cqe->flags);
+	ctx->nnc_status = status;
+
+	(*ctx->nnc_done)(ctx);
+}
+
+/*
+ * Drop the cached identify data of namespace nsid, if there is any, and
+ * clear the namespace's ident pointer.
+ */
+void
+nvme_ns_free(struct nvme_softc *sc, uint16_t nsid)
+{
+	struct nvme_namespace *ns = nvme_ns_get(sc, nsid);
+	struct nvm_identify_namespace *old;
+
+	KASSERT(ns);
+
+	/* detach the pointer first, then release what it referred to */
+	old = ns->ident;
+	ns->ident = NULL;
+	if (old == NULL)
+		return;
+
+	kmem_free(old, sizeof(*old));
+}
+
+/*
+ * Place one command in the next submission queue slot and write the new
+ * tail to the submission queue tail doorbell.
+ *
+ * The slot is zeroed, populated by the caller-supplied fill callback and
+ * then stamped with the ccb's id as command identifier.  Everything
+ * happens with q_sq_mtx held.
+ *
+ * NOTE(review): nothing here checks that the ring has room; presumably the
+ * number of ccbs bounds the commands in flight -- confirm.
+ */
+static void
+nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
+    void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *))
+{
+	struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem);
+	uint32_t tail;
+
+	mutex_enter(&q->q_sq_mtx);
+	/* claim the current tail slot and advance the tail, wrapping to 0 */
+	tail = q->q_sq_tail;
+	if (++q->q_sq_tail >= q->q_entries)
+		q->q_sq_tail = 0;
+
+	sqe += tail;
+
+	/* the syncs cover just the one entry being rewritten */
+	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
+	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
+	memset(sqe, 0, sizeof(*sqe));
+	(*fill)(q, ccb, sqe);
+	/* set after fill so the callback cannot overwrite the command id */
+	sqe->cid = ccb->ccb_id;
+	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
+	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);
+
+	/* the doorbell receives the new (already advanced) tail index */
+	nvme_write4(sc, q->q_sqtdbl, q->q_sq_tail);
+	mutex_exit(&q->q_sq_mtx);
+}
+
+/*
+ * Scratch state used by nvme_poll(): the command to submit and a copy of
+ * the completion entry that nvme_poll_done() stores for it.
+ */
+struct nvme_poll_state {
+	struct nvme_sqe s;	/* prepared submission entry */
+	struct nvme_cqe c;	/* copy of the completion entry */
+};
+
+/*
+ * Submit a command and busy-wait for its completion.
+ *
+ * The command is built on the stack by the caller's fill callback.  While
+ * the command is outstanding the ccb's done/cookie pair is replaced so
+ * that nvme_poll_done() captures the completion entry; afterwards the
+ * original cookie is restored and the original done callback is invoked
+ * with the captured entry.
+ *
+ * Returns the completion flags with the phase bit cleared, i.e. 0 when
+ * every status bit is zero.
+ */
+static int
+nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
+    void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *))
+{
+	void (*saved_done)(struct nvme_queue *, struct nvme_ccb *,
+	    struct nvme_cqe *);
+	void *saved_cookie;
+	struct nvme_poll_state st;
+	uint16_t status;
+
+	memset(&st, 0, sizeof(st));
+	(*fill)(q, ccb, &st.s);
+
+	/* divert completion handling to the poll helpers */
+	saved_done = ccb->ccb_done;
+	saved_cookie = ccb->ccb_cookie;
+	ccb->ccb_done = nvme_poll_done;
+	ccb->ccb_cookie = &st;
+
+	nvme_q_submit(sc, q, ccb, nvme_poll_fill);
+
+	/* nvme_poll_done() sets the phase bit in the copy when it runs */
+	for (;;) {
+		if (ISSET(st.c.flags, htole16(NVME_CQE_PHASE)))
+			break;
+		if (nvme_q_complete(sc, q) == 0)
+			delay(10);
+
+		/* XXX no timeout? */
+	}
+
+	ccb->ccb_cookie = saved_cookie;
+	saved_done(q, ccb, &st.c);
+
+	status = lemtoh16(&st.c.flags);
+
+	return status & ~NVME_CQE_PHASE;
+}
+
+/*
+ * Fill callback used by nvme_poll(): copies the submission entry prepared
+ * in the poll state (the ccb cookie) into the queue slot.
+ */
+static void
+nvme_poll_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
+{
+	struct nvme_poll_state *st = ccb->ccb_cookie;
+	struct nvme_sqe *dst = slot;
+
+	*dst = st->s;
+}
+
+/*
+ * Completion callback used by nvme_poll(): forces the phase bit on in the
+ * completion entry and copies the entry into the poll state, which is what
+ * nvme_poll()'s wait loop tests for.
+ */
+static void
+nvme_poll_done(struct nvme_queue *q, struct nvme_ccb *ccb,
+    struct nvme_cqe *cqe)
+{
+	struct nvme_poll_state *st = ccb->ccb_cookie;
+
+	SET(cqe->flags, htole16(NVME_CQE_PHASE));
+	st->c = *cqe;
+}
+
+/*
+ * Fill callback for callers that prepared a complete submission entry
+ * themselves: the ccb cookie points at that entry, which is copied into
+ * the slot as-is.
+ */
+static void
+nvme_sqe_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
+{
+	struct nvme_sqe *prepared = ccb->ccb_cookie;
+	struct nvme_sqe *target = slot;
+
+	*target = *prepared;
+}
+
+/*
+ * Completion callback that does nothing; used for commands whose issuer
+ * evaluates the result itself after nvme_poll() returns.
+ */
+static void
+nvme_empty_done(struct nvme_queue *q, struct nvme_ccb *ccb,
+    struct nvme_cqe *cqe)
+{
+}
+
+/*
+ * Process every new entry of a completion queue.
+ *
+ * Entries are consumed from q_cq_head for as long as their phase bit
+ * equals the phase currently expected (q_cq_phase); the expected phase is
+ * toggled each time the head wraps.  For each entry the done callback of
+ * the ccb selected by the entry's command id is invoked.
+ *
+ * Returns -1 when the completion queue mutex is already held, 1 when at
+ * least one entry was processed (the head doorbell is then updated) and
+ * 0 when there was nothing to do.
+ */
+static int
+nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
+{
+	struct nvme_ccb *ccb;
+	struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
+	uint32_t head;
+	uint16_t flags;
+	int rv = 0;
+
+	/* never block here; the caller treats -1 like "nothing done yet" */
+	if (!mutex_tryenter(&q->q_cq_mtx))
+		return -1;
+
+	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
+	head = q->q_cq_head;
+	for (;;) {
+		cqe = &ring[head];
+		flags = lemtoh16(&cqe->flags);
+		/* a stale phase marks the end of the new entries */
+		if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
+			break;
+
+		/*
+		 * cid is read without byte swapping, matching how
+		 * nvme_q_submit() stores it.
+		 * NOTE(review): cid indexes q_ccbs without a range check.
+		 */
+		ccb = &q->q_ccbs[cqe->cid];
+		ccb->ccb_done(q, ccb, cqe);
+
+		if (++head >= q->q_entries) {
+			head = 0;
+			q->q_cq_phase ^= NVME_CQE_PHASE;
+		}
+
+		rv = 1;
+	}
+	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
+
+	/* only touch the doorbell when the head actually moved */
+	if (rv)
+		nvme_write4(sc, q->q_cqhdbl, q->q_cq_head = head);
+	mutex_exit(&q->q_cq_mtx);
+
+	return rv;
+}
+
+/*
+ * Issue the identify command built by nvme_fill_identify() on the admin
+ * queue, print model, firmware revision and serial number, and record the
+ * result: sc_mdts is lowered when the controller reports a smaller maximum
+ * transfer size, sc_nn is set from the namespace count and the whole
+ * structure is copied into sc_identify.
+ *
+ * mps is the page size shift the controller's mdts value is scaled by
+ * (attach passes NVME_CAP_MPSMIN()).
+ *
+ * Returns 0 on success, 1 when the DMA buffer cannot be allocated and
+ * otherwise the non-zero status returned by nvme_poll().
+ */
+static int
+nvme_identify(struct nvme_softc *sc, u_int mps)
+{
+	char sn[41], mn[81], fr[17];
+	struct nvm_identify_controller *identify;
+	struct nvme_dmamem *mem;
+	struct nvme_ccb *ccb;
+	u_int mdts, shift;
+	int rv;
+
+	ccb = nvme_ccb_get(sc->sc_admin_q);
+	if (ccb == NULL)
+		panic("%s: nvme_ccb_get returned NULL", __func__);
+
+	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
+	if (mem == NULL) {
+		/* hand the admin ccb back, otherwise it is lost for good */
+		nvme_ccb_put(sc->sc_admin_q, ccb);
+		return 1;
+	}
+
+	ccb->ccb_done = nvme_empty_done;
+	ccb->ccb_cookie = mem;
+
+	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
+	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify);
+	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
+
+	nvme_ccb_put(sc->sc_admin_q, ccb);
+
+	if (rv != 0)
+		goto done;
+
+	identify = NVME_DMA_KVA(mem);
+
+	nvme_strvis(sn, sizeof(sn), identify->sn, sizeof(identify->sn));
+	nvme_strvis(mn, sizeof(mn), identify->mn, sizeof(identify->mn));
+	nvme_strvis(fr, sizeof(fr), identify->fr, sizeof(identify->fr));
+	aprint_normal_dev(sc->sc_dev, "%s, firmware %s, serial %s\n", mn, fr,
+	    sn);
+
+	if (identify->mdts > 0) {
+		/*
+		 * (1 << mdts) * (1 << mps) equals 1 << (mdts + mps).  Do it
+		 * as a single unsigned shift and ignore shift counts that do
+		 * not fit in a u_int, where the product used to overflow.
+		 */
+		shift = identify->mdts + mps;
+		if (shift < sizeof(mdts) * 8) {
+			mdts = 1U << shift;
+			if (mdts < sc->sc_mdts)
+				sc->sc_mdts = mdts;
+		}
+	}
+
+	sc->sc_nn = lemtoh32(&identify->nn);
+
+	memcpy(&sc->sc_identify, identify, sizeof(sc->sc_identify));
+
+done:
+	nvme_dmamem_free(sc, mem);
+
+	return rv;
+}
+
+/*
+ * Create an I/O queue pair on the controller.
+ *
+ * In multi-queue mode the per-queue interrupt is established first.  Then
+ * the completion queue and the submission queue bound to it are created
+ * with two polled admin commands.
+ *
+ * Returns 0 on success, 1 when the interrupt cannot be established and
+ * otherwise the non-zero status of the failing admin command.  On failure
+ * the interrupt established here is torn down again, because callers free
+ * the queue without disestablishing it.
+ *
+ * NOTE(review): when only the submission queue creation fails, the
+ * completion queue already created on the controller is not deleted here.
+ */
+static int
+nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q)
+{
+	struct nvme_sqe_q sqe;
+	struct nvme_ccb *ccb;
+	int rv;
+
+	if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q))
+		return 1;
+
+	ccb = nvme_ccb_get(sc->sc_admin_q);
+	KASSERT(ccb != NULL);
+
+	ccb->ccb_done = nvme_empty_done;
+	ccb->ccb_cookie = &sqe;
+
+	/* the completion queue has to exist before its submission queue */
+	memset(&sqe, 0, sizeof(sqe));
+	sqe.opcode = NVM_ADMIN_ADD_IOCQ;
+	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem));
+	htolem16(&sqe.qsize, q->q_entries - 1);
+	htolem16(&sqe.qid, q->q_id);
+	sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;
+	if (sc->sc_use_mq)
+		htolem16(&sqe.cqid, q->q_id);	/* qid == vector */
+
+	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
+	if (rv != 0)
+		goto fail;
+
+	ccb->ccb_done = nvme_empty_done;
+	ccb->ccb_cookie = &sqe;
+
+	memset(&sqe, 0, sizeof(sqe));
+	sqe.opcode = NVM_ADMIN_ADD_IOSQ;
+	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem));
+	htolem16(&sqe.qsize, q->q_entries - 1);
+	htolem16(&sqe.qid, q->q_id);
+	htolem16(&sqe.cqid, q->q_id);
+	sqe.qflags = NVM_SQE_Q_PC;
+
+	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
+
+fail:
+	nvme_ccb_put(sc->sc_admin_q, ccb);
+
+	/* do not leave a handler behind that points at a dying queue */
+	if (rv != 0 && sc->sc_use_mq)
+		(void)sc->sc_intr_disestablish(sc, q->q_id);
+
+	return rv;
+}
+
+/*
+ * Delete an I/O queue pair on the controller: first the submission queue,
+ * then the completion queue, each with a polled admin command.  The second
+ * command is skipped when the first one fails.
+ *
+ * In multi-queue mode the per-queue interrupt is disestablished after both
+ * deletions succeeded.
+ *
+ * Returns 0 on success, the non-zero status of the failing admin command,
+ * or 1 when the interrupt cannot be disestablished.
+ *
+ * NOTE(review): prp1 is filled in for the completion queue deletion; it
+ * looks unnecessary for a delete command -- confirm.
+ */
+static int
+nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q)
+{
+	struct nvme_sqe_q cmd;
+	struct nvme_ccb *ccb;
+	int rv;
+
+	ccb = nvme_ccb_get(sc->sc_admin_q);
+	KASSERT(ccb != NULL);
+
+	ccb->ccb_done = nvme_empty_done;
+	ccb->ccb_cookie = &cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = NVM_ADMIN_DEL_IOSQ;
+	htolem16(&cmd.qid, q->q_id);
+
+	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
+	if (rv == 0) {
+		ccb->ccb_done = nvme_empty_done;
+		ccb->ccb_cookie = &cmd;
+
+		memset(&cmd, 0, sizeof(cmd));
+		cmd.opcode = NVM_ADMIN_DEL_IOCQ;
+		htolem64(&cmd.prp1, NVME_DMA_DVA(q->q_sq_dmamem));
+		htolem16(&cmd.qid, q->q_id);
+
+		rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
+	}
+
+	nvme_ccb_put(sc->sc_admin_q, ccb);
+
+	if (rv != 0 || !sc->sc_use_mq)
+		return rv;
+
+	return sc->sc_intr_disestablish(sc, q->q_id) ? 1 : 0;
+}
+
+/*
+ * Fill callback for the identify command issued by nvme_identify(): the
+ * ccb cookie is the DMA buffer that receives the data, and cdw10 is set
+ * to 1.
+ */
+static void
+nvme_fill_identify(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
+{
+	struct nvme_dmamem *buf = ccb->ccb_cookie;
+	struct nvme_sqe *cmd = slot;
+
+	cmd->opcode = NVM_ADMIN_IDENTIFY;
+	htolem32(&cmd->cdw10, 1);
+	htolem64(&cmd->entry.prp[0], NVME_DMA_DVA(buf));
+}
+
+/*
+ * Allocate the nccbs command control blocks of a queue.
+ *
+ * Every ccb receives a data DMA map of up to sc_max_sgl + 1 segments and
+ * its own run of sc_max_sgl PRP list entries inside one DMA allocation
+ * shared by the whole queue (q_ccb_prpls).  All ccbs are put on the
+ * queue's free list.
+ *
+ * Returns 0 on success and 1 on failure; on failure everything allocated
+ * here has been released again.
+ */
+static int
+nvme_ccbs_alloc(struct nvme_queue *q, u_int nccbs)
+{
+	struct nvme_softc *sc = q->q_sc;
+	struct nvme_ccb *ccb;
+	bus_addr_t off;
+	uint64_t *prpl;
+	u_int i;
+
+	mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO);
+	SIMPLEQ_INIT(&q->q_ccb_list);
+
+	q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP);
+	if (q->q_ccbs == NULL)
+		return 1;
+
+	q->q_nccbs = nccbs;
+	q->q_ccb_prpls = nvme_dmamem_alloc(sc,
+	    sizeof(*prpl) * sc->sc_max_sgl * nccbs);
+	if (q->q_ccb_prpls == NULL) {
+		/*
+		 * nvme_ccbs_free() would dereference the missing PRP
+		 * memory, so unwind by hand.
+		 */
+		kmem_free(q->q_ccbs, sizeof(*ccb) * nccbs);
+		q->q_ccbs = NULL;
+		mutex_destroy(&q->q_ccb_mtx);
+		return 1;
+	}
+
+	prpl = NVME_DMA_KVA(q->q_ccb_prpls);
+	off = 0;
+
+	for (i = 0; i < nccbs; i++) {
+		ccb = &q->q_ccbs[i];
+
+		if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts,
+		    sc->sc_max_sgl + 1 /* we get a free prp in the sqe */,
+		    sc->sc_mps, sc->sc_mps, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
+		    &ccb->ccb_dmamap) != 0)
+			goto free_maps;
+
+		/* the id doubles as index into q_ccbs on completion */
+		ccb->ccb_id = i;
+		ccb->ccb_prpl = prpl;
+		ccb->ccb_prpl_off = off;
+		ccb->ccb_prpl_dva = NVME_DMA_DVA(q->q_ccb_prpls) + off;
+
+		SIMPLEQ_INSERT_TAIL(&q->q_ccb_list, ccb, ccb_entry);
+
+		prpl += sc->sc_max_sgl;
+		off += sizeof(*prpl) * sc->sc_max_sgl;
+	}
+
+	return 0;
+
+free_maps:
+	/* only ccbs already on the list own a DMA map */
+	nvme_ccbs_free(q);
+	return 1;
+}
+
+/*
+ * Take the first ccb off the queue's free list.
+ * Returns NULL when the list is empty.
+ */
+static struct nvme_ccb *
+nvme_ccb_get(struct nvme_queue *q)
+{
+	struct nvme_ccb *first;
+
+	mutex_enter(&q->q_ccb_mtx);
+	if ((first = SIMPLEQ_FIRST(&q->q_ccb_list)) != NULL)
+		SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
+	mutex_exit(&q->q_ccb_mtx);
+
+	return first;
+}
+
+/*
+ * Return a ccb to the head of the queue's free list, so the most recently
+ * released ccb is the next one handed out by nvme_ccb_get().
+ */
+static void
+nvme_ccb_put(struct nvme_queue *q, struct nvme_ccb *ccb)
+{
+
+	mutex_enter(&q->q_ccb_mtx);
+	SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry);
+	mutex_exit(&q->q_ccb_mtx);
+}
+
+/*
+ * Release what nvme_ccbs_alloc() set up: the data DMA map of every ccb on
+ * the free list, the shared PRP list memory, the ccb array and the free
+ * list mutex.
+ *
+ * Only ccbs currently on the free list have their DMA map destroyed.
+ */
+static void
+nvme_ccbs_free(struct nvme_queue *q)
+{
+	struct nvme_softc *sc = q->q_sc;
+	struct nvme_ccb *ccb;
+
+	mutex_enter(&q->q_ccb_mtx);
+	for (;;) {
+		ccb = SIMPLEQ_FIRST(&q->q_ccb_list);
+		if (ccb == NULL)
+			break;
+		SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
+		bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
+	}
+	mutex_exit(&q->q_ccb_mtx);
+
+	nvme_dmamem_free(sc, q->q_ccb_prpls);
+	kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs);
+	q->q_ccbs = NULL;
+	mutex_destroy(&q->q_ccb_mtx);
+}
+
+/*
+ * Allocate and initialise the host side of a queue pair: zeroed
+ * submission and completion rings of `entries' slots each, the two ring
+ * mutexes, the doorbell register offsets for queue `id' with doorbell
+ * stride `dstrd', and `entries' ccbs.
+ *
+ * The expected completion phase starts out as NVME_CQE_PHASE, so the
+ * zeroed ring contains no valid entries.
+ *
+ * Returns the new queue, or NULL on failure with everything released.
+ */
+static struct nvme_queue *
+nvme_q_alloc(struct nvme_softc *sc, uint16_t id, u_int entries, u_int dstrd)
+{
+	struct nvme_queue *q;
+
+	q = kmem_alloc(sizeof(*q), KM_SLEEP);
+	if (q == NULL)
+		return NULL;
+
+	q->q_sc = sc;
+	q->q_sq_dmamem = nvme_dmamem_alloc(sc,
+	    sizeof(struct nvme_sqe) * entries);
+	if (q->q_sq_dmamem == NULL)
+		goto free;
+
+	q->q_cq_dmamem = nvme_dmamem_alloc(sc,
+	    sizeof(struct nvme_cqe) * entries);
+	if (q->q_cq_dmamem == NULL)
+		goto free_sq;
+
+	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
+	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));
+
+	mutex_init(&q->q_sq_mtx, MUTEX_DEFAULT, IPL_BIO);
+	mutex_init(&q->q_cq_mtx, MUTEX_DEFAULT, IPL_BIO);
+	q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
+	q->q_cqhdbl = NVME_CQHDBL(id, dstrd);
+	q->q_id = id;
+	q->q_entries = entries;
+	q->q_sq_tail = 0;
+	q->q_cq_head = 0;
+	q->q_cq_phase = NVME_CQE_PHASE;
+
+	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
+	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
+
+	if (nvme_ccbs_alloc(q, entries) != 0) {
+		aprint_error_dev(sc->sc_dev, "unable to allocate ccbs\n");
+		goto destroy_mtx;
+	}
+
+	return q;
+
+destroy_mtx:
+	/* the ring mutexes were initialised above, undo that as well */
+	mutex_destroy(&q->q_cq_mtx);
+	mutex_destroy(&q->q_sq_mtx);
+	nvme_dmamem_free(sc, q->q_cq_dmamem);
+free_sq:
+	nvme_dmamem_free(sc, q->q_sq_dmamem);
+free:
+	kmem_free(q, sizeof(*q));
+
+	return NULL;
+}
+
+/*
+ * Release the host side of a queue pair allocated by nvme_q_alloc(): the
+ * ccbs, both rings (after a final DMA sync), the two ring mutexes and the
+ * queue structure itself.
+ */
+static void
+nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
+{
+	nvme_ccbs_free(q);
+	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
+	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);
+	nvme_dmamem_free(sc, q->q_cq_dmamem);
+	nvme_dmamem_free(sc, q->q_sq_dmamem);
+	/* mutexes initialised in nvme_q_alloc() must go before their memory */
+	mutex_destroy(&q->q_cq_mtx);
+	mutex_destroy(&q->q_sq_mtx);
+	kmem_free(q, sizeof(*q));
+}
+
+/*
+ * Interrupt handler that services the admin queue and, once the I/O queue
+ * array exists, the first I/O queue.  The interrupt mask is set while the
+ * queues are processed and cleared again afterwards.
+ *
+ * Returns 1 when nvme_q_complete() returned non-zero for either queue,
+ * else 0.
+ */
+int
+nvme_intr(void *xsc)
+{
+	struct nvme_softc *sc = xsc;
+	int handled = 0;
+
+	nvme_write4(sc, NVME_INTMS, 1);
+
+	if (nvme_q_complete(sc, sc->sc_admin_q))
+		handled = 1;
+	if (sc->sc_q != NULL && nvme_q_complete(sc, sc->sc_q[0]))
+		handled = 1;
+
+	nvme_write4(sc, NVME_INTMC, 1);
+
+	return handled;
+}
+
+/*
+ * Per-queue interrupt handler that masks the queue's bit (1 << q_id) via
+ * the interrupt mask registers while its completion queue is processed.
+ *
+ * Returns 1 when nvme_q_complete() returned non-zero, else 0.
+ */
+int
+nvme_mq_msi_intr(void *xq)
+{
+	struct nvme_queue *q = xq;
+	struct nvme_softc *sc = q->q_sc;
+	int handled;
+
+	nvme_write4(sc, NVME_INTMS, 1U << q->q_id);
+	handled = nvme_q_complete(sc, q) ? 1 : 0;
+	nvme_write4(sc, NVME_INTMC, 1U << q->q_id);
+
+	return handled;
+}
+
+/*
+ * Per-queue interrupt handler that processes the queue's completion queue
+ * without touching the interrupt mask registers.
+ *
+ * Returns 1 when nvme_q_complete() returned non-zero, else 0.
+ */
+int
+nvme_mq_msix_intr(void *xq)
+{
+	struct nvme_queue *q = xq;
+
+	return nvme_q_complete(q->q_sc, q) ? 1 : 0;
+}
+
+/*
+ * Allocate `size' bytes of zeroed DMA memory as a single segment aligned
+ * to sc_mps, map it into kernel virtual memory and load it into a freshly
+ * created DMA map.
+ *
+ * Returns the descriptor, or NULL when any step fails; the steps already
+ * done are undone in reverse order.
+ */
+static struct nvme_dmamem *
+nvme_dmamem_alloc(struct nvme_softc *sc, size_t size)
+{
+	struct nvme_dmamem *mem;
+	int rsegs;
+
+	mem = kmem_zalloc(sizeof(*mem), KM_SLEEP);
+	if (mem == NULL)
+		return NULL;
+	mem->ndm_size = size;
+
+	if (bus_dmamap_create(sc->sc_dmat, size, 1, size, 0,
+	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &mem->ndm_map) != 0)
+		goto fail_desc;
+
+	if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &mem->ndm_seg,
+	    1, &rsegs, BUS_DMA_WAITOK) != 0)
+		goto fail_map;
+
+	if (bus_dmamem_map(sc->sc_dmat, &mem->ndm_seg, rsegs, size,
+	    &mem->ndm_kva, BUS_DMA_WAITOK) != 0)
+		goto fail_seg;
+	memset(mem->ndm_kva, 0, size);
+
+	if (bus_dmamap_load(sc->sc_dmat, mem->ndm_map, mem->ndm_kva, size,
+	    NULL, BUS_DMA_WAITOK) != 0)
+		goto fail_kva;
+
+	return mem;
+
+fail_kva:
+	bus_dmamem_unmap(sc->sc_dmat, mem->ndm_kva, size);
+fail_seg:
+	bus_dmamem_free(sc->sc_dmat, &mem->ndm_seg, 1);
+fail_map:
+	bus_dmamap_destroy(sc->sc_dmat, mem->ndm_map);
+fail_desc:
+	kmem_free(mem, sizeof(*mem));
+	return NULL;
+}
+
+/*
+ * Perform the given bus_dmamap_sync() operations on the whole of a DMA
+ * memory object (offset 0, length of its single segment).
+ */
+static void
+nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops)
+{
+	bus_dmamap_t map = NVME_DMA_MAP(mem);
+
+	bus_dmamap_sync(sc->sc_dmat, map, 0, NVME_DMA_LEN(mem), ops);
+}
+
+/*
+ * Release a DMA memory object obtained from nvme_dmamem_alloc(), undoing
+ * the allocation steps in reverse order: unload the map, unmap and free
+ * the memory, destroy the map and free the descriptor.
+ */
+void
+nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm)
+{
+	bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map);
+	bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size);
+	bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
+	bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
+	kmem_free(ndm, sizeof(*ndm));
+}
+
+/*
+ * Copy of sys/dev/scsipi/scsipiconf.c:scsipi_strvis()
+ *
+ * Produce a printable, NUL-terminated rendering of the slen bytes at src
+ * in the dlen-byte buffer dst.  Leading and trailing blanks, NULs and 0xff
+ * bytes are dropped; bytes below 0x20 or above 0x7f become a backslash
+ * followed by three octal digits, and a backslash is doubled.  Output
+ * stops before the first character whose encoding would not leave room
+ * for the terminating NUL.
+ */
+static void
+nvme_strvis(u_char *dst, int dlen, const u_char *src, int slen)
+{
+	const u_char *end;
+	int need;
+
+#define STRVIS_ISWHITE(x) ((x) == ' ' || (x) == '\0' || (x) == (u_char)'\377')
+	/* Trim leading and trailing blanks and NULs. */
+	while (slen > 0 && STRVIS_ISWHITE(*src)) {
+		src++;
+		slen--;
+	}
+	while (slen > 0 && STRVIS_ISWHITE(src[slen - 1]))
+		slen--;
+
+	for (end = src + slen; src < end; src++) {
+		const u_char c = *src;
+
+		/* how many output bytes does this character take? */
+		if (c < 0x20 || c >= 0x80)
+			need = 4;	/* non-printable characters */
+		else if (c == '\\')
+			need = 2;	/* quote characters */
+		else
+			need = 1;	/* normal characters */
+
+		dlen -= need;
+		if (dlen < 1)
+			break;
+
+		switch (need) {
+		case 4:
+			*dst++ = '\\';
+			*dst++ = ((c & 0300) >> 6) + '0';
+			*dst++ = ((c & 0070) >> 3) + '0';
+			*dst++ = ((c & 0007) >> 0) + '0';
+			break;
+		case 2:
+			*dst++ = '\\';
+			*dst++ = '\\';
+			break;
+		default:
+			*dst++ = c;
+			break;
+		}
+	}
+
+	*dst = 0;
+}
Index: src/sys/dev/ic/nvmereg.h
diff -u /dev/null src/sys/dev/ic/nvmereg.h:1.1
--- /dev/null	Sun May  1 10:21:02 2016
+++ src/sys/dev/ic/nvmereg.h	Sun May  1 10:21:02 2016
@@ -0,0 +1,387 @@
+/*	$NetBSD: nvmereg.h,v 1.1 2016/05/01 10:21:02 nonaka Exp $	*/
+/*	$OpenBSD: nvmereg.h,v 1.10 2016/04/14 11:18:32 dlg Exp $ */
+
+/*
+ * Copyright (c) 2014 David Gwynne <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#define NVME_CAP	0x0000	/* Controller Capabilities */
+#define  NVME_CAP_MPSMAX(_r)	(12 + (((_r) >> 52) & 0xf)) /* shift */
+#define  NVME_CAP_MPSMIN(_r)	(12 + (((_r) >> 48) & 0xf)) /* shift */
+#define  NVME_CAP_CSS(_r)	(((_r) >> 37) & 0x7f)
+#define  NVME_CAP_CSS_NVM	__BIT(0)
+#define  NVME_CAP_NSSRS(_r)	ISSET((_r), __BIT(36))
+#define  NVME_CAP_DSTRD(_r)	__BIT(2 + (((_r) >> 32) & 0xf)) /* bytes */
+#define  NVME_CAP_TO(_r)	(500 * (((_r) >> 24) & 0xff)) /* ms */
+#define  NVME_CAP_AMS(_r)	(((_r) >> 17) & 0x3)
+#define  NVME_CAP_AMS_WRR	__BIT(0)
+#define  NVME_CAP_AMS_VENDOR	__BIT(1)
+#define  NVME_CAP_CQR(_r)	ISSET((_r), __BIT(16))
+#define  NVME_CAP_MQES(_r)	(((_r) & 0xffff) + 1)
+#define NVME_CAP_LO	0x0000
+#define NVME_CAP_HI	0x0004
+#define NVME_VS		0x0008	/* Version */
+#define  NVME_VS_MJR(_r)	(((_r) >> 16) & 0xffff)
+#define  NVME_VS_MNR(_r)	((_r) & 0xffff)
+#define  NVME_VS_1_0		0x00010000
+#define  NVME_VS_1_1		0x00010100
+#define  NVME_VS_1_2		0x00010200
+#define NVME_INTMS	0x000c	/* Interrupt Mask Set */
+#define NVME_INTMC	0x0010	/* Interrupt Mask Clear */
+#define NVME_CC		0x0014	/* Controller Configuration */
+#define  NVME_CC_IOCQES(_v)	(((_v) & 0xf) << 20)
+#define  NVME_CC_IOCQES_MASK	NVME_CC_IOCQES(0xf)
+#define  NVME_CC_IOCQES_R(_v)	(((_v) >> 20) & 0xf)
+#define  NVME_CC_IOSQES(_v)	(((_v) & 0xf) << 16)
+#define  NVME_CC_IOSQES_MASK	NVME_CC_IOSQES(0xf)
+#define  NVME_CC_IOSQES_R(_v)	(((_v) >> 16) & 0xf)
+#define  NVME_CC_SHN(_v)	(((_v) & 0x3) << 14)
+#define  NVME_CC_SHN_MASK	NVME_CC_SHN(0x3)
+#define  NVME_CC_SHN_R(_v)	(((_v) >> 14) & 0x3) /* same shift as _SHN() */
+#define  NVME_CC_SHN_NONE	0
+#define  NVME_CC_SHN_NORMAL	1
+#define  NVME_CC_SHN_ABRUPT	2
+#define  NVME_CC_AMS(_v)	(((_v) & 0x7) << 11)
+#define  NVME_CC_AMS_MASK	NVME_CC_AMS(0x7)
+#define  NVME_CC_AMS_R(_v)	(((_v) >> 11) & 0x7) /* same width as _AMS() */
+#define  NVME_CC_AMS_RR		0 /* round-robin */
+#define  NVME_CC_AMS_WRR_U	1 /* weighted round-robin w/ urgent */
+#define  NVME_CC_AMS_VENDOR	7 /* vendor */
+#define  NVME_CC_MPS(_v)	((((_v) - 12) & 0xf) << 7)
+#define  NVME_CC_MPS_MASK	(0xf << 7)
+#define  NVME_CC_MPS_R(_v)	(12 + (((_v) >> 7) & 0xf))
+#define  NVME_CC_CSS(_v)	(((_v) & 0x7) << 4)
+#define  NVME_CC_CSS_MASK	NVME_CC_CSS(0x7)
+#define  NVME_CC_CSS_R(_v)	(((_v) >> 4) & 0x7)
+#define  NVME_CC_CSS_NVM	0
+#define  NVME_CC_EN		__BIT(0)
+#define NVME_CSTS	0x001c	/* Controller Status */
+#define  NVME_CSTS_SHST_MASK	(0x3 << 2)
+#define  NVME_CSTS_SHST_NONE	(0x0 << 2) /* normal operation */
+#define  NVME_CSTS_SHST_WAIT	(0x1 << 2) /* shutdown processing occurring */
+#define  NVME_CSTS_SHST_DONE	(0x2 << 2) /* shutdown processing complete */
+#define  NVME_CSTS_CFS		(1 << 1)
+#define  NVME_CSTS_RDY		(1 << 0)
+#define NVME_NSSR	0x0020	/* NVM Subsystem Reset (Optional) */
+#define NVME_AQA	0x0024	/* Admin Queue Attributes */
+				/* Admin Completion Queue Size */
+#define  NVME_AQA_ACQS(_v)	(((_v) - 1) << 16)
+				/* Admin Submission Queue Size */
+#define  NVME_AQA_ASQS(_v)	(((_v) - 1) << 0)
+#define NVME_ASQ	0x0028	/* Admin Submission Queue Base Address */
+#define NVME_ACQ	0x0030	/* Admin Completion Queue Base Address */
+
+#define NVME_ADMIN_Q		0
+/* Submission Queue Tail Doorbell */
+#define NVME_SQTDBL(_q, _s)	(0x1000 + (2 * (_q) + 0) * (_s))
+/* Completion Queue Head Doorbell */
+#define NVME_CQHDBL(_q, _s)	(0x1000 + (2 * (_q) + 1) * (_s))
+
+/*
+ * Generic 16-byte view of the alternative to the two PRP entries in a
+ * submission queue entry: an identifier byte followed by 15 opaque bytes.
+ * Presumably a scatter/gather list descriptor -- confirm against the spec.
+ */
+struct nvme_sge {
+	uint8_t		id;
+	uint8_t		_reserved[15];
+} __packed __aligned(8);
+
+/*
+ * 16-byte descriptor carrying a length and an address behind the
+ * identifier byte.  Presumably the SGL data block descriptor -- confirm.
+ */
+struct nvme_sge_data {
+	uint8_t		id;
+	uint8_t		_reserved[3];
+
+	uint32_t	length;
+
+	uint64_t	address;
+} __packed __aligned(8);
+
+/*
+ * 16-byte descriptor with the same field layout as struct nvme_sge_data.
+ * Presumably the SGL bit bucket descriptor -- confirm.
+ */
+struct nvme_sge_bit_bucket {
+	uint8_t		id;
+	uint8_t		_reserved[3];
+
+	uint32_t	length;
+
+	uint64_t	address;
+} __packed __aligned(8);
+
+/*
+ * Generic 64-byte submission queue entry: opcode, flags and command id,
+ * namespace id, metadata pointer, the data pointer (two PRP entries or one
+ * descriptor) and six command specific dwords.  Multi-byte fields are
+ * filled in with the htolem*() helpers by the driver.
+ */
+struct nvme_sqe {
+	uint8_t		opcode;
+	uint8_t		flags;
+	uint16_t	cid;
+
+	uint32_t	nsid;
+
+	uint8_t		_reserved[8];
+
+	uint64_t	mptr;
+
+	union {
+		uint64_t	prp[2];
+		struct nvme_sge	sge;
+	} __packed	entry;
+
+	uint32_t	cdw10;
+	uint32_t	cdw11;
+	uint32_t	cdw12;
+	uint32_t	cdw13;
+	uint32_t	cdw14;
+	uint32_t	cdw15;
+} __packed __aligned(8);
+
+/*
+ * 64-byte submission queue entry layout used for the queue create and
+ * delete admin commands: prp1 holds the ring's device address, followed
+ * by queue id, queue size, queue flags and the completion queue id (or
+ * interrupt vector when creating a completion queue).
+ */
+struct nvme_sqe_q {
+	uint8_t		opcode;
+	uint8_t		flags;
+	uint16_t	cid;
+
+	uint8_t		_reserved1[20];
+
+	uint64_t	prp1;
+
+	uint8_t		_reserved2[8];
+
+	uint16_t	qid;
+	uint16_t	qsize;
+
+	uint8_t		qflags;
+#define NVM_SQE_SQ_QPRIO_URG	(0x0 << 1)
+#define NVM_SQE_SQ_QPRIO_HI	(0x1 << 1)
+#define NVM_SQE_SQ_QPRIO_MED	(0x2 << 1)
+#define NVM_SQE_SQ_QPRIO_LOW	(0x3 << 1)
+#define NVM_SQE_CQ_IEN		(1 << 1)
+#define NVM_SQE_Q_PC		(1 << 0)
+	uint8_t		_reserved3;
+	uint16_t	cqid; /* XXX interrupt vector for cq */
+
+	uint8_t		_reserved4[16];
+} __packed __aligned(8);
+
+/*
+ * 64-byte submission queue entry layout used for read and write commands.
+ * It shares the first 40 bytes with struct nvme_sqe; the command specific
+ * part carries the starting LBA, the block count and protection fields.
+ */
+struct nvme_sqe_io {
+	uint8_t		opcode;
+	uint8_t		flags;
+	uint16_t	cid;
+
+	uint32_t	nsid;
+
+	uint8_t		_reserved[8];
+
+	uint64_t	mptr;
+
+	union {
+		uint64_t	prp[2];
+		struct nvme_sge	sge;
+	} __packed	entry;
+
+	uint64_t	slba;	/* Starting LBA */
+
+	uint16_t	nlb;	/* Number of Logical Blocks */
+	uint16_t	ioflags;
+
+	uint8_t		dsm;	/* Dataset Management */
+	uint8_t		_reserved2[3];
+
+	uint32_t	eilbrt;	/* Expected Initial Logical Block
+				   Reference Tag */
+
+	uint16_t	elbat;	/* Expected Logical Block
+				   Application Tag */
+	uint16_t	elbatm;	/* Expected Logical Block
+				   Application Tag Mask */
+} __packed __aligned(8);
+
+struct nvme_cqe {
+	uint32_t	cdw0;
+
+	uint32_t	_reserved;
+
+	uint16_t	sqhd; /* SQ Head Pointer */
+	uint16_t	sqid; /* SQ Identifier */
+
+	uint16_t	cid; /* Command Identifier */
+	uint16_t	flags; /* bit 0 phase, 8:1 SC, 11:9 SCT, 14 M, 15 DNR */
+#define NVME_CQE_DNR		__BIT(15)
+#define NVME_CQE_M		__BIT(14)
+#define NVME_CQE_SCT(_f)	((_f) & (0x07 << 9))
+#define  NVME_CQE_SCT_GENERIC		(0x00 << 9)
+#define  NVME_CQE_SCT_COMMAND		(0x01 << 9)
+#define  NVME_CQE_SCT_MEDIAERR		(0x02 << 9)
+#define  NVME_CQE_SCT_VENDOR		(0x07 << 9)
+#define NVME_CQE_SC(_f)		((_f) & (0xff << 1))
+#define  NVME_CQE_SC_SUCCESS		(0x00 << 1)
+#define  NVME_CQE_SC_INVALID_OPCODE	(0x01 << 1)
+#define  NVME_CQE_SC_INVALID_FIELD	(0x02 << 1)
+#define  NVME_CQE_SC_CID_CONFLICT	(0x03 << 1)
+#define  NVME_CQE_SC_DATA_XFER_ERR	(0x04 << 1)
+#define  NVME_CQE_SC_ABRT_BY_NO_PWR	(0x05 << 1)
+#define  NVME_CQE_SC_INTERNAL_DEV_ERR	(0x06 << 1)
+#define  NVME_CQE_SC_CMD_ABRT_REQD	(0x07 << 1)
+#define  NVME_CQE_SC_CMD_ABDR_SQ_DEL	(0x08 << 1)
+#define  NVME_CQE_SC_CMD_ABDR_FUSE_ERR	(0x09 << 1)
+#define  NVME_CQE_SC_CMD_ABDR_FUSE_MISS	(0x0a << 1)
+#define  NVME_CQE_SC_INVALID_NS		(0x0b << 1)
+#define  NVME_CQE_SC_CMD_SEQ_ERR	(0x0c << 1)
+#define  NVME_CQE_SC_INVALID_LAST_SGL	(0x0d << 1)
+#define  NVME_CQE_SC_INVALID_NUM_SGL	(0x0e << 1)
+#define  NVME_CQE_SC_DATA_SGL_LEN	(0x0f << 1)
+#define  NVME_CQE_SC_MDATA_SGL_LEN	(0x10 << 1)
+#define  NVME_CQE_SC_SGL_TYPE_INVALID	(0x11 << 1)
+#define  NVME_CQE_SC_LBA_RANGE		(0x80 << 1)
+#define  NVME_CQE_SC_CAP_EXCEEDED	(0x81 << 1)
+#define  NVME_CQE_NS_NOT_RDY		(0x82 << 1)
+#define  NVME_CQE_RSV_CONFLICT		(0x83 << 1)
+#define NVME_CQE_PHASE		__BIT(0)
+} __packed __aligned(8);
+
+/* Opcodes for commands submitted on the admin queue. */
+#define NVM_ADMIN_DEL_IOSQ	0x00 /* Delete I/O Submission Queue */
+#define NVM_ADMIN_ADD_IOSQ	0x01 /* Create I/O Submission Queue */
+#define NVM_ADMIN_GET_LOG_PG	0x02 /* Get Log Page */
+#define NVM_ADMIN_DEL_IOCQ	0x04 /* Delete I/O Completion Queue */
+#define NVM_ADMIN_ADD_IOCQ	0x05 /* Create I/O Completion Queue */
+#define NVM_ADMIN_IDENTIFY	0x06 /* Identify */
+#define NVM_ADMIN_ABORT		0x08 /* Abort */
+#define NVM_ADMIN_SET_FEATURES	0x09 /* Set Features */
+#define NVM_ADMIN_GET_FEATURES	0x0a /* Get Features */
+#define NVM_ADMIN_ASYNC_EV_REQ	0x0c /* Asynchronous Event Request */
+#define NVM_ADMIN_FW_ACTIVATE	0x10 /* Firmware Activate */
+#define NVM_ADMIN_FW_DOWNLOAD	0x11 /* Firmware Image Download */
+
+/* Opcodes for commands submitted on the I/O queues. */
+#define NVM_CMD_FLUSH		0x00 /* Flush */
+#define NVM_CMD_WRITE		0x01 /* Write */
+#define NVM_CMD_READ		0x02 /* Read */
+#define NVM_CMD_WR_UNCOR	0x04 /* Write Uncorrectable */
+#define NVM_CMD_COMPARE		0x05 /* Compare */
+#define NVM_CMD_DSM		0x09 /* Dataset Management */
+
+/*
+ * Power State Descriptor Data
+ *
+ * One 32-byte entry of the psd[] array in struct nvm_identify_controller.
+ */
+struct nvm_identify_psd {
+	uint16_t	mp;		/* Max Power */
+	uint16_t	flags;
+
+	uint32_t	enlat;		/* Entry Latency */
+
+	uint32_t	exlat;		/* Exit Latency */
+
+	uint8_t		rrt;		/* Relative Read Throughput */
+	uint8_t		rrl;		/* Relative Read Latency */
+	uint8_t		rwt;		/* Relative Write Throughput */
+	uint8_t		rwl;		/* Relative Write Latency */
+
+	uint8_t		_reserved[16];
+} __packed __aligned(8);
+
+/*
+ * Layout of the 4096-byte data structure returned by the identify command
+ * that nvme_identify() issues.  Fields are in device byte order; the
+ * driver reads nn with lemtoh32().  sn, mn and fr are fixed-width byte
+ * arrays which the driver passes through nvme_strvis() before printing.
+ */
+struct nvm_identify_controller {
+	/* Controller Capabilities and Features */
+
+	uint16_t	vid;		/* PCI Vendor ID */
+	uint16_t	ssvid;		/* PCI Subsystem Vendor ID */
+
+	uint8_t		sn[20];		/* Serial Number */
+	uint8_t		mn[40];		/* Model Number */
+	uint8_t		fr[8];		/* Firmware Revision */
+
+	uint8_t		rab;		/* Recommended Arbitration Burst */
+	uint8_t		ieee[3];	/* IEEE OUI Identifier */
+
+	uint8_t		cmic;		/* Controller Multi-Path I/O and
+					   Namespace Sharing Capabilities */
+	uint8_t		mdts;		/* Maximum Data Transfer Size */
+	uint16_t	cntlid;		/* Controller ID */
+
+	uint8_t		_reserved1[176];
+
+	/* Admin Command Set Attributes & Optional Controller Capabilities */
+
+	uint16_t	oacs;		/* Optional Admin Command Support */
+	uint8_t		acl;		/* Abort Command Limit */
+	uint8_t		aerl;		/* Asynchronous Event Request Limit */
+
+	uint8_t		frmw;		/* Firmware Updates */
+	uint8_t		lpa;		/* Log Page Attributes */
+	uint8_t		elpe;		/* Error Log Page Entries */
+	uint8_t		npss;		/* Number of Power States Support */
+
+	uint8_t		avscc;		/* Admin Vendor Specific Command
+					   Configuration */
+	uint8_t		apsta;		/* Autonomous Power State Transition
+					   Attributes */
+
+	uint8_t		_reserved2[246];
+
+	/* NVM Command Set Attributes */
+
+	uint8_t		sqes;		/* Submission Queue Entry Size */
+	uint8_t		cqes;		/* Completion Queue Entry Size */
+	uint8_t		_reserved3[2];
+
+	uint32_t	nn;		/* Number of Namespaces */
+
+	uint16_t	oncs;		/* Optional NVM Command Support */
+	uint16_t	fuses;		/* Fused Operation Support */
+
+	uint8_t		fna;		/* Format NVM Attributes */
+	uint8_t		vwc;		/* Volatile Write Cache */
+	uint16_t	awun;		/* Atomic Write Unit Normal */
+
+	uint16_t	awupf;		/* Atomic Write Unit Power Fail */
+	uint8_t		nvscc;		/* NVM Vendor Specific Command */
+	uint8_t		_reserved4[1];
+
+	uint16_t	acwu;		/* Atomic Compare & Write Unit */
+	uint8_t		_reserved5[2];
+
+	uint32_t	sgls;		/* SGL Support */
+
+	uint8_t		_reserved6[164];
+
+	/* I/O Command Set Attributes */
+
+	uint8_t		_reserved7[1344];
+
+	/* Power State Descriptors */
+
+	struct nvm_identify_psd psd[32]; /* Power State Descriptors */
+
+	/* Vendor Specific */
+
+	uint8_t		_reserved8[1024];
+} __packed __aligned(8);
+
+/* One 4-byte entry of the lbaf[] array in struct nvm_identify_namespace. */
+struct nvm_namespace_format {
+	uint16_t	ms;		/* Metadata Size */
+	uint8_t		lbads;		/* LBA Data Size */
+	uint8_t		rp;		/* Relative Performance */
+} __packed __aligned(4);
+
+/*
+ * Layout of the 4096-byte data structure returned by the identify command
+ * that nvme_ns_identify() issues; a copy of it is kept per namespace.
+ * NVME_ID_NS_FLBAS() extracts the low four bits of flbas, presumably the
+ * index into lbaf[] of the format in use -- confirm.
+ */
+struct nvm_identify_namespace {
+	uint64_t	nsze;		/* Namespace Size */
+
+	uint64_t	ncap;		/* Namespace Capacity */
+
+	uint64_t	nuse;		/* Namespace Utilization */
+
+	uint8_t		nsfeat;		/* Namespace Features */
+	uint8_t		nlbaf;		/* Number of LBA Formats */
+	uint8_t		flbas;		/* Formatted LBA Size */
+#define NVME_ID_NS_FLBAS(_f)			((_f) & 0x0f)
+#define NVME_ID_NS_FLBAS_MD			0x10
+	uint8_t		mc;		/* Metadata Capabilities */
+	uint8_t		dpc;		/* End-to-end Data Protection
+					   Capabilities */
+	uint8_t		dps;		/* End-to-end Data Protection Type Settings */
+
+	uint8_t		_reserved1[98];
+
+	struct nvm_namespace_format
+			lbaf[16];	/* LBA Format Support */
+
+	uint8_t		_reserved2[192];
+
+	uint8_t		vs[3712];
+} __packed __aligned(8);
Index: src/sys/dev/ic/nvmevar.h
diff -u /dev/null src/sys/dev/ic/nvmevar.h:1.1
--- /dev/null	Sun May  1 10:21:02 2016
+++ src/sys/dev/ic/nvmevar.h	Sun May  1 10:21:02 2016
@@ -0,0 +1,180 @@
+/*	$NetBSD: nvmevar.h,v 1.1 2016/05/01 10:21:02 nonaka Exp $	*/
+/*	$OpenBSD: nvmevar.h,v 1.8 2016/04/14 11:18:32 dlg Exp $ */
+
+/*
+ * Copyright (c) 2014 David Gwynne <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/device.h>
+#include <sys/mutex.h>
+#include <sys/pool.h>
+#include <sys/queue.h>
+
+/*
+ * Bookkeeping for one chunk of DMA memory: the DMA map, the segment that
+ * backs it, its size and the address the kernel uses to access it.
+ */
+struct nvme_dmamem {
+	bus_dmamap_t		ndm_map;
+	bus_dma_segment_t	ndm_seg;
+	size_t			ndm_size;
+	void			*ndm_kva;
+};
+/*
+ * Accessors.  NVME_DMA_LEN() and NVME_DMA_DVA() only look at dm_segs[0],
+ * so they describe the whole allocation only when the map was loaded as a
+ * single segment.
+ */
+#define NVME_DMA_MAP(_ndm)	((_ndm)->ndm_map)
+#define NVME_DMA_LEN(_ndm)	((_ndm)->ndm_map->dm_segs[0].ds_len)
+#define NVME_DMA_DVA(_ndm)	((uint64_t)(_ndm)->ndm_map->dm_segs[0].ds_addr)
+#define NVME_DMA_KVA(_ndm)	((void *)(_ndm)->ndm_kva)
+
+struct nvme_softc;
+struct nvme_queue;
+
+/*
+ * Per-command state.  ccbs are linked on a SIMPLEQ through ccb_entry
+ * (struct nvme_queue keeps such a list in q_ccb_list).
+ */
+struct nvme_ccb {
+	SIMPLEQ_ENTRY(nvme_ccb)	ccb_entry;
+
+	bus_dmamap_t		ccb_dmamap;
+
+	/* caller context and the callback that receives the queue, this
+	 * ccb and a completion queue entry */
+	void			*ccb_cookie;
+	void			(*ccb_done)(struct nvme_queue *,
+				    struct nvme_ccb *, struct nvme_cqe *);
+
+	/*
+	 * NOTE(review): presumably this ccb's slice of the queue-wide PRP
+	 * list memory (q_ccb_prpls): offset, device address and kernel
+	 * pointer - confirm against nvme.c.
+	 */
+	bus_addr_t		ccb_prpl_off;
+	uint64_t		ccb_prpl_dva;
+	uint64_t		*ccb_prpl;
+
+	uint16_t		ccb_id;
+};
+SIMPLEQ_HEAD(nvme_ccb_list, nvme_ccb);
+
+/*
+ * State of one queue: DMA memory for the submission and completion sides,
+ * the doorbell register offsets, ring positions and the ccbs that belong
+ * to the queue.  Separate mutexes exist for the submission side, the
+ * completion side and the ccb list.
+ */
+struct nvme_queue {
+	struct nvme_softc	*q_sc;		/* owning controller */
+	kmutex_t		q_sq_mtx;
+	kmutex_t		q_cq_mtx;
+	struct nvme_dmamem	*q_sq_dmamem;
+	struct nvme_dmamem	*q_cq_dmamem;
+	bus_size_t 		q_sqtdbl; /* submission queue tail doorbell */
+	bus_size_t 		q_cqhdbl; /* completion queue head doorbell */
+	uint16_t		q_id;
+	uint32_t		q_entries;
+	uint32_t		q_sq_tail;
+	uint32_t		q_cq_head;
+	uint16_t		q_cq_phase;
+
+	/* NOTE(review): presumably q_ccbs has q_nccbs entries - confirm */
+	kmutex_t		q_ccb_mtx;
+	u_int			q_nccbs;
+	struct nvme_ccb		*q_ccbs;
+	struct nvme_ccb_list	q_ccb_list;
+	struct nvme_dmamem	*q_ccb_prpls;
+};
+
+/*
+ * Per-namespace slot in nvme_softc's sc_namespaces[] array; nvme_ns_get()
+ * maps namespace id N to slot N - 1.
+ */
+struct nvme_namespace {
+	struct nvm_identify_namespace *ident;	/* identify data */
+	device_t dev;
+};
+
+/*
+ * Bus-independent controller state.  The PCI front end embeds this as the
+ * first member of its own softc and fills in the device, register mapping,
+ * DMA tag, interrupt hooks, sc_ih, sc_use_mq and sc_nq before calling
+ * nvme_attach().
+ */
+struct nvme_softc {
+	device_t		sc_dev;
+
+	/* register mapping: tag, handle and mapped size */
+	bus_space_tag_t		sc_iot;
+	bus_space_handle_t	sc_ioh;
+	bus_size_t		sc_ios;
+	bus_dma_tag_t		sc_dmat;
+
+	/*
+	 * Bus hooks to set up / tear down the interrupt of queue qid.
+	 * The PCI implementations return 0 on success.
+	 */
+	int			(*sc_intr_establish)(struct nvme_softc *,
+				    uint16_t qid, struct nvme_queue *);
+	int			(*sc_intr_disestablish)(struct nvme_softc *,
+				    uint16_t qid);
+	void			**sc_ih;	/* interrupt handles, by qid */
+
+	/*
+	 * NOTE(review): presumably ready timeout, memory page size, maximum
+	 * data transfer size and maximum scatter/gather entries - confirm
+	 * against nvme.c.
+	 */
+	u_int			sc_rdy_to;
+	size_t			sc_mps;
+	size_t			sc_mdts;
+	u_int			sc_max_sgl;
+
+	struct nvm_identify_controller
+				sc_identify;
+
+	/* nvme_ns_get() bounds-checks namespace ids against sc_nn */
+	u_int			sc_nn;
+	struct nvme_namespace	*sc_namespaces;
+
+	bool			sc_use_mq;
+	u_int			sc_nq;		/* # of io queue (sc_q) */
+	struct nvme_queue	*sc_admin_q;
+	struct nvme_queue	**sc_q;
+
+	uint32_t		sc_flags;
+#define	NVME_F_ATTACHED	__BIT(0)
+};
+
+/*
+ * Little-endian memory accessors: lemtohN(p) loads an N-bit little-endian
+ * value through p and converts it to host order; htolemN(p, x) stores x
+ * through p in little-endian order.  The pointer is cast and dereferenced
+ * directly, so p has to be suitably aligned for the access width.
+ */
+#define	lemtoh16(p)	le16toh(*((uint16_t *)(p)))
+#define	lemtoh32(p)	le32toh(*((uint32_t *)(p)))
+#define	lemtoh64(p)	le64toh(*((uint64_t *)(p)))
+#define	htolem16(p, x)	(*((uint16_t *)(p)) = htole16(x))
+#define	htolem32(p, x)	(*((uint32_t *)(p)) = htole32(x))
+#define	htolem64(p, x)	(*((uint64_t *)(p)) = htole64(x))
+
+/*
+ * NOTE(review): presumably the attach arguments handed to child devices
+ * (one per namespace) - confirm against nvme.c and ld_nvme.c.
+ */
+struct nvme_attach_args {
+	uint16_t	naa_nsid;	/* namespace id */
+	uint32_t	naa_qentries;
+};
+
+/*
+ * Entry points of the bus-independent core, as used by the PCI front end:
+ * nvme_attach() and nvme_detach() return 0 on success, nvme_childdet() is
+ * registered as the child-detach callback, nvme_intr() is the handler for
+ * a single shared vector (argument: the softc) and the nvme_mq_*_intr()
+ * handlers serve one queue each (argument: the queue).
+ */
+int	nvme_attach(struct nvme_softc *);
+int	nvme_detach(struct nvme_softc *, int flags);
+void	nvme_childdet(device_t, device_t);
+int	nvme_intr(void *);
+int	nvme_mq_msi_intr(void *);
+int	nvme_mq_msix_intr(void *);
+
+/*
+ * Select the I/O queue for the calling CPU: the CPU index is folded onto
+ * the sc_nq available queues.
+ */
+static inline struct nvme_queue *
+nvme_get_q(struct nvme_softc *sc)
+{
+	const u_int qidx = cpu_index(curcpu()) % sc->sc_nq;
+
+	return sc->sc_q[qidx];
+}
+
+/*
+ * namespace
+ */
+/*
+ * Look up the slot of namespace nsid.  Namespace ids are 1-based; 0 and
+ * ids beyond sc_nn yield NULL.
+ */
+static inline struct nvme_namespace *
+nvme_ns_get(struct nvme_softc *sc, uint16_t nsid)
+{
+	if (nsid != 0 && nsid - 1 < sc->sc_nn)
+		return &sc->sc_namespaces[nsid - 1];
+
+	return NULL;
+}
+
+/*
+ * Implemented outside this header.
+ * NOTE(review): the uint16_t is presumably a 1-based namespace id, as
+ * taken by nvme_ns_get() - confirm against nvme.c.
+ */
+int	nvme_ns_identify(struct nvme_softc *, uint16_t);
+void	nvme_ns_free(struct nvme_softc *, uint16_t);
+
+/*
+ * Describes one namespace request passed to nvme_ns_dobio() or
+ * nvme_ns_sync().  Contexts come from nvme_ns_ctx_cache through
+ * nvme_ns_get_ctx() / nvme_ns_put_ctx().
+ */
+struct nvme_ns_context {
+	void		*nnc_cookie;	/* caller context */
+	void		(*nnc_done)(struct nvme_ns_context *);
+	uint16_t	nnc_nsid;	/* namespace id */
+
+	/*
+	 * NOTE(review): presumably the buffer, its size in bytes, the
+	 * sector size and the starting block of the transfer - confirm
+	 * against nvme.c.
+	 */
+	struct buf	*nnc_buf;
+	void		*nnc_data;
+	int		nnc_datasize;
+	int		nnc_secsize;
+	daddr_t		nnc_blkno;
+	u_int		nnc_flags;
+#define	NVME_NS_CTX_F_READ	__BIT(0)
+#define	NVME_NS_CTX_F_POLL	__BIT(1)
+
+	int		nnc_status;
+};
+
+/* Pool cache that backs struct nvme_ns_context; defined elsewhere. */
+extern pool_cache_t nvme_ns_ctx_cache;
+
+/*
+ * Get a context from / return a context to nvme_ns_ctx_cache.  "flags" is
+ * handed to pool_cache_get() unchanged.
+ */
+#define	nvme_ns_get_ctx(flags)	pool_cache_get(nvme_ns_ctx_cache, (flags))
+#define	nvme_ns_put_ctx(ctx)	pool_cache_put(nvme_ns_ctx_cache, (ctx))
+
+/* Namespace requests described by a struct nvme_ns_context. */
+int	nvme_ns_dobio(struct nvme_softc *, struct nvme_ns_context *);
+int	nvme_ns_sync(struct nvme_softc *, struct nvme_ns_context *);

Index: src/sys/dev/pci/nvme_pci.c
diff -u /dev/null src/sys/dev/pci/nvme_pci.c:1.1
--- /dev/null	Sun May  1 10:21:02 2016
+++ src/sys/dev/pci/nvme_pci.c	Sun May  1 10:21:02 2016
@@ -0,0 +1,451 @@
+/*	$NetBSD: nvme_pci.c,v 1.1 2016/05/01 10:21:02 nonaka Exp $	*/
+/*	$OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */
+
+/*
+ * Copyright (c) 2014 David Gwynne <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*-
+ * Copyright (C) 2016 NONAKA Kimihiro <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.1 2016/05/01 10:21:02 nonaka Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/device.h>
+#include <sys/bitops.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/interrupt.h>
+#include <sys/kmem.h>
+#include <sys/pmf.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include <dev/ic/nvmereg.h>
+#include <dev/ic/nvmevar.h>
+
+/*
+ * Tunables.
+ *
+ * nvme_pci_force_intx: non-zero skips the MSI-X and MSI probing in
+ *	nvme_pci_setup_intr() and allocates a single INTx vector.
+ * nvme_pci_mpsafe: non-zero marks the interrupt handlers PCI_INTR_MPSAFE
+ *	and distributes I/O queue interrupts over the CPUs.
+ * nvme_pci_mq: non-zero allows more than one I/O queue.
+ *
+ * MSI/MSI-X allocations are capped at two vectors (admin queue plus one
+ * I/O queue) unless both nvme_pci_mq and nvme_pci_mpsafe are non-zero.
+ */
+int nvme_pci_force_intx = 0;
+int nvme_pci_mpsafe = 0;
+int nvme_pci_mq = 1;		/* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */
+
+/* Configuration space offset of the BAR that holds the registers. */
+#define NVME_PCI_BAR		0x10
+
+/*
+ * PCI front end softc.  psc_nvme has to stay the first member: the
+ * interrupt hooks cast a struct nvme_softc pointer back to this type.
+ */
+struct nvme_pci_softc {
+	struct nvme_softc	psc_nvme;
+
+	pci_chipset_tag_t	psc_pc;
+	pci_intr_handle_t	*psc_intrs;	/* allocated vectors */
+	int			psc_nintrs;	/* # of psc_intrs entries */
+};
+
+/* autoconf glue */
+static int	nvme_pci_match(device_t, cfdata_t, void *);
+static void	nvme_pci_attach(device_t, device_t, void *);
+static int	nvme_pci_detach(device_t, int);
+
+/* Children are detached through nvme_childdet() from the core driver. */
+CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
+    nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, NULL,
+    nvme_childdet, DVF_DETACH_SHUTDOWN);
+
+/* interrupt hooks installed into struct nvme_softc, and vector setup */
+static int	nvme_pci_intr_establish(struct nvme_softc *,
+		    uint16_t, struct nvme_queue *);
+static int	nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
+static int	nvme_pci_setup_intr(struct pci_attach_args *,
+		    struct nvme_pci_softc *);
+
+/*
+ * Match on the PCI class code only: mass storage / NVM / NVMe programming
+ * interface.  Returns 1 on a match, 0 otherwise.
+ */
+static int
+nvme_pci_match(device_t parent, cfdata_t match, void *aux)
+{
+	struct pci_attach_args *pa = aux;
+
+	if (PCI_CLASS(pa->pa_class) != PCI_CLASS_MASS_STORAGE)
+		return 0;
+	if (PCI_SUBCLASS(pa->pa_class) != PCI_SUBCLASS_MASS_STORAGE_NVM)
+		return 0;
+	if (PCI_INTERFACE(pa->pa_class) != PCI_INTERFACE_NVM_NVME)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Attach an NVMe controller found on PCI.
+ *
+ * Picks the DMA tag, maps the register BAR (stopping short of the MSI-X
+ * table when that table is reported for BAR 0), allocates the interrupt
+ * vectors and the sc_ih[] handle array and, when only a single vector is
+ * in use, establishes the shared nvme_intr() handler.  In multi-queue
+ * mode no handler is established here; the sc_intr_establish hook is
+ * installed for that.  Finally control is handed to nvme_attach().
+ *
+ * Nothing is returned.  On failure everything acquired so far is released
+ * and NVME_F_ATTACHED stays clear, which turns nvme_pci_detach() into a
+ * no-op.
+ */
+static void
+nvme_pci_attach(device_t parent, device_t self, void *aux)
+{
+	struct nvme_pci_softc *psc = device_private(self);
+	struct nvme_softc *sc = &psc->psc_nvme;
+	struct pci_attach_args *pa = aux;
+	pcireg_t memtype;
+	char intr_xname[INTRDEVNAMEBUF];
+	char intrbuf[PCI_INTRSTR_LEN];
+	const char *intrstr = NULL;
+	bus_addr_t memaddr;
+	int flags, msixoff;
+	int i, nq, error;
+
+	sc->sc_dev = self;
+	psc->psc_pc = pa->pa_pc;
+	if (pci_dma64_available(pa))
+		sc->sc_dmat = pa->pa_dmat64;
+	else
+		sc->sc_dmat = pa->pa_dmat;
+
+	pci_aprint_devinfo(pa, NULL);
+
+	/* Map registers */
+	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
+	if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
+		aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
+		return;
+	}
+	sc->sc_iot = pa->pa_memt;
+	error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR,
+	    memtype, &memaddr, &sc->sc_ios, &flags);
+	if (error) {
+		aprint_error_dev(self, "can't get map info\n");
+		return;
+	}
+	/*
+	 * When the MSI-X table lives in the register BAR, map only the
+	 * part in front of the table.
+	 */
+	if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
+	    NULL)) {
+		pcireg_t msixtbl;
+		uint32_t table_offset;
+		int bir;
+
+		msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
+		    msixoff + PCI_MSIX_TBLOFFSET);
+		table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
+		/*
+		 * NOTE(review): this masks the table register with the PBA
+		 * BIR mask; presumably both BIR fields use the same bits -
+		 * confirm against pcireg.h.
+		 */
+		bir = msixtbl & PCI_MSIX_PBABIR_MASK;
+		if (bir == 0) {
+			sc->sc_ios = table_offset;
+		}
+	}
+	error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
+	    &sc->sc_ioh);
+	if (error != 0) {
+		aprint_error_dev(self, "can't map mem space (error=%d)\n",
+		    error);
+		return;
+	}
+
+	/* Establish interrupts */
+	if (nvme_pci_setup_intr(pa, psc) != 0) {
+		aprint_error_dev(self, "unable to allocate interrupt\n");
+		goto unmap;
+	}
+	sc->sc_intr_establish = nvme_pci_intr_establish;
+	sc->sc_intr_disestablish = nvme_pci_intr_disestablish;
+
+	/* One handle per I/O queue, plus one for the admin queue in mq mode */
+	nq = sc->sc_nq + (sc->sc_use_mq ? 1 : 0);
+	sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * nq, KM_SLEEP);
+	if (sc->sc_ih == NULL) {
+		aprint_error_dev(self, "unable to allocate ih memory\n");
+		goto intr_release;
+	}
+	/* "i" counts the handlers established here; the error path uses it */
+	i = 0;
+	if (!sc->sc_use_mq) {
+		for (; i < nq; i++) {
+			if (nvme_pci_mpsafe) {
+				pci_intr_setattr(pa->pa_pc, &psc->psc_intrs[i],
+				    PCI_INTR_MPSAFE, true);
+			}
+			snprintf(intr_xname, sizeof(intr_xname), "%s",
+			    device_xname(self));
+			sc->sc_ih[i] = pci_intr_establish_xname(pa->pa_pc,
+			    psc->psc_intrs[i], IPL_BIO, nvme_intr, sc,
+			    intr_xname);
+			if (sc->sc_ih[i] == NULL) {
+				aprint_error_dev(self,
+				    "unable to establish %s interrupt\n",
+				    intr_xname);
+				goto intr_disestablish;
+			}
+			intrstr = pci_intr_string(pa->pa_pc, psc->psc_intrs[i],
+			    intrbuf, sizeof(intrbuf));
+			aprint_normal_dev(sc->sc_dev, "interrupting at %s\n",
+			    intrstr);
+		}
+	}
+
+	if (nvme_attach(sc) != 0) {
+		/* error printed by nvme_attach() */
+		goto intr_disestablish;
+	}
+
+	if (!pmf_device_register(self, NULL, NULL))
+		aprint_error_dev(self, "couldn't establish power handler\n");
+
+	SET(sc->sc_flags, NVME_F_ATTACHED);
+	return;
+
+intr_disestablish:
+	/* Tear down only the handlers that were established above. */
+	while (--i >= 0)
+		pci_intr_disestablish(pa->pa_pc, sc->sc_ih[i]);
+	/*
+	 * kmem_free() has to be given the size of the allocation, which was
+	 * made with "nq" elements (sc_nq + 1 in multi-queue mode).
+	 */
+	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * nq);
+	sc->sc_ih = NULL;
+	sc->sc_nq = 0;
+intr_release:
+	pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
+	psc->psc_nintrs = 0;
+unmap:
+	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
+	sc->sc_ios = 0;
+}
+
+/*
+ * Detach the controller.  Does nothing when the attach did not complete.
+ * Otherwise the core driver is detached first (its error, if any, is
+ * returned) and then the handler set up by nvme_pci_attach() in
+ * single-vector mode, the handle array, the vectors and the register
+ * mapping are released.
+ */
+static int
+nvme_pci_detach(device_t self, int flags)
+{
+	struct nvme_pci_softc *psc = device_private(self);
+	struct nvme_softc *sc = &psc->psc_nvme;
+	int nih, n, rv;
+
+	if (ISSET(sc->sc_flags, NVME_F_ATTACHED) == 0)
+		return 0;
+
+	rv = nvme_detach(sc, flags);
+	if (rv != 0)
+		return rv;
+
+	/* same element count as used for the allocation in attach */
+	nih = sc->sc_use_mq ? sc->sc_nq + 1 : sc->sc_nq;
+	for (n = 0; !sc->sc_use_mq && n < nih; n++)
+		pci_intr_disestablish(psc->psc_pc, sc->sc_ih[n]);
+
+	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * nih);
+	pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
+	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
+
+	return 0;
+}
+
+/*
+ * sc_intr_establish hook: set up the interrupt handler of queue qid.
+ *
+ * In single-vector mode only qid 0 gets a handler (nvme_intr() with the
+ * softc as argument); other queues are silently accepted.  In multi-queue
+ * mode every queue gets its own vector and the MSI or MSI-X per-queue
+ * handler with "q" as argument; with nvme_pci_mpsafe set, I/O queue
+ * interrupts are additionally bound to a CPU chosen from the queue id.
+ *
+ * Returns 0 on success and 1 when the handler cannot be established.
+ */
+static int
+nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
+    struct nvme_queue *q)
+{
+	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
+	const bool mq = sc->sc_use_mq;
+	char xname[INTRDEVNAMEBUF];
+	char strbuf[PCI_INTRSTR_LEN];
+	const char *where = NULL;
+	int (*handler)(void *);
+	void *cookie;
+	kcpuset_t *cpus;
+	cpuid_t target;
+	int rv;
+
+	/* one shared vector: nothing to do for the I/O queues */
+	if (!mq && qid > 0)
+		return 0;
+
+	KASSERT(sc->sc_ih[qid] == NULL);
+
+	if (nvme_pci_mpsafe) {
+		pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
+		    PCI_INTR_MPSAFE, true);
+	}
+
+	/* choose the interrupt name, the handler and its argument */
+	if (mq) {
+		if (qid != 0) {
+			snprintf(xname, sizeof(xname), "%s ioq%d",
+			    device_xname(sc->sc_dev), qid);
+		} else {
+			snprintf(xname, sizeof(xname), "%s adminq",
+			    device_xname(sc->sc_dev));
+		}
+		cookie = q;
+		handler =
+		    (pci_intr_type(psc->psc_intrs[qid]) == PCI_INTR_TYPE_MSIX) ?
+		    nvme_mq_msix_intr : nvme_mq_msi_intr;
+	} else {
+		snprintf(xname, sizeof(xname), "%s",
+		    device_xname(sc->sc_dev));
+		cookie = sc;
+		handler = nvme_intr;
+	}
+
+	sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
+	    psc->psc_intrs[qid], IPL_BIO, handler, cookie, xname);
+	if (sc->sc_ih[qid] == NULL) {
+		aprint_error_dev(sc->sc_dev,
+		    "unable to establish %s interrupt\n", xname);
+		return 1;
+	}
+
+	where = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], strbuf,
+	    sizeof(strbuf));
+
+	if (!mq) {
+		aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", where);
+		return 0;
+	}
+	if (qid == 0) {
+		aprint_normal_dev(sc->sc_dev,
+		    "for admin queue interrupting at %s\n", where);
+		return 0;
+	}
+	if (!nvme_pci_mpsafe) {
+		aprint_normal_dev(sc->sc_dev,
+		    "for io queue %d interrupting at %s\n", qid, where);
+		return 0;
+	}
+
+	/* spread the I/O queue interrupts over the CPUs, round robin */
+	kcpuset_create(&cpus, true);
+	target = (qid - 1) % ncpu;
+	kcpuset_set(cpus, target);
+	rv = interrupt_distribute(sc->sc_ih[qid], cpus, NULL);
+	kcpuset_destroy(cpus);
+
+	aprint_normal_dev(sc->sc_dev,
+	    "for io queue %d interrupting at %s", qid, where);
+	if (rv == 0)
+		aprint_normal(" affinity to cpu%lu", target);
+	aprint_normal("\n");
+
+	return 0;
+}
+
+/*
+ * sc_intr_disestablish hook: remove the interrupt handler of queue qid
+ * and clear its slot in sc_ih[].  In single-vector mode the I/O queues
+ * have no handler of their own, so there is nothing to do for them.
+ * Always returns 0.
+ */
+static int
+nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
+{
+	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
+	void **slot;
+
+	if (qid > 0 && !sc->sc_use_mq)
+		return 0;
+
+	slot = &sc->sc_ih[qid];
+	KASSERT(*slot != NULL);
+
+	pci_intr_disestablish(psc->psc_pc, *slot);
+	*slot = NULL;
+
+	return 0;
+}
+
+/*
+ * Allocate the interrupt vectors for the controller.
+ *
+ * The preferred order is MSI-X, then MSI, then INTx.  For MSI-X and MSI a
+ * trial allocation is made first, and released again, to learn how many
+ * vectors can really be had; the final allocation is then made with those
+ * counts.  If the final allocation fails or comes back short, the next
+ * lower interrupt type is tried.
+ *
+ * On success psc_intrs/psc_nintrs describe the vectors, sc_use_mq is set
+ * when more than one vector was obtained and sc_nq holds the number of
+ * I/O queues (vectors minus the one for the admin queue, or 1 when a
+ * single vector is shared).  Returns 0 on success, otherwise the error
+ * from pci_intr_alloc() or EBUSY.
+ */
+static int
+nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
+{
+	struct nvme_softc *sc = &psc->psc_nvme;
+	pci_intr_handle_t *ihps;
+	int counts[PCI_INTR_TYPE_SIZE], alloced_counts[PCI_INTR_TYPE_SIZE];
+	int max_type, intr_type;
+	int error;
+
+	/*
+	 * NOTE(review): on this path the MSI-X and MSI entries of counts[]
+	 * are never written before being copied below; presumably
+	 * pci_intr_alloc() ignores types above max_type - confirm.
+	 */
+	if (nvme_pci_force_intx) {
+		max_type = PCI_INTR_TYPE_INTX;
+		goto force_intx;
+	}
+
+	/* MSI-X */
+	/* at most one vector per CPU plus one for the admin queue */
+	max_type = PCI_INTR_TYPE_MSIX;
+	counts[PCI_INTR_TYPE_MSIX] = min(pci_msix_count(pa->pa_pc, pa->pa_tag),
+	    ncpu + 1);
+	if (counts[PCI_INTR_TYPE_MSIX] > 0) {
+		/* trial allocation, released right away */
+		memset(alloced_counts, 0, sizeof(alloced_counts));
+		alloced_counts[PCI_INTR_TYPE_MSIX] = counts[PCI_INTR_TYPE_MSIX];
+		if (pci_intr_alloc(pa, &ihps, alloced_counts,
+		    PCI_INTR_TYPE_MSIX)) {
+			counts[PCI_INTR_TYPE_MSIX] = 0;
+		} else {
+			counts[PCI_INTR_TYPE_MSIX] =
+			    alloced_counts[PCI_INTR_TYPE_MSIX];
+			pci_intr_release(pa->pa_pc, ihps,
+			    alloced_counts[PCI_INTR_TYPE_MSIX]);
+		}
+	}
+	/* MSI-X is only used with at least two vectors */
+	if (counts[PCI_INTR_TYPE_MSIX] < 2) {
+		counts[PCI_INTR_TYPE_MSIX] = 0;
+		max_type = PCI_INTR_TYPE_MSI;
+	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
+		counts[PCI_INTR_TYPE_MSIX] = 2;	/* adminq + 1 ioq */
+	}
+
+retry_msi:
+	/* MSI */
+	counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
+	if (counts[PCI_INTR_TYPE_MSI] > 0) {
+		/*
+		 * Halve the count for as long as the halved value still
+		 * exceeds ncpu + 1.
+		 */
+		while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
+			if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
+				break;
+			counts[PCI_INTR_TYPE_MSI] /= 2;
+		}
+		/* trial allocation, released right away */
+		memset(alloced_counts, 0, sizeof(alloced_counts));
+		alloced_counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSI];
+		if (pci_intr_alloc(pa, &ihps, alloced_counts,
+		    PCI_INTR_TYPE_MSI)) {
+			counts[PCI_INTR_TYPE_MSI] = 0;
+		} else {
+			counts[PCI_INTR_TYPE_MSI] =
+			    alloced_counts[PCI_INTR_TYPE_MSI];
+			pci_intr_release(pa->pa_pc, ihps,
+			    alloced_counts[PCI_INTR_TYPE_MSI]);
+		}
+	}
+	if (counts[PCI_INTR_TYPE_MSI] < 1) {
+		counts[PCI_INTR_TYPE_MSI] = 0;
+		if (max_type == PCI_INTR_TYPE_MSI)
+			max_type = PCI_INTR_TYPE_INTX;
+	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
+		if (counts[PCI_INTR_TYPE_MSI] > 2)
+			counts[PCI_INTR_TYPE_MSI] = 2;	/* adminq + 1 ioq */
+	}
+
+force_intx:
+	/* INTx */
+	counts[PCI_INTR_TYPE_INTX] = 1;
+
+	/* the real allocation, using the counts determined above */
+	memcpy(alloced_counts, counts, sizeof(counts));
+	error = pci_intr_alloc(pa, &ihps, alloced_counts, max_type);
+	if (error) {
+		if (max_type != PCI_INTR_TYPE_INTX) {
+retry:
+			/* step down: MSI-X -> MSI -> INTx */
+			memset(counts, 0, sizeof(counts));
+			if (max_type == PCI_INTR_TYPE_MSIX) {
+				max_type = PCI_INTR_TYPE_MSI;
+				goto retry_msi;
+			} else {
+				max_type = PCI_INTR_TYPE_INTX;
+				goto force_intx;
+			}
+		}
+		return error;
+	}
+
+	/* fewer vectors than requested: give them back and step down */
+	intr_type = pci_intr_type(ihps[0]);
+	if (alloced_counts[intr_type] < counts[intr_type]) {
+		if (intr_type != PCI_INTR_TYPE_INTX) {
+			pci_intr_release(pa->pa_pc, ihps,
+			    alloced_counts[intr_type]);
+			max_type = intr_type;
+			goto retry;
+		}
+		/* NOTE(review): ihps is not released on this return */
+		return EBUSY;
+	}
+
+	psc->psc_intrs = ihps;
+	psc->psc_nintrs = alloced_counts[intr_type];
+	/* MSI may have been allocated above ncpu + 1; use only that many */
+	if (intr_type == PCI_INTR_TYPE_MSI) {
+		if (alloced_counts[intr_type] > ncpu + 1)
+			alloced_counts[intr_type] = ncpu + 1;
+	}
+	sc->sc_use_mq = alloced_counts[intr_type] > 1;
+	sc->sc_nq = sc->sc_use_mq ? alloced_counts[intr_type] - 1 : 1;
+	return 0;
+}

Reply via email to