Re: [RFC PATCH: v4 1/2] add mi device in qemu
This patch addresses most of the review comments raised by Klaus. Mainly, I have ensured that the emulated mi device in qemu posts the response rather than waiting for the guest-os(mi utility) to ask for the response. For the same purpose, I have added a new device called nvme-mi-slave which acts as an i2c slave and to which the emulated mi device posts the response. The guest-os(mi utility) reads response from this slave. The nvme-mi-slave has to be used in tandem with nvme-mi device. In addition to the above change, we will be providing the mi utility on the guest as a standalone rather than as a plugin to the nvme-cli application. We will be glad to hear any suggestions, corrections in the approach we have used. Please find the patch below: diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build index 3cf4004..8768ca1 100644 --- a/hw/nvme/meson.build +++ b/hw/nvme/meson.build @@ -1 +1 @@ -softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c')) +softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c', 'nvme-mi.c', 'nvme-mi-slave.c')) diff --git a/hw/nvme/nvme-mi-slave.c b/hw/nvme/nvme-mi-slave.c new file mode 100644 index 000..e6ada07 --- /dev/null +++ b/hw/nvme/nvme-mi-slave.c @@ -0,0 +1,93 @@ +/* + * QEMU NVMe-MI Controller + * + * Copyright (c) 2021, Samsung Electronics co Ltd. + * + * Written by Padmakar Kalghatgi + * + * This code is licensed under the GNU GPL v2 or later. + * + * This module acts as a host slave, to which the QEMU-MI module + * will post the response to. 
+ * + * Need to use as following to enable this device + * -device nvme-mi-i2c-slave, addr= + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-core.h" +#include "hw/block/block.h" +#include "nvme-mi-slave.h" + +static uint8_t nvme_mi_slave_i2c_recv(I2CSlave *s) +{ +Nvmemislave *mislave = (Nvmemislave *)s; + +if (mislave->syncflag == true) { +return -1; +} +return mislave->recvbuffer[mislave->recvlen++]; +} + +static int nvme_mi_slave_i2c_send(I2CSlave *s, uint8_t data) +{ +Nvmemislave *mislave = (Nvmemislave *)s; +mislave->syncflag = true; + +switch (mislave->pktpos) { +case NVME_MI_BYTE_LENGTH_POS: +mislave->pktlen = data; +break; +case NVME_MI_EOM_POS: +mislave->eom = (data >> 6) & 1; +break; +} +mislave->recvbuffer[mislave->sendlen++] = data; +mislave->pktpos++; +if (mislave->pktpos == mislave->pktlen + 3) { +mislave->pktlen = 0; +mislave->pktpos = 0; + +if (mislave->eom == 1) { +mislave->sendlen = 0; +mislave->recvlen = 0; +mislave->eom = 0; +mislave->syncflag = false; +} +} +return 0; +} + +static void nvme_mi_slave_realize(DeviceState *dev, Error **errp) +{ +Nvmemislave *mislave = (Nvmemislave *)dev; +mislave->sendlen = 0; +mislave->recvlen = 0; +mislave->eom = 0; +mislave->syncflag = false; +} + +static void nvme_mi_slave_class_init(ObjectClass *oc, void *data) +{ +I2CSlaveClass *k = I2C_SLAVE_CLASS(oc); +DeviceClass *dc = DEVICE_CLASS(oc); + +dc->realize = nvme_mi_slave_realize; +k->recv = nvme_mi_slave_i2c_recv; +k->send = nvme_mi_slave_i2c_send; +} + +static const TypeInfo nvme_mi_slave = { +.name = TYPE_NVME_MI_SLAVE, +.parent = TYPE_I2C_SLAVE, +.instance_size = sizeof(Nvmemislave), +.class_init = nvme_mi_slave_class_init, +}; + +static void register_types(void) +{ +type_register_static(_mi_slave); +} + +type_init(register_types); diff --git a/hw/nvme/nvme-mi-slave.h b/hw/nvme/nvme-mi-slave.h new file mode 100644 index 000..971411a --- /dev/null +++ b/hw/nvme/nvme-mi-slave.h @@ -0,0 +1,28 @@ +#ifndef NVMEMISLAVEH +#define 
NVMEMISLAVEH + +#include "hw/i2c/i2c.h" +#define TYPE_NVME_MI_SLAVE "nvme-mi-i2c-slave" + +#define MAX_NVME_MI_BUF_SIZE 5000 + +enum Nvmemislavepktpos +{ + NVME_MI_ADDR_POS = 0, + NVME_MI_BYTE_LENGTH_POS = 2, + NVME_MI_EOM_POS = 7 +}; + +typedef struct Nvmemislave +{ +I2CSlave parent_obj; +uint32_t sendlen; +uint32_t recvlen; +uint32_t pktpos; +uint32_t pktlen; +uint8_t eom; +bool syncflag; +u_char recvbuffer[MAX_NVME_MI_BUF_SIZE]; +} Nvmemislave; + +#endif \ No newline at end of file diff --git a/hw/nvme/nvme-mi.c b/hw/nvme/nvme-mi.c new file mode 100644 index 000..ad98bd8 --- /dev/null +++ b/hw/nvme/nvme-mi.c @@ -0,0 +1,684 @@ +/* + * QEMU NVMe-MI Controller + * + * Copyright (c) 2021, Samsung Electronics co Ltd. + * + * Written by Padmakar Kalghatgi + *Arun Kumar Agasar + *Saurav Kumar + * + * This code
Re: [RFC PATCH: v3 1/2] add mi device in qemu
On Wed, Sep 29, 2021 at 06:37:54AM +0200, Klaus Jensen wrote: On Aug 3 12:54, Padmakar Kalghatgi wrote: From: padmakar This patch contains the implementation of certain commands of nvme-mi specification.The MI commands are useful to manage/configure/monitor the device.Eventhough the MI commands can be sent via the inband NVMe-MI send/recieve commands, the idea here is to emulate the sideband interface for MI. The changes here includes the interface for i2c/smbus for nvme-mi protocol. We have used i2c address of 0x15 using which the guest VM can send and recieve the nvme-mi commands. Since the nvme-mi device uses the I2C_SLAVE as parent, we have used the send and recieve callbacks by which the nvme-mi device will get the required notification. With the callback approach, we have eliminated the use of threads. One needs to specify the following command in the launch to specify the nvme-mi device, link to nvme and assign the i2c address. <-device nvme-mi,nvme=nvme0,address=0x15> This module makes use of the NvmeCtrl structure of the nvme module, to fetch relevant information of the nvme device which are used in some of the mi commands. Eventhough certain commands might require modification to the nvme module, currently we have currently refrained from making changes to the nvme module. cmd-name cmd-type * read nvme-mi dsnvme-mi configuration set nvme-mi configuration get nvme-mi vpd read nvme-mi vpd write nvme-mi controller Health Staus Poll nvme-mi nvme subsystem health status poll nvme-mi identify nvme-admin get log page nvme-admin get features nvme-admin Signed-off-by: Padmakar Kalghatgi Reviewed-by: Klaus Birkelund Jensen Reviewed-by: Jaegyu Choi v3 -- rebased on master --- hw/nvme/meson.build | 2 +- hw/nvme/nvme-mi.c | 650 hw/nvme/nvme-mi.h | 297 3 files changed, 948 insertions(+), 1 deletion(-) create mode 100644 hw/nvme/nvme-mi.c create mode 100644 hw/nvme/nvme-mi.h Some general comments. * Please be consistent about MI vs Mi in naming. 
I have no preference, but NvmeMi is probably most aligned with existing style. * hw/nvme generally does not mix declarations and code. Please move variables declarations to the top of their scope. And please get rid of the hungarian notation ({b,l,u,...}VarName naming) ;) * I'd like that the header was cleaned up to not include stuff that isn't used. * I understand that you'd like to not impact the hw/nvme/ctrl.c device too much, but the Identify, Get Log Page and Get Feature "passthru" commands could really benefit from using the same code as in hw/nvme/ctrl.c - this requires a bit of refactoring such that the data can be returned explicitly instead of being directly DMA'ed to the host. * This is not compliant with the MCTP I2C/SMBus binding spec. The spec states that all transactions are based on the SMBus Block Write bus protocol. This means that responses require that the device (which is defined as an I2C slave must itself master the bus and do a Block Write to the message originator (the address of which is contained in the fourth byte of the packet). diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build index 3cf4004..8dac50e 100644 --- a/hw/nvme/meson.build +++ b/hw/nvme/meson.build @@ -1 +1 @@ -softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c')) +softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c', 'nvme-mi.c')) diff --git a/hw/nvme/nvme-mi.c b/hw/nvme/nvme-mi.c new file mode 100644 index 000..a90ce90 --- /dev/null +++ b/hw/nvme/nvme-mi.c @@ -0,0 +1,650 @@ +/* + * QEMU NVMe-MI Controller + * + * Copyright (c) 2021, Samsung Electronics co Ltd. + * + * Written by Padmakar Kalghatgi + *Arun Kumar Agasar + *Saurav Kumar + * + * This code is licensed under the GNU GPL v2 or later. 
+ */ + +/** + * Reference Specs: https://protect2.fireeye.com/v1/url?k=b5aa98b4-d54805e9-b5ab13fb-000babd9f1ba-0224a06b55b4fe71=1=f0445d9e-6d0d-4fac-8d43-1785c5d98f8e=http%3A%2F%2Fwww.nvmexpress.org%2F, + * + * + * Usage + * - + * The nvme-mi device has to be used along with nvme device only + * + * Add options: + *-device nvme-mi,nvme=,address=0x15", Considering that NVMe-MI can run with various MCTP bindings, I think this particular instance of it should be called 'nvme-mi-i2c'. Supporting VDM on a PCIe binding is probably not really a goal at all, but it's better to be explicit about this I think. + * + */ + +#include "qemu/osdep.h" +#include "hw/qdev-core.h" +#include "hw/block/block.
Re: [RFC PATCH: v3 1/2] add mi device in qemu
On Wed, Aug 18, 2021 at 08:01:03AM +0200, Klaus Jensen wrote: On Aug 3 12:54, Padmakar Kalghatgi wrote: From: padmakar This patch contains the implementation of certain commands of nvme-mi specification.The MI commands are useful to manage/configure/monitor the device.Eventhough the MI commands can be sent via the inband NVMe-MI send/recieve commands, the idea here is to emulate the sideband interface for MI. The changes here includes the interface for i2c/smbus for nvme-mi protocol. We have used i2c address of 0x15 using which the guest VM can send and recieve the nvme-mi commands. Since the nvme-mi device uses the I2C_SLAVE as parent, we have used the send and recieve callbacks by which the nvme-mi device will get the required notification. With the callback approach, we have eliminated the use of threads. One needs to specify the following command in the launch to specify the nvme-mi device, link to nvme and assign the i2c address. <-device nvme-mi,nvme=nvme0,address=0x15> This module makes use of the NvmeCtrl structure of the nvme module, to fetch relevant information of the nvme device which are used in some of the mi commands. Eventhough certain commands might require modification to the nvme module, currently we have currently refrained from making changes to the nvme module. cmd-name cmd-type * read nvme-mi dsnvme-mi configuration set nvme-mi configuration get nvme-mi vpd read nvme-mi vpd write nvme-mi controller Health Staus Poll nvme-mi nvme subsystem health status poll nvme-mi identify nvme-admin get log page nvme-admin get features nvme-admin Signed-off-by: Padmakar Kalghatgi Reviewed-by: Klaus Birkelund Jensen Reviewed-by: Jaegyu Choi My Reviewed-by here was added by mistake. I've not given it my formal R-b, but I'll provide a proper review on-list ASAP. But just glossing over it, I like this approach a lot better than v1 (vsock). Apologies for the mistake. Looking forward for the feedback on the i2c implementation.
[RFC PATCH: v3 1/2] add mi device in qemu
From: padmakar This patch contains the implementation of certain commands of the nvme-mi specification. The MI commands are useful to manage/configure/monitor the device. Even though the MI commands can be sent via the inband NVMe-MI send/receive commands, the idea here is to emulate the sideband interface for MI. The changes here include the interface for i2c/smbus for the nvme-mi protocol. We have used the i2c address of 0x15 using which the guest VM can send and receive the nvme-mi commands. Since the nvme-mi device uses I2C_SLAVE as parent, we have used the send and receive callbacks by which the nvme-mi device will get the required notification. With the callback approach, we have eliminated the use of threads. One needs to specify the following command in the launch to specify the nvme-mi device, link to nvme and assign the i2c address. <-device nvme-mi,nvme=nvme0,address=0x15> This module makes use of the NvmeCtrl structure of the nvme module, to fetch relevant information of the nvme device which is used in some of the mi commands. Even though certain commands might require modification to the nvme module, we have currently refrained from making changes to the nvme module. 
cmd-name cmd-type * read nvme-mi dsnvme-mi configuration set nvme-mi configuration get nvme-mi vpd read nvme-mi vpd write nvme-mi controller Health Staus Poll nvme-mi nvme subsystem health status poll nvme-mi identify nvme-admin get log page nvme-admin get features nvme-admin Signed-off-by: Padmakar Kalghatgi Reviewed-by: Klaus Birkelund Jensen Reviewed-by: Jaegyu Choi v3 -- rebased on master --- hw/nvme/meson.build | 2 +- hw/nvme/nvme-mi.c | 650 hw/nvme/nvme-mi.h | 297 3 files changed, 948 insertions(+), 1 deletion(-) create mode 100644 hw/nvme/nvme-mi.c create mode 100644 hw/nvme/nvme-mi.h diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build index 3cf4004..8dac50e 100644 --- a/hw/nvme/meson.build +++ b/hw/nvme/meson.build @@ -1 +1 @@ -softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c')) +softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c', 'nvme-mi.c')) diff --git a/hw/nvme/nvme-mi.c b/hw/nvme/nvme-mi.c new file mode 100644 index 000..a90ce90 --- /dev/null +++ b/hw/nvme/nvme-mi.c @@ -0,0 +1,650 @@ +/* + * QEMU NVMe-MI Controller + * + * Copyright (c) 2021, Samsung Electronics co Ltd. + * + * Written by Padmakar Kalghatgi + *Arun Kumar Agasar + *Saurav Kumar + * + * This code is licensed under the GNU GPL v2 or later. 
+ */ + +/** + * Reference Specs: http://www.nvmexpress.org, + * + * + * Usage + * - + * The nvme-mi device has to be used along with nvme device only + * + * Add options: + *-device nvme-mi,nvme=,address=0x15", + * + */ + +#include "qemu/osdep.h" +#include "hw/qdev-core.h" +#include "hw/block/block.h" +#include "hw/pci/msix.h" +#include "nvme.h" +#include "nvme-mi.h" +#include "qemu/crc32c.h" + +#define NVME_TEMPERATURE 0x143 +#define NVME_TEMPERATURE_WARNING 0x157 +#define NVME_TEMPERATURE_CRITICAL 0x175 + +static void nvme_mi_send_resp(NvmeMiCtrl *ctrl_mi, uint8_t *resp, uint32_t size) +{ +uint32_t crc_value = crc32c(0x, resp, size); +size += 4; +uint32_t offset = 0; +uint32_t ofst = 0; +uint32_t som = 1; +uint32_t eom = 0; +uint32_t pktseq = 0; +uint32_t mtus = ctrl_mi->mctp_unit_size; +while (size > 0) { +uint32_t sizesent = size > mtus ? mtus : size; +size -= sizesent; +eom = size > 0 ? 0 : 1; +g_autofree uint8_t *buf = (uint8_t *)g_malloc0(sizesent + 8); +buf[2] = sizesent + 5; +buf[7] = (som << 7) | (eom << 6) | (pktseq << 5); +som = 0; +memcpy(buf + 8, resp + offset, sizesent); +offset += sizesent; +if (size <= 0) { +memcpy(buf + 8 + offset , _value, sizeof(crc_value)); +} +memcpy(ctrl_mi->misendrecv.sendrecvbuf + ofst, buf, sizesent + 8); +ofst += sizesent + 8; +} +} + +static void nvme_mi_resp_hdr_init(NvmeMIResponse *resp, bool bAdminCommand) +{ +resp->msg_header.msgtype = 4; +resp->msg_header.ic = 1; +resp->msg_header.csi = 0; +resp->msg_header.reserved = 0; +resp->msg_header.nmimt = bAdminCommand ? 2 : 1; +resp->msg_header.ror = 1; +resp->msg_header.reserved1 = 0; +} +static void nvme_mi_nvm_subsys_ds(NvmeMiCtrl *ctrl_mi, NvmeMIRequest *req) +{ +Nvm
Re: [RFC PATCH 1/2] hw/nvme: add mi device
On Tue, Jul 13, 2021 at 10:37:23AM +0100, Stefan Hajnoczi wrote: On Tue, Jul 13, 2021 at 06:30:28AM +0100, Christoph Hellwig wrote: On Tue, Jul 13, 2021 at 06:30:28AM +0100, Christoph Hellwig wrote: On Mon, Jul 12, 2021 at 12:03:27PM +0100, Stefan Hajnoczi wrote: > Why did you decide to implement -device nvme-mi as a device on > TYPE_NVME_BUS? If the NVMe spec somehow requires this then I'm surprised > that there's no NVMe bus interface (callbacks). It seems like this could > just as easily be a property of an NVMe controller -device > nvme,mi=on|off or -device nvme-subsys,mi=on|off? I'm probably just not > familiar enough with MI and NVMe architecture... I'm too far away from qemu these days to understand what TYPE_NVME_BUS is. Bt NVMe-MI has tree possible transports: 1) out of band through smbus. This seems something that could be trivially modelled in qemu 2) out of band over MCTP / PCIe VDM. 3) in band using NVMe admin commands that pass through MI commands Thanks for explaining! Common NVMe-MI code can be shared by -device nvme-mi-smbus, in-band NVMe MI commands (part of -device nvme), a vsock transport, etc. This patch has nvme_mi_admin_command() as the entry point to common MI code, so not much needs to be done to achieve this. My question about why -device nvme-mi was because this "device" doesn't implement any bus interface (callbacks). The bus effectively just serves as an owner of this device. The guest does not access the device via the bus. So I'm not sure a -device is appropriate, it's an usual device. If the device is kept, please name it -device nvme-mi-vsock so it's clear this is the NVMe-MI vsock transport. I think the device could be dropped and instead an -device nvme,mi-vsock=on|off property could be added to enable the MI vsock transport on a specific NVMe controller. This raises the question of whether the port number should be configurable so multiple vsock Management Endpoints can coexist. 
I don't have time to explore the architectural model, but here's the link in case anyone wants to think through all the options for NVMe MI Management Endpoints and how QEMU should model them: "1.4 NVM Subsystem Architectural Model" https://protect2.fireeye.com/v1/url?k=8ee99db1-ee0b00ec-8ee816fe-000babd9f1ba-c174da71c1d11e79=1=b7b9709a-33ac-4d98-a6c0-ff53377a3278=https%3A%2F%2Fnvmexpress.org%2Fwp-content%2Fuploads%2FNVM-Express-Management-Interface-1.2-2021.06.02-Ratified.pdf Stefan Thanks Stefan for the suggestion.
Re: [RFC PATCH 1/2] hw/nvme: add mi device
On Mon, Jul 12, 2021 at 12:03:27PM +0100, Stefan Hajnoczi wrote: On Fri, Jul 09, 2021 at 07:25:45PM +0530, Padmakar Kalghatgi wrote: The enclosed patch contains the implementation of certain commands of nvme-mi specification.The MI commands are useful to manage/configure/monitor the device.Eventhough the MI commands can be sent via the inband NVMe-MI send/recieve commands, the idea here is to emulate the sideband interface for MI. Since the nvme-mi specification deals in communicating to the nvme subsystem via. a sideband interface, in this qemu implementation, virtual-vsock is used for making the sideband communication, the guest VM needs to make the connection to the specific cid of the vsock of the qemu host. One needs to specify the following command in the launch to specify the nvme-mi device, cid and to setup the vsock: -device nvme-mi,bus= -device vhost-vsock-pci, guest-cid= The following commands are tested with nvme-cli by hooking to the cid of the vsock as shown above and use the socket send/recieve commands to issue the commands and get the response. we are planning to push the changes for nvme-cli as well to test the MI functionality. Is the purpose of this feature (-device nvme-mi) testing MI with QEMU's NVMe implementation? My understanding is that instead of inventing an out-of-band interface in the form of a new paravirtualized device, you decided to use vsock to send MI commands from the guest to QEMU? As the connection can be established by the guest VM at any point, we have created a thread which is looking for a connection request. Please suggest if there is a native/better way to handle this. QEMU has an event-driven architecture and uses threads sparingly. When it uses threads it uses qemu_create_thread() instead of pthread_create(), but I suggest using qemu_set_fd_handler() or a coroutine with QIOChannel to integrate into the QEMU event loop instead. 
I didn't see any thread synchronization, so I'm not sure if accessing NVMe state from the MI thread is safe. Changing the code to use QEMU's event loop can solve that problem since there's no separate thread. vsock mimcs the sideband communication hence we used it. However we are working the smbus/i2c implementation for nvme-mi in qemu/nvme-cli, we will send the patch in few days. to communicate with nvme-mi over smbus/i2c, nvme-mi device needs to inherit from the i2c class which has callbacks for sending and recieving messages, this approach would get rid of the threads. This module makes use of the NvmeCtrl structure of the nvme module, to fetch relevant information of the nvme device which are used in some of the mi commands. Eventhough certain commands might require modification to the nvme module, currently we have currently refrained from making changes to the nvme module. Why did you decide to implement -device nvme-mi as a device on TYPE_NVME_BUS? If the NVMe spec somehow requires this then I'm surprised that there's no NVMe bus interface (callbacks). It seems like this could just as easily be a property of an NVMe controller -device nvme,mi=on|off or -device nvme-subsys,mi=on|off? I'm probably just not familiar enough with MI and NVMe architecture... Stefan since nvme communication happens over pcie and nvme-mi happens over smbus/i2c nvme-mi cannot be a property of nvme rather it should be a separate device which will be on the smbus/i2c.
Re: [RFC PATCH 1/2] hw/nvme: add mi device
On Fri, Jul 09, 2021 at 08:58:42AM -0700, Keith Busch wrote: On Fri, Jul 09, 2021 at 07:25:45PM +0530, Padmakar Kalghatgi wrote: The following commands are tested with nvme-cli by hooking to the cid of the vsock as shown above and using the socket send/receive commands to issue the commands and get the response. Why sockets? Shouldn't mi targets use smbus for that? vsock mimics the sideband communication, hence we used it. However, we are working on the smbus/i2c implementation for nvme-mi in qemu/nvme-cli; we will send the patch in a few days.
[RFC PATCH 1/2] hw/nvme: add mi device
The enclosed patch contains the implementation of certain commands of nvme-mi specification.The MI commands are useful to manage/configure/monitor the device.Eventhough the MI commands can be sent via the inband NVMe-MI send/recieve commands, the idea here is to emulate the sideband interface for MI. Since the nvme-mi specification deals in communicating to the nvme subsystem via. a sideband interface, in this qemu implementation, virtual-vsock is used for making the sideband communication, the guest VM needs to make the connection to the specific cid of the vsock of the qemu host. One needs to specify the following command in the launch to specify the nvme-mi device, cid and to setup the vsock: -device nvme-mi,bus= -device vhost-vsock-pci, guest-cid= The following commands are tested with nvme-cli by hooking to the cid of the vsock as shown above and use the socket send/recieve commands to issue the commands and get the response. we are planning to push the changes for nvme-cli as well to test the MI functionality. As the connection can be established by the guest VM at any point, we have created a thread which is looking for a connection request. Please suggest if there is a native/better way to handle this. This module makes use of the NvmeCtrl structure of the nvme module, to fetch relevant information of the nvme device which are used in some of the mi commands. Eventhough certain commands might require modification to the nvme module, currently we have currently refrained from making changes to the nvme module. Since this is our first foray into implementing a new module in qemu, we will be happy to receive comments, suggestions to improve the current implementation. 
cmd-name cmd-type * read nvme-mi dsnvme-mi configuration set nvme-mi configuration get nvme-mi vpd read nvme-mi vpd write nvme-mi controller Health Staus Poll nvme-mi nvme subsystem health status poll nvme-mi identify nvme-admin get log page nvme-admin get features nvme-admin Signed-off-by: Padmakar Kalghatgi --- hw/nvme/meson.build | 2 +- hw/nvme/nvme-mi.c | 676 hw/nvme/nvme-mi.h | 288 ++ 3 files changed, 965 insertions(+), 1 deletion(-) create mode 100644 hw/nvme/nvme-mi.c create mode 100644 hw/nvme/nvme-mi.h diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build index 3cf4004..8dac50e 100644 --- a/hw/nvme/meson.build +++ b/hw/nvme/meson.build @@ -1 +1 @@ -softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c')) +softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c', 'nvme-mi.c')) diff --git a/hw/nvme/nvme-mi.c b/hw/nvme/nvme-mi.c new file mode 100644 index 000..5e93417 --- /dev/null +++ b/hw/nvme/nvme-mi.c @@ -0,0 +1,676 @@ +/* + * QEMU NVMe-MI Controller + * + * Copyright (c) 2021, Samsung Electronics co Ltd. + * + * Written by Padmakar Kalghatgi + *Arun Kumar Agasar + *Saurav Kumar + * + * This code is licensed under the GNU GPL v2 or later. 
+ */ + +/** + * Reference Specs: http://www.nvmexpress.org, + * + * + * Usage + * - + * The nvme-mi device has to be used along with nvme device only + * + * Add options: + *-device nvme-mi,bus= + *-device vhost-vsock-pci, guest-cid= + * + * the cid is used to connect to the vsock + */ + +#include "qemu/osdep.h" +#include "hw/qdev-core.h" +#include "hw/block/block.h" +#include "hw/pci/msix.h" +#include "nvme.h" +#include "nvme-mi.h" +#include "qemu/crc32c.h" + +#define NVME_TEMPERATURE 0x143 +#define NVME_TEMPERATURE_WARNING 0x157 +#define NVME_TEMPERATURE_CRITICAL 0x175 + +static void nvme_mi_send_resp(NvmeMiCtrl *ctrl_mi, uint8_t *resp, uint32_t size) +{ +uint32_t crc_value = crc32c(0x, resp, size); +size += 4; +uint32_t retries = 5; +uint32_t offset = 0; +uint32_t som = 1; +uint32_t eom = 0; +uint32_t pktseq = 0; +uint32_t mtus = ctrl_mi->mctp_unit_size; +while (size > 0) { +uint32_t sizesent = size > mtus ? mtus : size; +size -= sizesent; +eom = size > 0 ? 0 : 1; +g_autofree uint8_t *buf = (uint8_t *)g_malloc(sizesent + 8); +buf[2] = sizesent + 5; +buf[7] = (som << 7) | (eom << 6) | (pktseq << 5); +som = 0; +memcpy(buf + 8, resp + offset, sizesent); +offset += sizesent; +if (size <= 0) { +memcpy(buf + 4 + offset , _value, sizeof(crc_value)); +} +retries = 5; +while (retries > 0) { +int32_t nsen
[PATCH]: /hw/nvme/ctrl error handling if descriptors are greater than 1024
From: padmakar if the number of descriptors or pages is more than 1024, dma writes or reads will result in failure. Hence, we check if the number of descriptors or pages is more than 1024 in the nvme module and return Internal Device error. Signed-off-by: Padmakar Kalghatgi --- hw/nvme/ctrl.c | 14 ++ hw/nvme/trace-events | 1 + 2 files changed, 15 insertions(+) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 40a7efc..082592f 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -602,6 +602,20 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len) return NVME_SUCCESS; } +/* + * The QEMU has an inherent issue where in if the no. + * of descriptors is more than 1024, it will result in + * failure during the dma write or reads. Hence, we need + * to return the error. + */ + +if (((sg->flags & NVME_SG_DMA) && ((sg->qsg.nsg + 1) > IOV_MAX)) || +((sg->iov.niov + 1) > IOV_MAX)) { +NVME_GUEST_ERR(pci_nvme_ub_sg_desc_toohigh, + "number of descriptors is greater than 1024"); +return NVME_INTERNAL_DEV_ERROR; +} + trace_pci_nvme_map_addr(addr, len); if (nvme_addr_is_cmb(n, addr)) { diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index ea33d0c..bfe1a3b 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -202,3 +202,4 @@ pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion qu pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field" +pci_nvme_ub_sg_desc_toohigh(void) "the number of sg descriptors is too high" -- 2.7.0.windows.1
[PATCH v2] hw/block/nvme: map prp fix if prp2 contains non-zero offset
From: padmakar nvme_map_prp needs to calculate the number of list entries based on the offset value. For the subsequent PRP2 list, need to ensure the number of entries is within the MAX number of PRP entries for a page. Signed-off-by: Padmakar Kalghatgi --- -v2: removed extraneous spacing in the comments(Keith) used bitwise operator instead of the modulo operator(Keith) hw/block/nvme.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index d439e44..a4c4f9d 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -577,7 +577,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1, uint32_t nents, prp_trans; int i = 0; -nents = (len + n->page_size - 1) >> n->page_bits; +/* + * The first PRP list entry, pointed by PRP2 can contain + * offsets. Hence, we need calculate the no of entries in + * prp2 based on the offset it has. + */ +nents = (n->page_size - (prp2 & (n->page_size - 1))) >> 3; prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans); if (ret) { @@ -588,7 +593,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1, while (len != 0) { uint64_t prp_ent = le64_to_cpu(prp_list[i]); -if (i == n->max_prp_ents - 1 && len > n->page_size) { +if (i == nents - 1 && len > n->page_size) { if (unlikely(prp_ent & (n->page_size - 1))) { trace_pci_nvme_err_invalid_prplist_ent(prp_ent); status = NVME_INVALID_PRP_OFFSET | NVME_DNR; @@ -597,7 +602,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1, i = 0; nents = (len + n->page_size - 1) >> n->page_bits; -prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); +nents = MIN(nents, n->max_prp_ents); +prp_trans = nents * sizeof(uint64_t); ret = nvme_addr_read(n, prp_ent, (void *)prp_list, prp_trans); if (ret) { -- 2.7.0.windows.1
[PATCH] hw/block/nvme: map prp fix if prp2 contains non-zero offset
From: padmakar nvme_map_prp needs to calculate the number of list entries based on the offset value. For the subsequent PRP2 list, need to ensure the number of entries is within the MAX number of PRP entries for a page. Signed-off-by: Padmakar Kalghatgi --- hw/block/nvme.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index d439e44..efb7368 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -577,7 +577,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1, uint32_t nents, prp_trans; int i = 0; -nents = (len + n->page_size - 1) >> n->page_bits; +/* + * The first PRP list entry, pointed by PRP2 can contain + * offsets. Hence, we need calculate the no of entries in + * prp2 based on the offset it has. + */ +nents = (n->page_size - (prp2 % n->page_size)) >> 3; prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans); if (ret) { @@ -588,7 +593,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1, while (len != 0) { uint64_t prp_ent = le64_to_cpu(prp_list[i]); -if (i == n->max_prp_ents - 1 && len > n->page_size) { +if (i == nents - 1 && len > n->page_size) { if (unlikely(prp_ent & (n->page_size - 1))) { trace_pci_nvme_err_invalid_prplist_ent(prp_ent); status = NVME_INVALID_PRP_OFFSET | NVME_DNR; @@ -597,7 +602,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1, i = 0; nents = (len + n->page_size - 1) >> n->page_bits; -prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); +nents = MIN(nents, n->max_prp_ents); +prp_trans = nents * sizeof(uint64_t); ret = nvme_addr_read(n, prp_ent, (void *)prp_list, prp_trans); if (ret) { -- 2.7.0.windows.1