[PATCH v5 3/3] hw/ufs: Support for UFS logical unit
From: Jeuk Kim This commit adds support for ufs logical unit. The LU handles processing for the SCSI command, unit descriptor query request. This commit enables the UFS device to process IO requests. Signed-off-by: Jeuk Kim --- hw/ufs/lu.c | 1439 ++ hw/ufs/meson.build |2 +- hw/ufs/trace-events | 25 + hw/ufs/ufs.c | 252 ++- hw/ufs/ufs.h | 43 ++ include/scsi/constants.h |1 + 6 files changed, 1755 insertions(+), 7 deletions(-) create mode 100644 hw/ufs/lu.c diff --git a/hw/ufs/lu.c b/hw/ufs/lu.c new file mode 100644 index 00..ab40f42190 --- /dev/null +++ b/hw/ufs/lu.c @@ -0,0 +1,1439 @@ +/* + * QEMU UFS Logical Unit + * + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All rights reserved. + * + * Written by Jeuk Kim + * + * This code is licensed under the GNU GPL v2 or later. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "qemu/memalign.h" +#include "hw/scsi/scsi.h" +#include "scsi/constants.h" +#include "sysemu/block-backend.h" +#include "qemu/cutils.h" +#include "trace.h" +#include "ufs.h" + +/* + * The code below handling SCSI commands is copied from hw/scsi/scsi-disk.c, + * with minor adjustments to make it work for UFS. + */ + +#define SCSI_DMA_BUF_SIZE (128 * KiB) +#define SCSI_MAX_INQUIRY_LEN 256 +#define SCSI_INQUIRY_DATA_SIZE 36 +#define SCSI_MAX_MODE_LEN 256 + +typedef struct UfsSCSIReq { +SCSIRequest req; +/* Both sector and sector_count are in terms of BDRV_SECTOR_SIZE bytes. */ +uint64_t sector; +uint32_t sector_count; +uint32_t buflen; +bool started; +bool need_fua_emulation; +struct iovec iov; +QEMUIOVector qiov; +BlockAcctCookie acct; +} UfsSCSIReq; + +static void ufs_scsi_free_request(SCSIRequest *req) +{ +UfsSCSIReq *r = DO_UPCAST(UfsSCSIReq, req, req); + +qemu_vfree(r->iov.iov_base); +} + +static void scsi_check_condition(UfsSCSIReq *r, SCSISense sense) +{ +trace_ufs_scsi_check_condition(r->req.tag, sense.key, sense.asc, + sense.ascq); +scsi_req_build_sense(&r->req, sense); +scsi_req_complete(&r->req, CHECK_CONDITION); +} + +static int ufs_scsi_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf, + uint32_t outbuf_len) +{ +UfsHc *u = UFS(req->bus->qbus.parent); +UfsLu *lu = DO_UPCAST(UfsLu, qdev, req->dev); +uint8_t page_code = req->cmd.buf[2]; +int start, buflen = 0; + +if (outbuf_len < SCSI_INQUIRY_DATA_SIZE) { +return -1; +} + +outbuf[buflen++] = lu->qdev.type & 0x1f; +outbuf[buflen++] = page_code; +outbuf[buflen++] = 0x00; +outbuf[buflen++] = 0x00; +start = buflen; + +switch (page_code) { +case 0x00: /* Supported page codes, mandatory */ +{ +trace_ufs_scsi_emulate_vpd_page_00(req->cmd.xfer); +outbuf[buflen++] = 0x00; /* list of supported pages (this page) */ +if (u->params.serial) { +outbuf[buflen++] = 0x80; /* unit serial number */ +} +outbuf[buflen++] = 0x87; /* mode page policy */ +break; +} +case 0x80: /* Device serial number, optional */ +{ +int l; + +if (!u->params.serial) { +trace_ufs_scsi_emulate_vpd_page_80_not_supported(); +return -1; +} + +l = strlen(u->params.serial); +if (l > SCSI_INQUIRY_DATA_SIZE) { +l = SCSI_INQUIRY_DATA_SIZE; +} + +trace_ufs_scsi_emulate_vpd_page_80(req->cmd.xfer); +memcpy(outbuf + buflen, u->params.serial, l); +buflen += l; +break; +} +case 0x87: /* Mode Page Policy, mandatory */ +{ +trace_ufs_scsi_emulate_vpd_page_87(req->cmd.xfer); +outbuf[buflen++] = 0x3f; /* apply to all mode pages and subpages */ +outbuf[buflen++] = 0xff; +outbuf[buflen++] = 0; /* shared */ +outbuf[buflen++] = 0; +break; +} +default: +return -1; +} +/* done with EVPD */ +assert(buflen - start <= 255); +outbuf[start - 1] = buflen - start; +return buflen; +} + +static int ufs_scsi_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf, +uint32_t outbuf_len) +{ +int buflen = 0; + +if (outbuf_len < SCSI_INQUIRY_DATA_SIZE) { +return -1; +} + +if (req->cmd.buf[1] & 0x1) { +/* Vital product data */ +return ufs_scsi_emulate_vpd_page(req, outbuf, outbuf_len); +} + +/* Standard INQUIRY data */ +if (req->cmd.buf[2] != 0) { +return -1; +} + +/* PAGE CODE == 0 */ +buflen = req->cmd.xfer; +if (buflen > SCSI_MAX_INQUIRY_LEN) { +buflen = SCSI_MAX_INQUIRY_LEN; +} + +if (is_wlun(req->lun)) { +outbuf[0] = TYPE_WLUN; +} else { +outbuf[0] = 0; +} +outbuf[1] = 0; + +strpadcpy((char *)&outbuf[16], 16, "QEMU UFS",
[PATCH v5 2/3] hw/ufs: Support for Query Transfer Requests
From: Jeuk Kim This commit makes the UFS device support query and nop out transfer requests. The next patch would be support for UFS logical unit and scsi command transfer request. Signed-off-by: Jeuk Kim --- hw/ufs/trace-events | 1 + hw/ufs/ufs.c| 980 +++- hw/ufs/ufs.h| 46 +++ 3 files changed, 1025 insertions(+), 2 deletions(-) diff --git a/hw/ufs/trace-events b/hw/ufs/trace-events index d1badcad10..665e1a942b 100644 --- a/hw/ufs/trace-events +++ b/hw/ufs/trace-events @@ -18,6 +18,7 @@ ufs_err_dma_read_req_upiu(uint32_t slot, uint64_t addr) "failed to read req upiu ufs_err_dma_read_prdt(uint32_t slot, uint64_t addr) "failed to read prdt. UTRLDBR slot %"PRIu32", prdt addr %"PRIu64"" ufs_err_dma_write_utrd(uint32_t slot, uint64_t addr) "failed to write utrd. UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64"" ufs_err_dma_write_rsp_upiu(uint32_t slot, uint64_t addr) "failed to write rsp upiu. UTRLDBR slot %"PRIu32", response upiu addr %"PRIu64"" +ufs_err_utrl_slot_error(uint32_t slot) "UTRLDBR slot %"PRIu32" is in error" ufs_err_utrl_slot_busy(uint32_t slot) "UTRLDBR slot %"PRIu32" is busy" ufs_err_unsupport_register_offset(uint32_t offset) "Register offset 0x%"PRIx32" is not yet supported" ufs_err_invalid_register_offset(uint32_t offset) "Register offset 0x%"PRIx32" is invalid" diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c index 101082a8a3..cd33af2cde 100644 --- a/hw/ufs/ufs.c +++ b/hw/ufs/ufs.c @@ -15,10 +15,221 @@ #include "ufs.h" /* The QEMU-UFS device follows spec version 3.1 */ -#define UFS_SPEC_VER 0x0310 +#define UFS_SPEC_VER 0x0310 #define UFS_MAX_NUTRS 32 #define UFS_MAX_NUTMRS 8 +static MemTxResult ufs_addr_read(UfsHc *u, hwaddr addr, void *buf, int size) +{ +hwaddr hi = addr + size - 1; + +if (hi < addr) { +return MEMTX_DECODE_ERROR; +} + +if (!FIELD_EX32(u->reg.cap, CAP, 64AS) && (hi >> 32)) { +return MEMTX_DECODE_ERROR; +} + +return pci_dma_read(PCI_DEVICE(u), addr, buf, size); +} + +static MemTxResult ufs_addr_write(UfsHc *u, hwaddr addr, const void *buf, + int size) +{ +hwaddr hi = addr + size - 1; +if (hi < addr) { +return MEMTX_DECODE_ERROR; +} + +if (!FIELD_EX32(u->reg.cap, CAP, 64AS) && (hi >> 32)) { +return MEMTX_DECODE_ERROR; +} + +return pci_dma_write(PCI_DEVICE(u), addr, buf, size); +} + +static void ufs_complete_req(UfsRequest *req, UfsReqResult req_result); + +static inline hwaddr ufs_get_utrd_addr(UfsHc *u, uint32_t slot) +{ +hwaddr utrl_base_addr = (((hwaddr)u->reg.utrlbau) << 32) + u->reg.utrlba; +hwaddr utrd_addr = utrl_base_addr + slot * sizeof(UtpTransferReqDesc); + +return utrd_addr; +} + +static inline hwaddr ufs_get_req_upiu_base_addr(const UtpTransferReqDesc *utrd) +{ +uint32_t cmd_desc_base_addr_lo = +le32_to_cpu(utrd->command_desc_base_addr_lo); +uint32_t cmd_desc_base_addr_hi = +le32_to_cpu(utrd->command_desc_base_addr_hi); + +return (((hwaddr)cmd_desc_base_addr_hi) << 32) + cmd_desc_base_addr_lo; +} + +static inline hwaddr ufs_get_rsp_upiu_base_addr(const UtpTransferReqDesc *utrd) +{ +hwaddr req_upiu_base_addr = ufs_get_req_upiu_base_addr(utrd); +uint32_t rsp_upiu_byte_off = +le16_to_cpu(utrd->response_upiu_offset) * sizeof(uint32_t); +return req_upiu_base_addr + rsp_upiu_byte_off; +} + +static MemTxResult ufs_dma_read_utrd(UfsRequest *req) +{ +UfsHc *u = req->hc; +hwaddr utrd_addr = ufs_get_utrd_addr(u, req->slot); +MemTxResult ret; + +ret = ufs_addr_read(u, utrd_addr, &req->utrd, sizeof(req->utrd)); +if (ret) { +trace_ufs_err_dma_read_utrd(req->slot, utrd_addr); +} +return ret; +} + +static MemTxResult ufs_dma_read_req_upiu(UfsRequest *req) +{ +UfsHc *u = req->hc; +hwaddr req_upiu_base_addr = ufs_get_req_upiu_base_addr(&req->utrd); +UtpUpiuReq *req_upiu = &req->req_upiu; +uint32_t copy_size; +uint16_t data_segment_length; +MemTxResult ret; + +/* + * To know the size of the req_upiu, we need to read the + * data_segment_length in the header first. + */ +ret = ufs_addr_read(u, req_upiu_base_addr, &req_upiu->header, +sizeof(UtpUpiuHeader)); +if (ret) { +trace_ufs_err_dma_read_req_upiu(req->slot, req_upiu_base_addr); +return ret; +} +data_segment_length = be16_to_cpu(req_upiu->header.data_segment_length); + +copy_size = sizeof(UtpUpiuHeader) + UFS_TRANSACTION_SPECIFIC_FIELD_SIZE + +data_segment_length; + +ret = ufs_addr_read(u, req_upiu_base_addr, &req->req_upiu, copy_size); +if (ret) { +trace_ufs_err_dma_read_req_upiu(req->slot, req_upiu_base_addr); +} +return ret; +} + +static MemTxResult ufs_dma_read_prdt(UfsRequest *req) +{ +UfsHc *u = req->hc; +uint16_t prdt_len = le16_to_cpu(req->utrd.prd_table_length); +uint16_t prdt_byte_off
[PATCH v5 1/3] hw/ufs: Initial commit for emulated Universal-Flash-Storage
From: Jeuk Kim Universal Flash Storage (UFS) is a high-performance mass storage device with a serial interface. It is primarily used as a high-performance data storage device for embedded applications. This commit contains code for UFS device to be recognized as a UFS PCI device. Patches to handle UFS logical unit and Transfer Request will follow. Signed-off-by: Jeuk Kim --- MAINTAINERS |6 + docs/specs/pci-ids.rst |2 + hw/Kconfig |1 + hw/meson.build |1 + hw/ufs/Kconfig |4 + hw/ufs/meson.build |1 + hw/ufs/trace-events | 32 ++ hw/ufs/trace.h |1 + hw/ufs/ufs.c | 278 ++ hw/ufs/ufs.h | 42 ++ include/block/ufs.h | 1048 ++ include/hw/pci/pci.h |1 + include/hw/pci/pci_ids.h |1 + meson.build |1 + 14 files changed, 1419 insertions(+) create mode 100644 hw/ufs/Kconfig create mode 100644 hw/ufs/meson.build create mode 100644 hw/ufs/trace-events create mode 100644 hw/ufs/trace.h create mode 100644 hw/ufs/ufs.c create mode 100644 hw/ufs/ufs.h create mode 100644 include/block/ufs.h diff --git a/MAINTAINERS b/MAINTAINERS index 12e59b6b27..0c8a955b42 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2256,6 +2256,12 @@ F: tests/qtest/nvme-test.c F: docs/system/devices/nvme.rst T: git git://git.infradead.org/qemu-nvme.git nvme-next +ufs +M: Jeuk Kim +S: Supported +F: hw/ufs/* +F: include/block/ufs.h + megasas M: Hannes Reinecke L: qemu-block@nongnu.org diff --git a/docs/specs/pci-ids.rst b/docs/specs/pci-ids.rst index e302bea484..d6707fa069 100644 --- a/docs/specs/pci-ids.rst +++ b/docs/specs/pci-ids.rst @@ -92,6 +92,8 @@ PCI devices (other than virtio): PCI PVPanic device (``-device pvpanic-pci``) 1b36:0012 PCI ACPI ERST device (``-device acpi-erst``) +1b36:0013 + PCI UFS device (``-device ufs``) All these devices are documented in :doc:`index`. diff --git a/hw/Kconfig b/hw/Kconfig index ba62ff6417..9ca7b38c31 100644 --- a/hw/Kconfig +++ b/hw/Kconfig @@ -38,6 +38,7 @@ source smbios/Kconfig source ssi/Kconfig source timer/Kconfig source tpm/Kconfig +source ufs/Kconfig source usb/Kconfig source virtio/Kconfig source vfio/Kconfig diff --git a/hw/meson.build b/hw/meson.build index c7ac7d3d75..f01fac4617 100644 --- a/hw/meson.build +++ b/hw/meson.build @@ -37,6 +37,7 @@ subdir('smbios') subdir('ssi') subdir('timer') subdir('tpm') +subdir('ufs') subdir('usb') subdir('vfio') subdir('virtio') diff --git a/hw/ufs/Kconfig b/hw/ufs/Kconfig new file mode 100644 index 00..b7b3392e85 --- /dev/null +++ b/hw/ufs/Kconfig @@ -0,0 +1,4 @@ +config UFS_PCI +bool +default y if PCI_DEVICES +depends on PCI diff --git a/hw/ufs/meson.build b/hw/ufs/meson.build new file mode 100644 index 00..eb5164bde9 --- /dev/null +++ b/hw/ufs/meson.build @@ -0,0 +1 @@ +system_ss.add(when: 'CONFIG_UFS_PCI', if_true: files('ufs.c')) diff --git a/hw/ufs/trace-events b/hw/ufs/trace-events new file mode 100644 index 00..d1badcad10 --- /dev/null +++ b/hw/ufs/trace-events @@ -0,0 +1,32 @@ +# ufs.c +ufs_irq_raise(void) "INTx" +ufs_irq_lower(void) "INTx" +ufs_mmio_read(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" +ufs_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" +ufs_process_db(uint32_t slot) "UTRLDBR slot %"PRIu32"" +ufs_process_req(uint32_t slot) "UTRLDBR slot %"PRIu32"" +ufs_complete_req(uint32_t slot) "UTRLDBR slot %"PRIu32"" +ufs_sendback_req(uint32_t slot) "UTRLDBR slot %"PRIu32"" +ufs_exec_nop_cmd(uint32_t slot) "UTRLDBR slot %"PRIu32"" +ufs_exec_scsi_cmd(uint32_t slot, uint8_t lun, uint8_t opcode) "slot %"PRIu32", lun 0x%"PRIx8", opcode 0x%"PRIx8"" +ufs_exec_query_cmd(uint32_t slot, uint8_t opcode) "slot %"PRIu32", opcode 0x%"PRIx8"" +ufs_process_uiccmd(uint32_t uiccmd, uint32_t ucmdarg1, uint32_t ucmdarg2, uint32_t ucmdarg3) "uiccmd 0x%"PRIx32", ucmdarg1 0x%"PRIx32", ucmdarg2 0x%"PRIx32", ucmdarg3 0x%"PRIx32"" + +# error condition +ufs_err_dma_read_utrd(uint32_t slot, uint64_t addr) "failed to read utrd. UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64"" +ufs_err_dma_read_req_upiu(uint32_t slot, uint64_t addr) "failed to read req upiu. UTRLDBR slot %"PRIu32", request upiu addr %"PRIu64"" +ufs_err_dma_read_prdt(uint32_t slot, uint64_t addr) "failed to read prdt. UTRLDBR slot %"PRIu32", prdt addr %"PRIu64"" +ufs_err_dma_write_utrd(uint32_t slot, uint64_t addr) "failed to write utrd. UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64"" +ufs_err_dma_write_rsp_upiu(uint32_t slot, uint64_t addr) "failed to write rsp upiu. UTRLDBR slot %"PRIu32", response upiu addr %"PRIu64"" +ufs_err_utrl_slot_busy(uint32_t slot) "UTRLDBR slot %"PRIu32" is busy" +ufs_err_unsupport_register_offset(uint32_t offset) "Register offset 0x%"PRIx32" is not yet supported" +ufs_err_invalid_register_
[PATCH v5 0/3] hw/ufs: Add Universal Flash Storage (UFS) support
Since v4: Addressed review comment from Stefan Hajnoczi. The fixes are as follows. - Keep u->reg fields in host endian (Removed little-endian helper functions from MemoryRegionOps) - Remove unnecessary NULL checks for g_new and g_malloc0 - Replace DEFINE_PROP_DRIVE_IOTHREAD -> DEFINE_PROP_DRIVE In case you were wondering, ufs and ufs-lu have been tested for the following behaviours. 1. Checked ufs device recognition in Windows10 environment 2. Verified ufs device recognition in Ubuntu 22.04 environment 3. Verified io behaviour via fio in Ubuntu 22.04 environment 4. Verified query request via ufs-tools in Ubuntu 22.04 environment Since v3: - Replace softmmu_ss -> system_ss in meson Since v2: Addressed review comment from Stefan Hajnoczi. The main fixes are as follows. - Use of SPDX licence identifiers - fixed endianness error - removed memory leak - fixed DMA error handling logic Since v1: - use macros of "hw/registerfields.h" (Addressed Philippe's review comments) This patch series adds support for a new PCI-based UFS device. The UFS pci device id (PCI_DEVICE_ID_REDHAT_UFS) is not registered in the Linux kernel yet, so it does not work right away, but I confirmed that it works with Linux when the UFS pci device id is registered. I have also verified that it works with Windows 10. Jeuk Kim (3): hw/ufs: Initial commit for emulated Universal-Flash-Storage hw/ufs: Support for Query Transfer Requests hw/ufs: Support for UFS logical unit MAINTAINERS |6 + docs/specs/pci-ids.rst |2 + hw/Kconfig |1 + hw/meson.build |1 + hw/ufs/Kconfig |4 + hw/ufs/lu.c | 1439 hw/ufs/meson.build |1 + hw/ufs/trace-events | 58 ++ hw/ufs/trace.h |1 + hw/ufs/ufs.c | 1494 ++ hw/ufs/ufs.h | 131 include/block/ufs.h | 1048 ++ include/hw/pci/pci.h |1 + include/hw/pci/pci_ids.h |1 + include/scsi/constants.h |1 + meson.build |1 + 16 files changed, 4190 insertions(+) create mode 100644 hw/ufs/Kconfig create mode 100644 hw/ufs/lu.c create mode 100644 hw/ufs/meson.build create mode 100644 hw/ufs/trace-events create mode 100644 hw/ufs/trace.h create mode 100644 hw/ufs/ufs.c create mode 100644 hw/ufs/ufs.h create mode 100644 include/block/ufs.h -- 2.34.1
Re: PING: [PATCH v4 0/3] hw/ufs: Add Universal Flash Storage (UFS) support
On 2023-07-19 오전 3:56, Stefan Hajnoczi wrote: On Tue, Jul 11, 2023 at 07:31:02PM +0900, Jeuk Kim wrote: Hi, Any more reviews...? Dear Stefan If you don't mind, Could you give it "reviewed-by"? And is there anything else I should do...? Sorry for the late reply. I was on vacation and am working my way through pending code review and bug reports. I have started reviewing this series and should be finish on Wednesday or Thursday. Stefan Thank you for your review. I have seen your comments on PATCH v4. I'll send you a patch v5 as soon as I fix the things you commented on. Thanks again. Jeuk
Re: [Libguestfs] [libnbd PATCH v3 04/22] states: Prepare to send 64-bit requests
On Tue, May 30, 2023 at 01:18:22PM -0500, Eric Blake wrote: > > > + /* It is more convenient to manage PAYLOAD_LEN by what was negotiated > > > + * than to require the user to have to set it correctly. > > > + * TODO: Add new h->strict bit to allow intentional protocol violation > > > + * for interoperability testing. > > > + */ > > > + if (h->extended_headers) > > > +flags |= LIBNBD_CMD_FLAG_PAYLOAD_LEN; > > > + else > > > +flags &= ~LIBNBD_CMD_FLAG_PAYLOAD_LEN; > > > > Nice -- I wanted to ask for: > > > > flags &= ~(uint32_t)LIBNBD_CMD_FLAG_PAYLOAD_LEN; > > > > due to LIBNBD_CMD_FLAG_PAYLOAD_LEN having type "int". > > > > However: in patch#3, what has type "int" is: > > > > +#define NBD_CMD_FLAG_PAYLOAD_LEN (1<<5) > > > > and here we have LIBNBD_CMD_FLAG_PAYLOAD_LEN instead -- and the latter > > has type unsigned int already, from your recent commit 69eecae2c03a > > ("api: Generate flag values as unsigned", 2022-11-11). > > Still, worth a (separate) cleanup patch to nbd-protocol.h to prefer > unsigned constants for the flag values where they are not generated. I pushed a preliminary commit 65011cf6 for libnbd along those lines, and will copy the same changes to nbd-protocol.h over to nbdkit shortly. v4 of this series will be rebased on that. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Re: [PATCH v4 3/3] hw/ufs: Support for UFS logical unit
On Tue, Jul 04, 2023 at 05:33:59PM +0900, Jeuk Kim wrote: > +static Property ufs_lu_props[] = { > +DEFINE_PROP_DRIVE_IOTHREAD("drive", UfsLu, qdev.conf.blk), This device is not aware of IOThreads, so I think DEFINE_PROP_DRIVE() should be used instead. signature.asc Description: PGP signature
Re: [PATCH v4 2/3] hw/ufs: Support for Query Transfer Requests
On Tue, Jul 04, 2023 at 05:33:58PM +0900, Jeuk Kim wrote: > +static MemTxResult ufs_dma_read_prdt(UfsRequest *req) > +{ > +UfsHc *u = req->hc; > +uint16_t prdt_len = le16_to_cpu(req->utrd.prd_table_length); > +uint16_t prdt_byte_off = > +le16_to_cpu(req->utrd.prd_table_offset) * sizeof(uint32_t); > +uint32_t prdt_size = prdt_len * sizeof(UfshcdSgEntry); > +g_autofree UfshcdSgEntry *prd_entries = NULL; > +hwaddr req_upiu_base_addr, prdt_base_addr; > +int err; > + > +assert(!req->sg); > + > +if (prdt_len == 0) { > +return MEMTX_OK; > +} > + > +prd_entries = g_new(UfshcdSgEntry, prdt_size); > +if (!prd_entries) { g_new() never returns NULL. The process aborts if there is not enough memory available. Use g_try_new() if you want to handle memory allocation failure. > +trace_ufs_err_memory_allocation(); > +return MEMTX_ERROR; > +} > + > +req_upiu_base_addr = ufs_get_req_upiu_base_addr(&req->utrd); > +prdt_base_addr = req_upiu_base_addr + prdt_byte_off; > + > +err = ufs_addr_read(u, prdt_base_addr, prd_entries, prdt_size); > +if (err) { > +trace_ufs_err_dma_read_prdt(req->slot, prdt_base_addr); > +return err; > +} > + > +req->sg = g_malloc0(sizeof(QEMUSGList)); > +if (!req->sg) { g_malloc0() never returns NULL. The process aborts if there is not enough memory available. signature.asc Description: PGP signature
Re: [PATCH v4 1/3] hw/ufs: Initial commit for emulated Universal-Flash-Storage
On Tue, Jul 04, 2023 at 05:33:57PM +0900, Jeuk Kim wrote: > From: Jeuk Kim > > Universal Flash Storage (UFS) is a high-performance mass storage device > with a serial interface. It is primarily used as a high-performance > data storage device for embedded applications. > > This commit contains code for UFS device to be recognized > as a UFS PCI device. > Patches to handle UFS logical unit and Transfer Request will follow. > > Signed-off-by: Jeuk Kim > --- > MAINTAINERS |6 + > docs/specs/pci-ids.rst |2 + > hw/Kconfig |1 + > hw/meson.build |1 + > hw/ufs/Kconfig |4 + > hw/ufs/meson.build |1 + > hw/ufs/trace-events | 33 ++ > hw/ufs/trace.h |1 + > hw/ufs/ufs.c | 304 +++ > hw/ufs/ufs.h | 42 ++ > include/block/ufs.h | 1048 ++ > include/hw/pci/pci.h |1 + > include/hw/pci/pci_ids.h |1 + > meson.build |1 + > 14 files changed, 1446 insertions(+) > create mode 100644 hw/ufs/Kconfig > create mode 100644 hw/ufs/meson.build > create mode 100644 hw/ufs/trace-events > create mode 100644 hw/ufs/trace.h > create mode 100644 hw/ufs/ufs.c > create mode 100644 hw/ufs/ufs.h > create mode 100644 include/block/ufs.h > > diff --git a/MAINTAINERS b/MAINTAINERS > index 4feea49a6e..756aae8623 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -2237,6 +2237,12 @@ F: tests/qtest/nvme-test.c > F: docs/system/devices/nvme.rst > T: git git://git.infradead.org/qemu-nvme.git nvme-next > > +ufs > +M: Jeuk Kim > +S: Supported > +F: hw/ufs/* > +F: include/block/ufs.h > + > megasas > M: Hannes Reinecke > L: qemu-block@nongnu.org > diff --git a/docs/specs/pci-ids.rst b/docs/specs/pci-ids.rst > index e302bea484..d6707fa069 100644 > --- a/docs/specs/pci-ids.rst > +++ b/docs/specs/pci-ids.rst > @@ -92,6 +92,8 @@ PCI devices (other than virtio): >PCI PVPanic device (``-device pvpanic-pci``) > 1b36:0012 >PCI ACPI ERST device (``-device acpi-erst``) > +1b36:0013 > + PCI UFS device (``-device ufs``) > > All these devices are documented in :doc:`index`. > > diff --git a/hw/Kconfig b/hw/Kconfig > index ba62ff6417..9ca7b38c31 100644 > --- a/hw/Kconfig > +++ b/hw/Kconfig > @@ -38,6 +38,7 @@ source smbios/Kconfig > source ssi/Kconfig > source timer/Kconfig > source tpm/Kconfig > +source ufs/Kconfig > source usb/Kconfig > source virtio/Kconfig > source vfio/Kconfig > diff --git a/hw/meson.build b/hw/meson.build > index c7ac7d3d75..f01fac4617 100644 > --- a/hw/meson.build > +++ b/hw/meson.build > @@ -37,6 +37,7 @@ subdir('smbios') > subdir('ssi') > subdir('timer') > subdir('tpm') > +subdir('ufs') > subdir('usb') > subdir('vfio') > subdir('virtio') > diff --git a/hw/ufs/Kconfig b/hw/ufs/Kconfig > new file mode 100644 > index 00..b7b3392e85 > --- /dev/null > +++ b/hw/ufs/Kconfig > @@ -0,0 +1,4 @@ > +config UFS_PCI > +bool > +default y if PCI_DEVICES > +depends on PCI > diff --git a/hw/ufs/meson.build b/hw/ufs/meson.build > new file mode 100644 > index 00..eb5164bde9 > --- /dev/null > +++ b/hw/ufs/meson.build > @@ -0,0 +1 @@ > +system_ss.add(when: 'CONFIG_UFS_PCI', if_true: files('ufs.c')) > diff --git a/hw/ufs/trace-events b/hw/ufs/trace-events > new file mode 100644 > index 00..17793929b1 > --- /dev/null > +++ b/hw/ufs/trace-events > @@ -0,0 +1,33 @@ > +# ufs.c > +ufs_irq_raise(void) "INTx" > +ufs_irq_lower(void) "INTx" > +ufs_mmio_read(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" > data 0x%"PRIx64" size %d" > +ufs_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr > 0x%"PRIx64" data 0x%"PRIx64" size %d" > +ufs_process_db(uint32_t slot) "UTRLDBR slot %"PRIu32"" > +ufs_process_req(uint32_t slot) "UTRLDBR slot %"PRIu32"" > +ufs_complete_req(uint32_t slot) "UTRLDBR slot %"PRIu32"" > +ufs_sendback_req(uint32_t slot) "UTRLDBR slot %"PRIu32"" > +ufs_exec_nop_cmd(uint32_t slot) "UTRLDBR slot %"PRIu32"" > +ufs_exec_scsi_cmd(uint32_t slot, uint8_t lun, uint8_t opcode) "slot > %"PRIu32", lun 0x%"PRIx8", opcode 0x%"PRIx8"" > +ufs_exec_query_cmd(uint32_t slot, uint8_t opcode) "slot %"PRIu32", opcode > 0x%"PRIx8"" > +ufs_process_uiccmd(uint32_t uiccmd, uint32_t ucmdarg1, uint32_t ucmdarg2, > uint32_t ucmdarg3) "uiccmd 0x%"PRIx32", ucmdarg1 0x%"PRIx32", ucmdarg2 > 0x%"PRIx32", ucmdarg3 0x%"PRIx32"" > + > +# error condition > +ufs_err_memory_allocation(void) "failed to allocate memory" > +ufs_err_dma_read_utrd(uint32_t slot, uint64_t addr) "failed to read utrd. > UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64"" > +ufs_err_dma_read_req_upiu(uint32_t slot, uint64_t addr) "failed to read req > upiu. UTRLDBR slot %"PRIu32", request upiu addr %"PRIu64"" > +ufs_err_dma_read_prdt(uint32_t slot, uint64_t addr) "failed to read prdt. > UTRLDBR slot %"PRIu32", prdt addr %"PRIu64"" > +ufs_err_dma_write_utrd(uint32_t slot, uint64_t addr) "failed to wr
Re: PING: [PATCH v4 0/3] hw/ufs: Add Universal Flash Storage (UFS) support
On Tue, Jul 11, 2023 at 07:31:02PM +0900, Jeuk Kim wrote: > Hi, > Any more reviews...? > > Dear Stefan > If you don't mind, Could you give it "reviewed-by"? > And is there anything else I should do...? Sorry for the late reply. I was on vacation and am working my way through pending code review and bug reports. I have started reviewing this series and should be finish on Wednesday or Thursday. Stefan signature.asc Description: PGP signature
Re: [PATCH 6/5] qemu-nbd: make verbose bool and local variable in main()
On Mon, Jul 17, 2023 at 10:25:20PM +0200, Denis V. Lunev wrote: > Pass 'verbose' to nbd_client_thread() inside NbdClientOpts which looks > a little bit cleaner and make it bool as it is used as bool actually. > > Signed-off-by: Denis V. Lunev > CC: Eric Blake > CC: Vladimir Sementsov-Ogievskiy > --- > qemu-nbd.c | 8 +--- > 1 file changed, 5 insertions(+), 3 deletions(-) Reviewed-by: Eric Blake I'll queue the series through my NBD tree. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Re: [PATCH 5/5] qemu-nbd: handle dup2() error when qemu-nbd finished setup process
On Mon, Jul 17, 2023 at 04:55:44PM +0200, Denis V. Lunev wrote: > Fail on error, we are in trouble. > > Signed-off-by: Denis V. Lunev > CC: Eric Blake > CC: Vladimir Sementsov-Ogievskiy > --- > qemu-nbd.c | 14 -- > 1 file changed, 12 insertions(+), 2 deletions(-) > > diff --git a/qemu-nbd.c b/qemu-nbd.c > index f27613cb57..cd0e965705 100644 > --- a/qemu-nbd.c > +++ b/qemu-nbd.c > @@ -323,7 +323,12 @@ static void *nbd_client_thread(void *arg) > opts->device, srcpath); > } else { > /* Close stderr so that the qemu-nbd process exits. */ > -dup2(STDOUT_FILENO, STDERR_FILENO); > +int err = dup2(STDOUT_FILENO, STDERR_FILENO); > +if (err < 0) { Shorter to drop the temporary variable, and just do: if (dup2(...) < 0) { > +error_report("Could not set stderr to /dev/null: %s", > + strerror(errno)); > +exit(EXIT_FAILURE); > +} > } Either way, Reviewed-by: Eric Blake -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Re: [PATCH 4/5] qemu-nbd: properly report error if qemu_daemon() is failed
On Mon, Jul 17, 2023 at 04:55:43PM +0200, Denis V. Lunev wrote: > errno has been overwritten by dup2() just below qemu_daemon() and thus > improperly returned to the caller. Fix accordingly. > > Signed-off-by: Denis V. Lunev > CC: Eric Blake > CC: Vladimir Sementsov-Ogievskiy > --- > qemu-nbd.c | 5 - > 1 file changed, 4 insertions(+), 1 deletion(-) Reviewed-by: Eric Blake > > diff --git a/qemu-nbd.c b/qemu-nbd.c > index 4450cc826b..f27613cb57 100644 > --- a/qemu-nbd.c > +++ b/qemu-nbd.c > @@ -932,9 +932,12 @@ int main(int argc, char **argv) > error_report("Failed to fork: %s", strerror(errno)); > exit(EXIT_FAILURE); > } else if (pid == 0) { > +int saved_errno; > + > close(stderr_fd[0]); > > ret = qemu_daemon(1, 0); > +saved_errno = errno;/* dup2 will overwrite error below */ I would have written 'may', not 'will'; but that's a triviality not worth changing. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Re: [PATCH 3/5] qemu-nbd: properly report error on error in dup2() after qemu_daemon()
On Mon, Jul 17, 2023 at 04:55:42PM +0200, Denis V. Lunev wrote: > We are trying to temporary redirect stderr of daemonized process to temporarily > a pipe to report a error and get failed. In that case we could not > use error_report() helper, but should write the message directly into > the problematic pipe. > > Signed-off-by: Denis V. Lunev > CC: Eric Blake > CC: Vladimir Sementsov-Ogievskiy > --- > qemu-nbd.c | 15 ++- > 1 file changed, 14 insertions(+), 1 deletion(-) > > diff --git a/qemu-nbd.c b/qemu-nbd.c > index 186ce9474c..4450cc826b 100644 > --- a/qemu-nbd.c > +++ b/qemu-nbd.c > @@ -937,7 +937,20 @@ int main(int argc, char **argv) > ret = qemu_daemon(1, 0); > errno from qemu_daemon... > /* Temporarily redirect stderr to the parent's pipe... */ > -dup2(stderr_fd[1], STDERR_FILENO); > +if (dup2(stderr_fd[1], STDERR_FILENO) < 0) { ...is still potentially lost here... > +char str[256]; > +snprintf(str, sizeof(str), > + "%s: Failed to link stderr to the pipe: %s\n", > + g_get_prgname(), strerror(errno)); > +/* > + * We are unable to use error_report() here as we need to get > + * stderr pointed to the parent's pipe. Write to that pipe > + * manually. > + */ > +ret = write(stderr_fd[1], str, strlen(str)); > +exit(EXIT_FAILURE); > +} > + > if (ret < 0) { > error_report("Failed to daemonize: %s", strerror(errno)); ...before use here. Patch 4/5 addresses that, but I'm inclined to just swap the order of the two patches when applying the series in my NBD tree. Reviewed-by: Eric Blake -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Re: [PATCH] hw/nvme: fix endianness issue for shadow doorbells
On 18/7/23 13:34, Klaus Jensen wrote: On Jul 18 13:18, Philippe Mathieu-Daudé wrote: On 18/7/23 12:35, Klaus Jensen wrote: From: Klaus Jensen In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for doorbell buffers"), we fixed shadow doorbells for big-endian guests running on little endian hosts. But I did not fix little-endian guests on big-endian hosts. Fix this. Solves issue #1765. Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Reported-by: Thomas Huth Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 18 +- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8e8e870b9a80..dadc2dc7da10 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) PCIDevice *pci = PCI_DEVICE(n); uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1); uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2); +uint32_t v; int i; /* Address should be page aligned */ @@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) NvmeCQueue *cq = n->cq[i]; if (sq) { +v = cpu_to_le32(sq->tail); + /* * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3) * nvme_process_db() uses this hard-coded way to calculate @@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) */ sq->db_addr = dbs_addr + (i << 3); sq->ei_addr = eis_addr + (i << 3); -pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); +pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); Or use the PCI DMA ldst API which does the swapping for you: stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED); Nice! That's definitely preferable. I'll queue that up for next, but leave this patch as-is since it's been tested on a real big-endian host and gotten its tags ;) OK, then: Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH] hw/nvme: fix endianness issue for shadow doorbells
On Jul 18 13:18, Philippe Mathieu-Daudé wrote: > On 18/7/23 12:35, Klaus Jensen wrote: > > From: Klaus Jensen > > > > In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for > > doorbell buffers"), we fixed shadow doorbells for big-endian guests > > running on little endian hosts. But I did not fix little-endian guests > > on big-endian hosts. Fix this. > > > > Solves issue #1765. > > > > Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") > > Cc: qemu-sta...@nongnu.org > > Reported-by: Thomas Huth > > Signed-off-by: Klaus Jensen > > --- > > hw/nvme/ctrl.c | 18 +- > > 1 file changed, 13 insertions(+), 5 deletions(-) > > > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > > index 8e8e870b9a80..dadc2dc7da10 100644 > > --- a/hw/nvme/ctrl.c > > +++ b/hw/nvme/ctrl.c > > @@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const > > NvmeRequest *req) > > PCIDevice *pci = PCI_DEVICE(n); > > uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1); > > uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2); > > +uint32_t v; > > int i; > > /* Address should be page aligned */ > > @@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const > > NvmeRequest *req) > > NvmeCQueue *cq = n->cq[i]; > > if (sq) { > > +v = cpu_to_le32(sq->tail); > > + > > /* > >* CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3) > >* nvme_process_db() uses this hard-coded way to calculate > > @@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const > > NvmeRequest *req) > >*/ > > sq->db_addr = dbs_addr + (i << 3); > > sq->ei_addr = eis_addr + (i << 3); > > -pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); > > +pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); > > Or use the PCI DMA ldst API which does the swapping for you: > > stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED); > Nice! That's definitely preferable. I'll queue that up for next, but leave this patch as-is since it's been tested on a real big-endian host and gotten its tags ;) signature.asc Description: PGP signature
Re: [PATCH] hw/nvme: fix endianness issue for shadow doorbells
On 18/7/23 12:35, Klaus Jensen wrote: From: Klaus Jensen In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for doorbell buffers"), we fixed shadow doorbells for big-endian guests running on little endian hosts. But I did not fix little-endian guests on big-endian hosts. Fix this. Solves issue #1765. Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Reported-by: Thomas Huth Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 18 +- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8e8e870b9a80..dadc2dc7da10 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) PCIDevice *pci = PCI_DEVICE(n); uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1); uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2); +uint32_t v; int i; /* Address should be page aligned */ @@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) NvmeCQueue *cq = n->cq[i]; if (sq) { +v = cpu_to_le32(sq->tail); + /* * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3) * nvme_process_db() uses this hard-coded way to calculate @@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) */ sq->db_addr = dbs_addr + (i << 3); sq->ei_addr = eis_addr + (i << 3); -pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); +pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); Or use the PCI DMA ldst API which does the swapping for you: stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED); if (n->params.ioeventfd && sq->sqid != 0) { if (!nvme_init_sq_ioeventfd(sq)) { @@ -6835,10 +6838,12 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) } if (cq) { +v = cpu_to_le32(cq->head); + /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */ cq->db_addr = dbs_addr + (i << 3) + (1 << 2); cq->ei_addr = eis_addr + (i << 3) + (1 << 2); -pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); +pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED); if (n->params.ioeventfd && cq->cqid != 0) { if (!nvme_init_cq_ioeventfd(cq)) { @@ -7587,7 +7592,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) { PCIDevice *pci = PCI_DEVICE(n); -uint32_t qid; +uint32_t qid, v; if (unlikely(addr & ((1 << 2) - 1))) { NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, @@ -7654,7 +7659,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) start_sqs = nvme_cq_full(cq) ? 1 : 0; cq->head = new_head; if (!qid && n->dbbuf_enabled) { -pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); +v = cpu_to_le32(cq->head); +pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED); } if (start_sqs) { NvmeSQueue *sq; @@ -7714,6 +7720,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) sq->tail = new_tail; if (!qid && n->dbbuf_enabled) { +v = cpu_to_le32(sq->tail); + /* * The spec states "the host shall also update the controller's * corresponding doorbell property to match the value of that entry @@ -7727,7 +7735,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) * including ones that run on Linux, are not updating Admin Queues, * so we can't trust reading it for an appropriate sq tail. */ -pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); +pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED); } qemu_bh_schedule(sq->bh);
Re: [PATCH] hw/nvme: fix endianness issue for shadow doorbells
On Tue, Jul 18, 2023 at 12:35:12PM +0200, Klaus Jensen wrote: > From: Klaus Jensen > > In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for > doorbell buffers"), we fixed shadow doorbells for big-endian guests > running on little endian hosts. But I did not fix little-endian guests > on big-endian hosts. Fix this. > > Solves issue #1765. > > Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") > Cc: qemu-sta...@nongnu.org > Reported-by: Thomas Huth > Signed-off-by: Klaus Jensen Looks good! Reviewed-by: Keith Busch
Re: [PATCH] hw/nvme: fix endianness issue for shadow doorbells
On 18/07/2023 12.35, Klaus Jensen wrote: From: Klaus Jensen In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for doorbell buffers"), we fixed shadow doorbells for big-endian guests running on little endian hosts. But I did not fix little-endian guests on big-endian hosts. Fix this. Solves issue #1765. Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Reported-by: Thomas Huth Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 18 +- 1 file changed, 13 insertions(+), 5 deletions(-) Thank you very much, this fixes tests/avocado/boot_linux_console.py:BootLinuxConsole.test_ppc_powernv8 on my s390x host: Tested-by: Thomas Huth
Re: [PATCH v2 0/7] iotests/parallels: Add new tests and fix old
This patchset also could be applied upon [PATCH v8 00/10] parallels: Add duplication check, repair at open, fix bugs. On 7/1/23 12:11, Alexander Ivanov wrote: This patchset should be applied on top of [PATCH v7 0/8] parallels: Add duplication check, repair at open, fix bugs Add out-of-image, leak and BAT entries duplication checks tests. Old parallels images check test (131): Refactor, fix cluster size and fix after repairing was added to parallels_open(). v2: 5: Fix a typo. 7: Add a test for data_off check. Alexander Ivanov (7): iotests: Add out-of-image check test for parallels format iotests: Add leak check test for parallels format iotests: Add test for BAT entries duplication check iotests: Refactor tests of parallels images checks (131) iotests: Fix cluster size in parallels images tests (131) iotests: Fix test 131 after repair was added to parallels_open() iotests: Add test for data_off check tests/qemu-iotests/131| 36 +++-- tests/qemu-iotests/131.out| 59 +++ tests/qemu-iotests/tests/parallels-checks | 145 ++ tests/qemu-iotests/tests/parallels-checks.out | 75 + 4 files changed, 264 insertions(+), 51 deletions(-) create mode 100755 tests/qemu-iotests/tests/parallels-checks create mode 100644 tests/qemu-iotests/tests/parallels-checks.out -- Best regards, Alexander Ivanov
[PATCH v8 07/10] parallels: Image repairing in parallels_open()
Repair an image at opening if the image is unclean or out-of-image corruption was detected. Signed-off-by: Alexander Ivanov Reviewed-by: Denis V. Lunev --- block/parallels.c | 70 +-- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index a78238eadd..5100c8f903 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -951,7 +951,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, BDRVParallelsState *s = bs->opaque; ParallelsHeader ph; int ret, size, i; -int64_t file_nb_sectors; +int64_t file_nb_sectors, sector; QemuOpts *opts = NULL; Error *local_err = NULL; char *buf; @@ -1024,11 +1024,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, */ s->header_size = size; } -if (s->data_end > file_nb_sectors) { -error_setg(errp, "Invalid image: incorrect data_off field"); -ret = -EINVAL; -goto fail; -} ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); if (ret < 0) { @@ -1036,33 +1031,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->bat_bitmap = (uint32_t *)(s->header + 1); -for (i = 0; i < s->bat_size; i++) { -int64_t off = bat2sect(s, i); -if (off >= file_nb_sectors) { -if (flags & BDRV_O_CHECK) { -continue; -} -error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry " - "is larger than file size (%" PRIi64 ")", - off << BDRV_SECTOR_BITS, i, - file_nb_sectors << BDRV_SECTOR_BITS); -ret = -EINVAL; -goto fail; -} -if (off >= s->data_end) { -s->data_end = off + s->tracks; -} -} - if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { -/* Image was not closed correctly. The check is mandatory */ s->header_unclean = true; -if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { -error_setg(errp, "parallels: Image was not closed correctly; " - "cannot be opened read/write"); -ret = -EACCES; -goto fail; -} } opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); @@ -1123,10 +1093,40 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); ret = migrate_add_blocker(s->migration_blocker, errp); if (ret < 0) { -error_free(s->migration_blocker); +error_setg(errp, "Migration blocker error"); goto fail; } qemu_co_mutex_init(&s->lock); + +for (i = 0; i < s->bat_size; i++) { +sector = bat2sect(s, i); +if (sector + s->tracks > s->data_end) { +s->data_end = sector + s->tracks; +} +} + +/* + * We don't repair the image here if it's opened for checks. Also we don't + * want to change inactive images and can't change readonly images. + */ +if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) { +return 0; +} + +/* + * Repair the image if it's dirty or + * out-of-image corruption was detected. + */ +if (s->data_end > file_nb_sectors || s->header_unclean) { +BdrvCheckResult res; +ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); +if (ret < 0) { +error_setg_errno(errp, -ret, "Could not repair corrupted image"); +migrate_del_blocker(s->migration_blocker); +goto fail; +} +} + return 0; fail_format: @@ -1134,6 +1134,12 @@ fail_format: fail_options: ret = -EINVAL; fail: +/* + * "s" object was allocated by g_malloc0 so we can safely + * try to free its fields even they were not allocated. + */ +error_free(s->migration_blocker); +g_free(s->bat_dirty_bmap); qemu_vfree(s->header); return ret; } -- 2.34.1
[PATCH v8 09/10] parallels: Add data_off check
data_off field of the parallels image header can be corrupted. Check if this field greater than the header + BAT size and less than file size. Signed-off-by: Alexander Ivanov --- block/parallels.c | 80 +++ 1 file changed, 80 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index 3414807089..103acb7b40 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -450,6 +450,81 @@ static void parallels_check_unclean(BlockDriverState *bs, } } +/* + * Returns true if data_off is correct, otherwise false. In both cases + * correct_offset is set to the proper value. + */ +static bool parallels_test_data_off(BDRVParallelsState *s, +int64_t file_nb_sectors, +uint32_t *correct_offset) +{ +uint32_t data_off, min_off; +bool old_magic; + +/* + * There are two slightly different image formats: with "WithoutFreeSpace" + * or "WithouFreSpacExt" magic words. Call the first one as "old magic". + * In such images data_off field can be zero. In this case the offset is + * calculated as the end of BAT table plus some padding to ensure sector + * size alignment. + */ +old_magic = !memcmp(s->header->magic, HEADER_MAGIC, 16); + +min_off = DIV_ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); +if (!old_magic) { +min_off = ROUND_UP(min_off, s->cluster_size / BDRV_SECTOR_SIZE); +} + +if (correct_offset) { +*correct_offset = min_off; +} + +data_off = le32_to_cpu(s->header->data_off); +if (data_off == 0 && old_magic) { +return true; +} + +if (data_off < min_off || data_off > file_nb_sectors) { +return false; +} + +if (correct_offset) { +*correct_offset = data_off; +} + +return true; +} + +static int coroutine_fn GRAPH_RDLOCK +parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix) +{ +BDRVParallelsState *s = bs->opaque; +int64_t file_size; +uint32_t data_off; + +file_size = bdrv_co_nb_sectors(bs->file->bs); +if (file_size < 0) { +res->check_errors++; +return file_size; +} + +if (parallels_test_data_off(s, file_size, &data_off)) { +return 0; +} + +res->corruptions++; +if (fix & BDRV_FIX_ERRORS) { +s->header->data_off = cpu_to_le32(data_off); +res->corruptions_fixed++; +} + +fprintf(stderr, "%s data_off field has incorrect value\n", +fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); + +return 0; +} + static int coroutine_fn GRAPH_RDLOCK parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -713,6 +788,11 @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, WITH_QEMU_LOCK_GUARD(&s->lock) { parallels_check_unclean(bs, res, fix); +ret = parallels_check_data_off(bs, res, fix); +if (ret < 0) { +return ret; +} + ret = parallels_check_outside_image(bs, res, fix); if (ret < 0) { return ret; -- 2.34.1
Re: [PATCH] hw/nvme: fix endianness issue for shadow doorbells
On 7/18/23 12:35, Klaus Jensen wrote: From: Klaus Jensen In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for doorbell buffers"), we fixed shadow doorbells for big-endian guests running on little endian hosts. But I did not fix little-endian guests on big-endian hosts. Fix this. Solves issue #1765. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1765 Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Reported-by: Thomas Huth Signed-off-by: Klaus Jensen Tested-by: Cédric Le Goater with a PowerNV QEMU machine running on a s390x LPAR. Thanks, C. --- hw/nvme/ctrl.c | 18 +- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8e8e870b9a80..dadc2dc7da10 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) PCIDevice *pci = PCI_DEVICE(n); uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1); uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2); +uint32_t v; int i; /* Address should be page aligned */ @@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) NvmeCQueue *cq = n->cq[i]; if (sq) { +v = cpu_to_le32(sq->tail); + /* * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3) * nvme_process_db() uses this hard-coded way to calculate @@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) */ sq->db_addr = dbs_addr + (i << 3); sq->ei_addr = eis_addr + (i << 3); -pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); +pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); if (n->params.ioeventfd && sq->sqid != 0) { if (!nvme_init_sq_ioeventfd(sq)) { @@ -6835,10 +6838,12 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) } if (cq) { +v = cpu_to_le32(cq->head); + /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */ cq->db_addr = dbs_addr + (i << 3) + (1 << 2); cq->ei_addr = eis_addr + (i << 3) + (1 << 2); -pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); +pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); if (n->params.ioeventfd && cq->cqid != 0) { if (!nvme_init_cq_ioeventfd(cq)) { @@ -7587,7 +7592,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) { PCIDevice *pci = PCI_DEVICE(n); -uint32_t qid; +uint32_t qid, v; if (unlikely(addr & ((1 << 2) - 1))) { NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, @@ -7654,7 +7659,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) start_sqs = nvme_cq_full(cq) ? 1 : 0; cq->head = new_head; if (!qid && n->dbbuf_enabled) { -pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); +v = cpu_to_le32(cq->head); +pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); } if (start_sqs) { NvmeSQueue *sq; @@ -7714,6 +7720,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) sq->tail = new_tail; if (!qid && n->dbbuf_enabled) { +v = cpu_to_le32(sq->tail); + /* * The spec states "the host shall also update the controller's * corresponding doorbell property to match the value of that entry @@ -7727,7 +7735,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) * including ones that run on Linux, are not updating Admin Queues, * so we can't trust reading it for an appropriate sq tail. */ -pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); +pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); } qemu_bh_schedule(sq->bh);
[PATCH v8 03/10] parallels: Check if data_end greater than the file size
Initially data_end is set to the data_off image header field and must not be greater than the file size. Signed-off-by: Alexander Ivanov Reviewed-by: Denis V. Lunev --- block/parallels.c | 5 + 1 file changed, 5 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index 3c0dca3dbf..6a3d41373a 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -874,6 +874,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, */ s->header_size = size; } +if (s->data_end > file_nb_sectors) { +error_setg(errp, "Invalid image: incorrect data_off field"); +ret = -EINVAL; +goto fail; +} ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); if (ret < 0) { -- 2.34.1
[PATCH v8 02/10] parallels: Incorrect data end calculation in parallels_open()
The BDRVParallelsState structure contains data_end field that is measured in sectors. In parallels_open() initially this field is set by data_off field from parallels image header. According to the parallels format documentation, data_off field contains an offset, in sectors, from the start of the file to the start of the data area. For "WithoutFreeSpace" images: if data_off is zero, the offset is calculated as the end of the BAT table plus some padding to ensure sector size alignment. The parallels_open() function has code for handling zero value in data_off, but in the result data_end contains the offset in bytes. Replace the alignment to sector size by division by sector size and fix the comparision with s->header_size. Signed-off-by: Alexander Ivanov Reviewed-by: Denis V. Lunev Reviewed-by: Hanna Czenczek --- block/parallels.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index c7b2ed5a54..3c0dca3dbf 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -865,9 +865,9 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->data_end = le32_to_cpu(ph.data_off); if (s->data_end == 0) { -s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); +s->data_end = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE); } -if (s->data_end < s->header_size) { +if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { /* * There is not enough unused space to fit to block align between BAT * and actual data. We can't avoid read-modify-write... -- 2.34.1
[PATCH v8 08/10] parallels: Use bdrv_co_getlength() in parallels_check_outside_image()
bdrv_co_getlength() should be used in coroutine context. Replace bdrv_getlength() by bdrv_co_getlength() in parallels_check_outside_image(). Signed-off-by: Alexander Ivanov Reviewed-by: Denis V. Lunev --- block/parallels.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/parallels.c b/block/parallels.c index 5100c8f903..3414807089 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -500,7 +500,7 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, int64_t size; int ret; -size = bdrv_getlength(bs->file->bs); +size = bdrv_co_getlength(bs->file->bs); if (size < 0) { res->check_errors++; return size; -- 2.34.1
[PATCH v8 04/10] parallels: Add "explicit" argument to parallels_check_leak()
In the on of the next patches we need to repair leaks without changing leaks and leaks_fixed info in res. Also we don't want to print any warning about leaks. Add "explicit" argument to skip info changing if the argument is false. Signed-off-by: Alexander Ivanov Reviewed-by: Denis V. Lunev --- block/parallels.c | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 6a3d41373a..8bb5d115fc 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -488,7 +488,7 @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res, static int coroutine_fn GRAPH_RDLOCK parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, - BdrvCheckMode fix) + BdrvCheckMode fix, bool explicit) { BDRVParallelsState *s = bs->opaque; int64_t size; @@ -503,10 +503,13 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, if (size > res->image_end_offset) { int64_t count; count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size); -fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", -fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", -size - res->image_end_offset); -res->leaks += count; +if (explicit) { +fprintf(stderr, +"%s space leaked at the end of the image %" PRId64 "\n", +fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", +size - res->image_end_offset); +res->leaks += count; +} if (fix & BDRV_FIX_LEAKS) { Error *local_err = NULL; @@ -521,7 +524,9 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, res->check_errors++; return ret; } -res->leaks_fixed += count; +if (explicit) { +res->leaks_fixed += count; +} } } @@ -574,7 +579,7 @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, return ret; } -ret = parallels_check_leak(bs, res, fix); +ret = parallels_check_leak(bs, res, fix, true); if (ret < 0) { return ret; } -- 2.34.1
[PATCH v8 06/10] parallels: Add checking and repairing duplicate offsets in BAT
Cluster offsets must be unique among all the BAT entries. Find duplicate offsets in the BAT and fix it by copying the content of the relevant cluster to a newly allocated cluster and set the new cluster offset to the duplicated entry. Add host_cluster_index() helper to deduplicate the code. When new clusters are allocated, the file size increases by 128 Mb. Call parallels_check_leak() to fix this leak. Signed-off-by: Alexander Ivanov Reviewed-by: Denis V. Lunev --- block/parallels.c | 144 ++ 1 file changed, 144 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index f7b44cb433..a78238eadd 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -136,6 +136,12 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, return MIN(nb_sectors, ret); } +static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off) +{ +off -= s->data_start << BDRV_SECTOR_BITS; +return off / s->cluster_size; +} + static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, int nb_sectors, int *pnum) { @@ -533,6 +539,139 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, return 0; } +static int coroutine_fn GRAPH_RDLOCK +parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix) +{ +BDRVParallelsState *s = bs->opaque; +int64_t host_off, host_sector, guest_sector; +unsigned long *bitmap; +uint32_t i, bitmap_size, cluster_index, bat_entry; +int n, ret = 0; +uint64_t *buf = NULL; +bool fixed = false; + +/* + * Create a bitmap of used clusters. + * If a bit is set, there is a BAT entry pointing to this cluster. + * Loop through the BAT entries, check bits relevant to an entry offset. + * If bit is set, this entry is duplicated. Otherwise set the bit. + * + * We shouldn't worry about newly allocated clusters outside the image + * because they are created higher then any existing cluster pointed by + * a BAT entry. + */ +bitmap_size = host_cluster_index(s, res->image_end_offset); +if (bitmap_size == 0) { +return 0; +} +if (res->image_end_offset % s->cluster_size) { +/* A not aligned image end leads to a bitmap shorter by 1 */ +bitmap_size++; +} + +bitmap = bitmap_new(bitmap_size); + +buf = qemu_blockalign(bs, s->cluster_size); + +for (i = 0; i < s->bat_size; i++) { +host_off = bat2sect(s, i) << BDRV_SECTOR_BITS; +if (host_off == 0) { +continue; +} + +cluster_index = host_cluster_index(s, host_off); +assert(cluster_index < bitmap_size); +if (!test_bit(cluster_index, bitmap)) { +bitmap_set(bitmap, cluster_index, 1); +continue; +} + +/* this cluster duplicates another one */ +fprintf(stderr, "%s duplicate offset in BAT entry %u\n", +fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + +res->corruptions++; + +if (!(fix & BDRV_FIX_ERRORS)) { +continue; +} + +/* + * Reset the entry and allocate a new cluster + * for the relevant guest offset. In this way we let + * the lower layer to place the new cluster properly. + * Copy the original cluster to the allocated one. + * But before save the old offset value for repairing + * if we have an error. + */ +bat_entry = s->bat_bitmap[i]; +parallels_set_bat_entry(s, i, 0); + +ret = bdrv_co_pread(bs->file, host_off, s->cluster_size, buf, 0); +if (ret < 0) { +res->check_errors++; +goto out_repair_bat; +} + +guest_sector = (i * (int64_t)s->cluster_size) >> BDRV_SECTOR_BITS; +host_sector = allocate_clusters(bs, guest_sector, s->tracks, &n); +if (host_sector < 0) { +res->check_errors++; +goto out_repair_bat; +} +host_off = host_sector << BDRV_SECTOR_BITS; + +ret = bdrv_co_pwrite(bs->file, host_off, s->cluster_size, buf, 0); +if (ret < 0) { +res->check_errors++; +goto out_repair_bat; +} + +if (host_off + s->cluster_size > res->image_end_offset) { +res->image_end_offset = host_off + s->cluster_size; +} + +/* + * In the future allocate_cluster() will reuse holed offsets + * inside the image. Keep the used clusters bitmap content + * consistent for the new allocated clusters too. + * + * Note, clusters allocated outside the current image are not + * considered, and the bitmap size doesn't change. + */ +cluster_index = host_cluster_index(s, host_off); +if (cluster_index < bitmap_size) { +bitmap_set(bitmap, cluster_index, 1); +} + +fix
[PATCH v8 05/10] parallels: Add data_start field to BDRVParallelsState
In the next patch we will need the offset of the data area for host cluster index calculation. Add this field and setting up code. Signed-off-by: Alexander Ivanov Reviewed-by: Denis V. Lunev --- block/parallels.c | 7 --- block/parallels.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 8bb5d115fc..f7b44cb433 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -868,10 +868,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOMEM; goto fail; } -s->data_end = le32_to_cpu(ph.data_off); -if (s->data_end == 0) { -s->data_end = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE); +s->data_start = le32_to_cpu(ph.data_off); +if (s->data_start == 0) { +s->data_start = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE); } +s->data_end = s->data_start; if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { /* * There is not enough unused space to fit to block align between BAT diff --git a/block/parallels.h b/block/parallels.h index f22f43f988..4e53e9572d 100644 --- a/block/parallels.h +++ b/block/parallels.h @@ -75,6 +75,7 @@ typedef struct BDRVParallelsState { uint32_t *bat_bitmap; unsigned int bat_size; +int64_t data_start; int64_t data_end; uint64_t prealloc_size; ParallelsPreallocMode prealloc_mode; -- 2.34.1
[PATCH v8 00/10] parallels: Add duplication check, repair at open, fix bugs
Fix incorrect data end calculation in parallels_open(). Check if data_end greater than the file size. Add change_info argument to parallels_check_leak(). Add checking and repairing duplicate offsets in BAT Image repairing in parallels_open(). v8: 1: New patch. Fixed comments formatting. 2: Fixed a comment line length. 6: Fixed a typo in a label. 9: Patch was splitted to two patches. This patch only adds data_off check. Function parallels_test_data_off() was changed to return the correct offset in any case. 10: Added data_off repairing to parallels_open(), v7: 3: Renamed "change_info" argument to "explicit", move info printing under explicit condition. 4: Different patch. Add data_start field to BDRVParallelsState for future host_cluster_index() function. 5: Prevously was 4th. Used s->data_start instead of s->header->data_off. Add bitmap size increasing if file length is not cluster aligned. Revert a couple conditions for better readability. Changed sectors calculation for better code transparency. Replaced sector variable by host_sector and guest_sector. Renamed off to host_off. Moved out_repare_bat: below return to get jumps on error path only. 6: Prevously was 5th. 7: New patch. Replace bdrv_getlength() by bdrv_co_getlength() in parallels_check_outside_image() because it is used in coroutine context. 8: New patch. Add data_off check. v6: 2: Different patch. Refused to split image leak handling. Instead there is a patch with a data_end check. 3: Different patch. There is a patch with change_info argument. 4: Removed changing fprintf by qemu_log from this patchset. Previously 3rd patch became 4th. Replaced qemu_memalign() by qemu_blockalign(). Got rid of iovecs, replaced bdrv_co_pwritev() by bdrv_co_pwrite(). Added assert(cluster_index < bitmap_size). Now BAT changes are reverted if there was an error in the cluster copying process. Simplified a sector calculation. 5: Moved header magic check to the appropriate place. Added a migrate_del_blocker() call and s->bat_dirty_bmap freeing on error. v5: 3: Fixed a byteorder bug, fixed zero-length image handling and fixed uint32 truncation. v4: 2,5: Rebased. v3: 2: Added (size >= res->image_end_offset) assert and changed the comment in parallels_get_leak_size(). Changed error printing and leaks fixing order. 3: Removed highest_offset() helper, instead image_end_offset field is used. 5: Moved highest_offset() code to parallels_open() - now it is used only in this function. Fixed data_end update condition. Fixed a leak of s->migration_blocker. v2: 2: Moved outsude parallels_check_leak() 2 helpers: parallels_get_leak_size() and parallels_fix_leak(). 3: Used highest_offset() helper in parallels_check_leak(). Fixed a typo. Added comments. Replaced g_malloc() call by qemu_memalign(). Replaced bdrv_pread() call by bdrv_co_pread(). Got rid of keeping bytes and sectors in the same variable. Added setting the bitmap of the used clusters for a new allocated cluster if it isn't out of the bitmap. Moved the leak fix to the end of all the checks. Removed a dependence on image format for the duplicate check. 4 (old): Merged this patch to the previous. 4 (former 5): Fixed formatting. 5 (former 6): Fixed comments. Added O_INACTIVE check in the condition. Replaced inuse detection by header_unclean checking. Replaced playing with corutines by bdrv_check() usage. Alexander Ivanov (10): parallels: Fix comments formatting inside parallels driver parallels: Incorrect data end calculation in parallels_open() parallels: Check if data_end greater than the file size parallels: Add "explicit" argument to parallels_check_leak() parallels: Add data_start field to BDRVParallelsState parallels: Add checking and repairing duplicate offsets in BAT parallels: Image repairing in parallels_open() parallels: Use bdrv_co_getlength() in parallels_check_outside_image() parallels: Add data_off check parallels: Add data_off repairing to parallels_open() block/parallels.c | 346 +++--- block/parallels.h | 1 + 2 files changed, 299 insertions(+), 48 deletions(-) -- 2.34.1
[PATCH v8 10/10] parallels: Add data_off repairing to parallels_open()
Place data_start/data_end calculation after reading the image header to s->header. Set s->data_start to the offset calculated in parallels_test_data_off(). Call bdrv_check() if data_off is incorrect. Signed-off-by: Alexander Ivanov --- block/parallels.c | 31 +-- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 103acb7b40..48c32d6821 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -1032,9 +1032,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ParallelsHeader ph; int ret, size, i; int64_t file_nb_sectors, sector; +uint32_t data_start; QemuOpts *opts = NULL; Error *local_err = NULL; char *buf; +bool data_off_is_correct; ret = bdrv_open_file_child(NULL, options, "file", bs, errp); if (ret < 0) { @@ -1092,10 +1094,20 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOMEM; goto fail; } -s->data_start = le32_to_cpu(ph.data_off); -if (s->data_start == 0) { -s->data_start = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE); + +ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); +if (ret < 0) { +goto fail; } +s->bat_bitmap = (uint32_t *)(s->header + 1); + +if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { +s->header_unclean = true; +} + +data_off_is_correct = parallels_test_data_off(s, file_nb_sectors, + &data_start); +s->data_start = data_start; s->data_end = s->data_start; if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { /* @@ -1105,16 +1117,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->header_size = size; } -ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); -if (ret < 0) { -goto fail; -} -s->bat_bitmap = (uint32_t *)(s->header + 1); - -if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { -s->header_unclean = true; -} - opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); if (!opts) { goto fail_options; @@ -1197,7 +1199,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, * Repair the image if it's dirty or * out-of-image corruption was detected. */ -if (s->data_end > file_nb_sectors || s->header_unclean) { +if (s->data_end > file_nb_sectors || s->header_unclean +|| !data_off_is_correct) { BdrvCheckResult res; ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); if (ret < 0) { -- 2.34.1
[PATCH v8 01/10] parallels: Fix comments formatting inside parallels driver
This patch is technically necessary as git patch rendering could result in moving some code from one place to the another and that hits checkpatch.pl warning. This problem specifically happens within next series. Signed-off-by: Alexander Ivanov --- block/parallels.c | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 18e34aef28..c7b2ed5a54 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -188,7 +188,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, idx = sector_num / s->tracks; to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; -/* This function is called only by parallels_co_writev(), which will never +/* + * This function is called only by parallels_co_writev(), which will never * pass a sector_num at or beyond the end of the image (because the block * layer never passes such a sector_num to that function). Therefore, idx * is always below s->bat_size. @@ -196,7 +197,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, * exceed the image end. Therefore, idx + to_allocate cannot exceed * s->bat_size. * Note that s->bat_size is an unsigned int, therefore idx + to_allocate - * will always fit into a uint32_t. */ + * will always fit into a uint32_t. + */ assert(idx < s->bat_size && idx + to_allocate <= s->bat_size); space = to_allocate * s->tracks; @@ -230,13 +232,15 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, } } -/* Try to read from backing to fill empty clusters +/* + * Try to read from backing to fill empty clusters * FIXME: 1. previous write_zeroes may be redundant *2. most of data we read from backing will be rewritten by * parallels_co_writev. On aligned-to-cluster write we do not need * this read at all. *3. it would be good to combine write of data from backing and new - * data into one write call */ + * data into one write call. + */ if (bs->backing) { int64_t nb_cow_sectors = to_allocate * s->tracks; int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS; @@ -864,8 +868,10 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); } if (s->data_end < s->header_size) { -/* there is not enough unused space to fit to block align between BAT - and actual data. We can't avoid read-modify-write... */ +/* + * There is not enough unused space to fit to block align between BAT + * and actual data. We can't avoid read-modify-write... + */ s->header_size = size; } -- 2.34.1
[PATCH] hw/nvme: fix endianness issue for shadow doorbells
From: Klaus Jensen In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for doorbell buffers"), we fixed shadow doorbells for big-endian guests running on little endian hosts. But I did not fix little-endian guests on big-endian hosts. Fix this. Solves issue #1765. Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Reported-by: Thomas Huth Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 18 +- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8e8e870b9a80..dadc2dc7da10 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) PCIDevice *pci = PCI_DEVICE(n); uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1); uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2); +uint32_t v; int i; /* Address should be page aligned */ @@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) NvmeCQueue *cq = n->cq[i]; if (sq) { +v = cpu_to_le32(sq->tail); + /* * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3) * nvme_process_db() uses this hard-coded way to calculate @@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) */ sq->db_addr = dbs_addr + (i << 3); sq->ei_addr = eis_addr + (i << 3); -pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); +pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); if (n->params.ioeventfd && sq->sqid != 0) { if (!nvme_init_sq_ioeventfd(sq)) { @@ -6835,10 +6838,12 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) } if (cq) { +v = cpu_to_le32(cq->head); + /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */ cq->db_addr = dbs_addr + (i << 3) + (1 << 2); cq->ei_addr = eis_addr + (i << 3) + (1 << 2); -pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); +pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); if (n->params.ioeventfd && cq->cqid != 0) { if (!nvme_init_cq_ioeventfd(cq)) { @@ -7587,7 +7592,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) { PCIDevice *pci = PCI_DEVICE(n); -uint32_t qid; +uint32_t qid, v; if (unlikely(addr & ((1 << 2) - 1))) { NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, @@ -7654,7 +7659,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) start_sqs = nvme_cq_full(cq) ? 1 : 0; cq->head = new_head; if (!qid && n->dbbuf_enabled) { -pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); +v = cpu_to_le32(cq->head); +pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); } if (start_sqs) { NvmeSQueue *sq; @@ -7714,6 +7720,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) sq->tail = new_tail; if (!qid && n->dbbuf_enabled) { +v = cpu_to_le32(sq->tail); + /* * The spec states "the host shall also update the controller's * corresponding doorbell property to match the value of that entry @@ -7727,7 +7735,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) * including ones that run on Linux, are not updating Admin Queues, * so we can't trust reading it for an appropriate sq tail. */ -pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); +pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); } qemu_bh_schedule(sq->bh); -- 2.41.0