[PATCH v7 4/4] tests/qtest: Introduce tests for UFS

2023-07-25 Thread Jeuk Kim
This patch includes the following tests:
  Test MMIO read
  Test UFS device initialization and ufs-lu recognition
  Test I/O (performs a write followed by a read to verify it)
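
For reference, once built, the new cases can be run through the qtest suite
or directly via the qos-test binary they are linked into (paths assume a
default build directory and an x86-64 host binary):

  make check-qtest-x86_64
  QTEST_QEMU_BINARY=./qemu-system-x86_64 ./tests/qtest/qos-test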

Signed-off-by: Jeuk Kim 
---
 MAINTAINERS |   1 +
 tests/qtest/meson.build |   1 +
 tests/qtest/ufs-test.c  | 575 
 3 files changed, 577 insertions(+)
 create mode 100644 tests/qtest/ufs-test.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 0c8a955b42..546f226e85 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2261,6 +2261,7 @@ M: Jeuk Kim 
 S: Supported
 F: hw/ufs/*
 F: include/block/ufs.h
+F: tests/qtest/ufs-test.c
 
 megasas
 M: Hannes Reinecke 
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index b071d400b3..2b1d589a87 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -269,6 +269,7 @@ qos_test_ss.add(
   'virtio-iommu-test.c',
   'vmxnet3-test.c',
   'igb-test.c',
+  'ufs-test.c',
 )
 
 if config_all_devices.has_key('CONFIG_VIRTIO_SERIAL')
diff --git a/tests/qtest/ufs-test.c b/tests/qtest/ufs-test.c
new file mode 100644
index 00..5104a0a56a
--- /dev/null
+++ b/tests/qtest/ufs-test.c
@@ -0,0 +1,575 @@
+/*
+ * QTest testcase for UFS
+ *
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/module.h"
+#include "qemu/units.h"
+#include "libqtest.h"
+#include "libqos/qgraph.h"
+#include "libqos/pci.h"
+#include "scsi/constants.h"
+#include "include/block/ufs.h"
+
+/* Test images sizes in Bytes */
+#define TEST_IMAGE_SIZE (64 * 1024 * 1024)
+/* Timeout for various operations, in seconds. */
+#define TIMEOUT_SECONDS 5
+/* Maximum PRD entry count */
+#define MAX_PRD_ENTRY_COUNT 10
+#define PRD_ENTRY_DATA_SIZE 4096
+/* Constants to build upiu */
+#define UTP_COMMAND_DESCRIPTOR_SIZE 4096
+#define UTP_RESPONSE_UPIU_OFFSET 1024
+#define UTP_PRDT_UPIU_OFFSET 2048
+
+typedef struct QUfs QUfs;
+
+struct QUfs {
+QOSGraphObject obj;
+QPCIDevice dev;
+QPCIBar bar;
+
+uint64_t utrlba;
+uint64_t utmrlba;
+uint64_t cmd_desc_addr;
+uint64_t data_buffer_addr;
+
+bool enabled;
+};
+
+static inline uint32_t ufs_rreg(QUfs *ufs, size_t offset)
+{
+return qpci_io_readl(&ufs->dev, ufs->bar, offset);
+}
+
+static inline void ufs_wreg(QUfs *ufs, size_t offset, uint32_t value)
+{
+qpci_io_writel(&ufs->dev, ufs->bar, offset, value);
+}
+
+static void ufs_wait_for_irq(QUfs *ufs)
+{
+uint64_t end_time;
+uint32_t is;
+/* Wait for an interrupt from the device, as the Linux driver does. */
+end_time = g_get_monotonic_time() + TIMEOUT_SECONDS * G_TIME_SPAN_SECOND;
+do {
+qtest_clock_step(ufs->dev.bus->qts, 100);
+is = ufs_rreg(ufs, A_IS);
+} while (is == 0 && g_get_monotonic_time() < end_time);
+}
+
+static UtpTransferReqDesc ufs_build_req_utrd(uint64_t cmd_desc_addr,
+ uint8_t slot,
+ uint32_t data_direction,
+ uint16_t prd_table_length)
+{
+UtpTransferReqDesc req = { 0 };
+uint64_t command_desc_base_addr =
+cmd_desc_addr + slot * UTP_COMMAND_DESCRIPTOR_SIZE;
+
+req.header.dword_0 =
+cpu_to_le32(1 << 28 | data_direction | UTP_REQ_DESC_INT_CMD);
+req.header.dword_2 = cpu_to_le32(OCS_INVALID_COMMAND_STATUS);
+
+req.command_desc_base_addr_hi = cpu_to_le32(command_desc_base_addr >> 32);
+req.command_desc_base_addr_lo =
+cpu_to_le32(command_desc_base_addr & 0xffffffff);
+req.response_upiu_offset =
+cpu_to_le16(UTP_RESPONSE_UPIU_OFFSET / sizeof(uint32_t));
+req.response_upiu_length = cpu_to_le16(sizeof(UtpUpiuRsp));
+req.prd_table_offset = cpu_to_le16(UTP_PRDT_UPIU_OFFSET / sizeof(uint32_t));
+req.prd_table_length = cpu_to_le16(prd_table_length);
+return req;
+}
+
+static void ufs_send_nop_out(QUfs *ufs, uint8_t slot,
+ UtpTransferReqDesc *utrd_out, UtpUpiuRsp *rsp_out)
+{
+/* Build up utp transfer request descriptor */
+UtpTransferReqDesc utrd =
+ufs_build_req_utrd(ufs->cmd_desc_addr, slot, UTP_NO_DATA_TRANSFER, 0);
+uint64_t utrd_addr = ufs->utrlba + slot * sizeof(UtpTransferReqDesc);
+uint64_t req_upiu_addr =
+ufs->cmd_desc_addr + slot * UTP_COMMAND_DESCRIPTOR_SIZE;
+uint64_t rsp_upiu_addr = req_upiu_addr + UTP_RESPONSE_UPIU_OFFSET;
+qtest_memwrite(ufs->dev.bus->qts, utrd_addr, &utrd, sizeof(utrd));
+
+/* Build up request upiu */
+UtpUpiuReq req_upiu = { 0 };
+req_upiu.header.trans_type = UPIU_TRANSACTION_NOP_OUT;
+req_upiu.header.task_tag = slot;
+qtest_memwrite(ufs->dev.bus->qts, req_upiu_addr, &req_upiu,
+   sizeof(req_upiu));
+
+/* Ring Doorbell */
+ufs_wreg(ufs, A_UTRLDBR, 1);
+ufs_wait_for_irq(ufs);
+g_assert_true(FIELD_EX32(ufs_rreg(ufs, A_IS), IS, UTRCS));
+ufs_wreg(ufs, A_IS, 

[PATCH v7 2/4] hw/ufs: Support for Query Transfer Requests

2023-07-25 Thread Jeuk Kim
This commit makes the UFS device support query
and NOP OUT transfer requests.

The next patch will add support for the UFS logical
unit and SCSI command transfer requests.

Signed-off-by: Jeuk Kim 
---
 hw/ufs/trace-events |   1 +
 hw/ufs/ufs.c| 980 +++-
 hw/ufs/ufs.h|  46 +++
 3 files changed, 1025 insertions(+), 2 deletions(-)

diff --git a/hw/ufs/trace-events b/hw/ufs/trace-events
index d1badcad10..665e1a942b 100644
--- a/hw/ufs/trace-events
+++ b/hw/ufs/trace-events
@@ -18,6 +18,7 @@ ufs_err_dma_read_req_upiu(uint32_t slot, uint64_t addr) "failed to read req upiu
 ufs_err_dma_read_prdt(uint32_t slot, uint64_t addr) "failed to read prdt. UTRLDBR slot %"PRIu32", prdt addr %"PRIu64""
 ufs_err_dma_write_utrd(uint32_t slot, uint64_t addr) "failed to write utrd. UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64""
 ufs_err_dma_write_rsp_upiu(uint32_t slot, uint64_t addr) "failed to write rsp upiu. UTRLDBR slot %"PRIu32", response upiu addr %"PRIu64""
+ufs_err_utrl_slot_error(uint32_t slot) "UTRLDBR slot %"PRIu32" is in error"
 ufs_err_utrl_slot_busy(uint32_t slot) "UTRLDBR slot %"PRIu32" is busy"
 ufs_err_unsupport_register_offset(uint32_t offset) "Register offset 0x%"PRIx32" is not yet supported"
 ufs_err_invalid_register_offset(uint32_t offset) "Register offset 0x%"PRIx32" is invalid"
diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c
index 101082a8a3..903418925c 100644
--- a/hw/ufs/ufs.c
+++ b/hw/ufs/ufs.c
@@ -15,10 +15,221 @@
 #include "ufs.h"
 
 /* The QEMU-UFS device follows spec version 3.1 */
-#define UFS_SPEC_VER 0x0310
+#define UFS_SPEC_VER 0x0310
 #define UFS_MAX_NUTRS 32
 #define UFS_MAX_NUTMRS 8
 
+static MemTxResult ufs_addr_read(UfsHc *u, hwaddr addr, void *buf, int size)
+{
+hwaddr hi = addr + size - 1;
+
+if (hi < addr) {
+return MEMTX_DECODE_ERROR;
+}
+
+if (!FIELD_EX32(u->reg.cap, CAP, 64AS) && (hi >> 32)) {
+return MEMTX_DECODE_ERROR;
+}
+
+return pci_dma_read(PCI_DEVICE(u), addr, buf, size);
+}
+
+static MemTxResult ufs_addr_write(UfsHc *u, hwaddr addr, const void *buf,
+  int size)
+{
+hwaddr hi = addr + size - 1;
+if (hi < addr) {
+return MEMTX_DECODE_ERROR;
+}
+
+if (!FIELD_EX32(u->reg.cap, CAP, 64AS) && (hi >> 32)) {
+return MEMTX_DECODE_ERROR;
+}
+
+return pci_dma_write(PCI_DEVICE(u), addr, buf, size);
+}
+
+static void ufs_complete_req(UfsRequest *req, UfsReqResult req_result);
+
+static inline hwaddr ufs_get_utrd_addr(UfsHc *u, uint32_t slot)
+{
+hwaddr utrl_base_addr = (((hwaddr)u->reg.utrlbau) << 32) + u->reg.utrlba;
+hwaddr utrd_addr = utrl_base_addr + slot * sizeof(UtpTransferReqDesc);
+
+return utrd_addr;
+}
+
+static inline hwaddr ufs_get_req_upiu_base_addr(const UtpTransferReqDesc *utrd)
+{
+uint32_t cmd_desc_base_addr_lo =
+le32_to_cpu(utrd->command_desc_base_addr_lo);
+uint32_t cmd_desc_base_addr_hi =
+le32_to_cpu(utrd->command_desc_base_addr_hi);
+
+return (((hwaddr)cmd_desc_base_addr_hi) << 32) + cmd_desc_base_addr_lo;
+}
+
+static inline hwaddr ufs_get_rsp_upiu_base_addr(const UtpTransferReqDesc *utrd)
+{
+hwaddr req_upiu_base_addr = ufs_get_req_upiu_base_addr(utrd);
+uint32_t rsp_upiu_byte_off =
+le16_to_cpu(utrd->response_upiu_offset) * sizeof(uint32_t);
+return req_upiu_base_addr + rsp_upiu_byte_off;
+}
+
+static MemTxResult ufs_dma_read_utrd(UfsRequest *req)
+{
+UfsHc *u = req->hc;
+hwaddr utrd_addr = ufs_get_utrd_addr(u, req->slot);
+MemTxResult ret;
+
+ret = ufs_addr_read(u, utrd_addr, &req->utrd, sizeof(req->utrd));
+if (ret) {
+trace_ufs_err_dma_read_utrd(req->slot, utrd_addr);
+}
+return ret;
+}
+
+static MemTxResult ufs_dma_read_req_upiu(UfsRequest *req)
+{
+UfsHc *u = req->hc;
+hwaddr req_upiu_base_addr = ufs_get_req_upiu_base_addr(&req->utrd);
+UtpUpiuReq *req_upiu = &req->req_upiu;
+uint32_t copy_size;
+uint16_t data_segment_length;
+MemTxResult ret;
+
+/*
+ * To know the size of the req_upiu, we need to read the
+ * data_segment_length in the header first.
+ */
+ret = ufs_addr_read(u, req_upiu_base_addr, &req_upiu->header,
+sizeof(UtpUpiuHeader));
+if (ret) {
+trace_ufs_err_dma_read_req_upiu(req->slot, req_upiu_base_addr);
+return ret;
+}
+data_segment_length = be16_to_cpu(req_upiu->header.data_segment_length);
+
+copy_size = sizeof(UtpUpiuHeader) + UFS_TRANSACTION_SPECIFIC_FIELD_SIZE +
+data_segment_length;
+
+ret = ufs_addr_read(u, req_upiu_base_addr, &req->req_upiu, copy_size);
+if (ret) {
+trace_ufs_err_dma_read_req_upiu(req->slot, req_upiu_base_addr);
+}
+return ret;
+}
+
+static MemTxResult ufs_dma_read_prdt(UfsRequest *req)
+{
+UfsHc *u = req->hc;
+uint16_t prdt_len = le16_to_cpu(req->utrd.prd_table_length);
+uint16_t prdt_byte_off =
+

[PATCH v7 3/4] hw/ufs: Support for UFS logical unit

2023-07-25 Thread Jeuk Kim
This commit adds support for the UFS logical unit.
The LU handles SCSI commands and unit descriptor
query requests.

With this, the UFS device can process I/O requests.

Signed-off-by: Jeuk Kim 
---
 hw/ufs/lu.c  | 1445 ++
 hw/ufs/meson.build   |2 +-
 hw/ufs/trace-events  |   25 +
 hw/ufs/ufs.c |  252 ++-
 hw/ufs/ufs.h |   43 ++
 include/scsi/constants.h |1 +
 6 files changed, 1761 insertions(+), 7 deletions(-)
 create mode 100644 hw/ufs/lu.c

diff --git a/hw/ufs/lu.c b/hw/ufs/lu.c
new file mode 100644
index 00..ec8932ff86
--- /dev/null
+++ b/hw/ufs/lu.c
@@ -0,0 +1,1445 @@
+/*
+ * QEMU UFS Logical Unit
+ *
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Written by Jeuk Kim 
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "qemu/memalign.h"
+#include "hw/scsi/scsi.h"
+#include "scsi/constants.h"
+#include "sysemu/block-backend.h"
+#include "qemu/cutils.h"
+#include "trace.h"
+#include "ufs.h"
+
+/*
+ * The code below handling SCSI commands is copied from hw/scsi/scsi-disk.c,
+ * with minor adjustments to make it work for UFS.
+ */
+
+#define SCSI_DMA_BUF_SIZE (128 * KiB)
+#define SCSI_MAX_INQUIRY_LEN 256
+#define SCSI_INQUIRY_DATA_SIZE 36
+#define SCSI_MAX_MODE_LEN 256
+
+typedef struct UfsSCSIReq {
+SCSIRequest req;
+/* Both sector and sector_count are in terms of BDRV_SECTOR_SIZE bytes.  */
+uint64_t sector;
+uint32_t sector_count;
+uint32_t buflen;
+bool started;
+bool need_fua_emulation;
+struct iovec iov;
+QEMUIOVector qiov;
+BlockAcctCookie acct;
+} UfsSCSIReq;
+
+static void ufs_scsi_free_request(SCSIRequest *req)
+{
+UfsSCSIReq *r = DO_UPCAST(UfsSCSIReq, req, req);
+
+qemu_vfree(r->iov.iov_base);
+}
+
+static void scsi_check_condition(UfsSCSIReq *r, SCSISense sense)
+{
+trace_ufs_scsi_check_condition(r->req.tag, sense.key, sense.asc,
+   sense.ascq);
+scsi_req_build_sense(&r->req, sense);
+scsi_req_complete(&r->req, CHECK_CONDITION);
+}
+
+static int ufs_scsi_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf,
+ uint32_t outbuf_len)
+{
+UfsHc *u = UFS(req->bus->qbus.parent);
+UfsLu *lu = DO_UPCAST(UfsLu, qdev, req->dev);
+uint8_t page_code = req->cmd.buf[2];
+int start, buflen = 0;
+
+if (outbuf_len < SCSI_INQUIRY_DATA_SIZE) {
+return -1;
+}
+
+outbuf[buflen++] = lu->qdev.type & 0x1f;
+outbuf[buflen++] = page_code;
+outbuf[buflen++] = 0x00;
+outbuf[buflen++] = 0x00;
+start = buflen;
+
+switch (page_code) {
+case 0x00: /* Supported page codes, mandatory */
+{
+trace_ufs_scsi_emulate_vpd_page_00(req->cmd.xfer);
+outbuf[buflen++] = 0x00; /* list of supported pages (this page) */
+if (u->params.serial) {
+outbuf[buflen++] = 0x80; /* unit serial number */
+}
+outbuf[buflen++] = 0x87; /* mode page policy */
+break;
+}
+case 0x80: /* Device serial number, optional */
+{
+int l;
+
+if (!u->params.serial) {
+trace_ufs_scsi_emulate_vpd_page_80_not_supported();
+return -1;
+}
+
+l = strlen(u->params.serial);
+if (l > SCSI_INQUIRY_DATA_SIZE) {
+l = SCSI_INQUIRY_DATA_SIZE;
+}
+
+trace_ufs_scsi_emulate_vpd_page_80(req->cmd.xfer);
+memcpy(outbuf + buflen, u->params.serial, l);
+buflen += l;
+break;
+}
+case 0x87: /* Mode Page Policy, mandatory */
+{
+trace_ufs_scsi_emulate_vpd_page_87(req->cmd.xfer);
+outbuf[buflen++] = 0x3f; /* apply to all mode pages and subpages */
+outbuf[buflen++] = 0xff;
+outbuf[buflen++] = 0; /* shared */
+outbuf[buflen++] = 0;
+break;
+}
+default:
+return -1;
+}
+/* done with EVPD */
+assert(buflen - start <= 255);
+outbuf[start - 1] = buflen - start;
+return buflen;
+}
+
+static int ufs_scsi_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf,
+uint32_t outbuf_len)
+{
+int buflen = 0;
+
+if (outbuf_len < SCSI_INQUIRY_DATA_SIZE) {
+return -1;
+}
+
+if (req->cmd.buf[1] & 0x1) {
+/* Vital product data */
+return ufs_scsi_emulate_vpd_page(req, outbuf, outbuf_len);
+}
+
+/* Standard INQUIRY data */
+if (req->cmd.buf[2] != 0) {
+return -1;
+}
+
+/* PAGE CODE == 0 */
+buflen = req->cmd.xfer;
+if (buflen > SCSI_MAX_INQUIRY_LEN) {
+buflen = SCSI_MAX_INQUIRY_LEN;
+}
+
+if (is_wlun(req->lun)) {
+outbuf[0] = TYPE_WLUN;
+} else {
+outbuf[0] = 0;
+}
+outbuf[1] = 0;
+
+strpadcpy((char *)&outbuf[16], 16, "QEMU UFS", ' ');
+strpadcpy((char 

[PATCH v7 1/4] hw/ufs: Initial commit for emulated Universal-Flash-Storage

2023-07-25 Thread Jeuk Kim
Universal Flash Storage (UFS) is a high-performance mass storage device
with a serial interface. It is primarily used as a high-performance
data storage device for embedded applications.

This commit contains the code for the UFS device to be recognized
as a UFS PCI device.
Patches to handle the UFS logical unit and transfer requests will follow.

Signed-off-by: Jeuk Kim 
---
 MAINTAINERS  |6 +
 docs/specs/pci-ids.rst   |2 +
 hw/Kconfig   |1 +
 hw/meson.build   |1 +
 hw/ufs/Kconfig   |4 +
 hw/ufs/meson.build   |1 +
 hw/ufs/trace-events  |   32 ++
 hw/ufs/trace.h   |1 +
 hw/ufs/ufs.c |  278 ++
 hw/ufs/ufs.h |   42 ++
 include/block/ufs.h  | 1090 ++
 include/hw/pci/pci.h |1 +
 include/hw/pci/pci_ids.h |1 +
 meson.build  |1 +
 14 files changed, 1461 insertions(+)
 create mode 100644 hw/ufs/Kconfig
 create mode 100644 hw/ufs/meson.build
 create mode 100644 hw/ufs/trace-events
 create mode 100644 hw/ufs/trace.h
 create mode 100644 hw/ufs/ufs.c
 create mode 100644 hw/ufs/ufs.h
 create mode 100644 include/block/ufs.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 12e59b6b27..0c8a955b42 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2256,6 +2256,12 @@ F: tests/qtest/nvme-test.c
 F: docs/system/devices/nvme.rst
 T: git git://git.infradead.org/qemu-nvme.git nvme-next
 
+ufs
+M: Jeuk Kim 
+S: Supported
+F: hw/ufs/*
+F: include/block/ufs.h
+
 megasas
 M: Hannes Reinecke 
 L: qemu-bl...@nongnu.org
diff --git a/docs/specs/pci-ids.rst b/docs/specs/pci-ids.rst
index e302bea484..d6707fa069 100644
--- a/docs/specs/pci-ids.rst
+++ b/docs/specs/pci-ids.rst
@@ -92,6 +92,8 @@ PCI devices (other than virtio):
   PCI PVPanic device (``-device pvpanic-pci``)
 1b36:0012
   PCI ACPI ERST device (``-device acpi-erst``)
+1b36:0013
+  PCI UFS device (``-device ufs``)
 
 All these devices are documented in :doc:`index`.
 
diff --git a/hw/Kconfig b/hw/Kconfig
index ba62ff6417..9ca7b38c31 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -38,6 +38,7 @@ source smbios/Kconfig
 source ssi/Kconfig
 source timer/Kconfig
 source tpm/Kconfig
+source ufs/Kconfig
 source usb/Kconfig
 source virtio/Kconfig
 source vfio/Kconfig
diff --git a/hw/meson.build b/hw/meson.build
index c7ac7d3d75..f01fac4617 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -37,6 +37,7 @@ subdir('smbios')
 subdir('ssi')
 subdir('timer')
 subdir('tpm')
+subdir('ufs')
 subdir('usb')
 subdir('vfio')
 subdir('virtio')
diff --git a/hw/ufs/Kconfig b/hw/ufs/Kconfig
new file mode 100644
index 00..b7b3392e85
--- /dev/null
+++ b/hw/ufs/Kconfig
@@ -0,0 +1,4 @@
+config UFS_PCI
+bool
+default y if PCI_DEVICES
+depends on PCI
diff --git a/hw/ufs/meson.build b/hw/ufs/meson.build
new file mode 100644
index 00..eb5164bde9
--- /dev/null
+++ b/hw/ufs/meson.build
@@ -0,0 +1 @@
+system_ss.add(when: 'CONFIG_UFS_PCI', if_true: files('ufs.c'))
diff --git a/hw/ufs/trace-events b/hw/ufs/trace-events
new file mode 100644
index 00..d1badcad10
--- /dev/null
+++ b/hw/ufs/trace-events
@@ -0,0 +1,32 @@
+# ufs.c
+ufs_irq_raise(void) "INTx"
+ufs_irq_lower(void) "INTx"
+ufs_mmio_read(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d"
+ufs_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d"
+ufs_process_db(uint32_t slot) "UTRLDBR slot %"PRIu32""
+ufs_process_req(uint32_t slot) "UTRLDBR slot %"PRIu32""
+ufs_complete_req(uint32_t slot) "UTRLDBR slot %"PRIu32""
+ufs_sendback_req(uint32_t slot) "UTRLDBR slot %"PRIu32""
+ufs_exec_nop_cmd(uint32_t slot) "UTRLDBR slot %"PRIu32""
+ufs_exec_scsi_cmd(uint32_t slot, uint8_t lun, uint8_t opcode) "slot %"PRIu32", lun 0x%"PRIx8", opcode 0x%"PRIx8""
+ufs_exec_query_cmd(uint32_t slot, uint8_t opcode) "slot %"PRIu32", opcode 0x%"PRIx8""
+ufs_process_uiccmd(uint32_t uiccmd, uint32_t ucmdarg1, uint32_t ucmdarg2, uint32_t ucmdarg3) "uiccmd 0x%"PRIx32", ucmdarg1 0x%"PRIx32", ucmdarg2 0x%"PRIx32", ucmdarg3 0x%"PRIx32""
+
+# error condition
+ufs_err_dma_read_utrd(uint32_t slot, uint64_t addr) "failed to read utrd. UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64""
+ufs_err_dma_read_req_upiu(uint32_t slot, uint64_t addr) "failed to read req upiu. UTRLDBR slot %"PRIu32", request upiu addr %"PRIu64""
+ufs_err_dma_read_prdt(uint32_t slot, uint64_t addr) "failed to read prdt. UTRLDBR slot %"PRIu32", prdt addr %"PRIu64""
+ufs_err_dma_write_utrd(uint32_t slot, uint64_t addr) "failed to write utrd. UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64""
+ufs_err_dma_write_rsp_upiu(uint32_t slot, uint64_t addr) "failed to write rsp upiu. UTRLDBR slot %"PRIu32", response upiu addr %"PRIu64""
+ufs_err_utrl_slot_busy(uint32_t slot) "UTRLDBR slot %"PRIu32" is busy"
+ufs_err_unsupport_register_offset(uint32_t offset) "Register offset 0x%"PRIx32" is not yet supported"
+ufs_err_invalid_register_offset(uint32_t 

[PATCH v7 0/4] hw/ufs: Add Universal Flash Storage (UFS) support

2023-07-25 Thread Jeuk Kim
Since v6:
- Add tests/qtest/ufs-test.c to test ufs initialisation and I/O
- Add struct UtpTaskReqDesc to include/block/ufs.h
- Fix ufs_log2() logic
- Fix ufs-lu to use 4K as default block size to match the ufs spec

Since I created a new file, tests/qtest/ufs-test.c, I added Laurent Vivier to 
the cc list.
Thank you.

Since v5:
- Fix to print an error message instead of a segmentation fault
  when no drive property is specified for a ufs-lu device

Since v4:
Addressed review comment from Stefan Hajnoczi. The fixes are as
follows.
- Keep u->reg fields in host endian (Removed little-endian helper 
  functions from MemoryRegionOps)
- Remove unnecessary NULL checks for g_new and g_malloc0
- Replace DEFINE_PROP_DRIVE_IOTHREAD -> DEFINE_PROP_DRIVE

In case you were wondering, ufs and ufs-lu have been tested for the
following behaviours.
1. Checked ufs device recognition in Windows10 environment
2. Verified ufs device recognition in Ubuntu 22.04 environment
3. Verified io behaviour via fio in Ubuntu 22.04 environment
4. Verified query request via ufs-tools in Ubuntu 22.04 environment

Since v3:
- Replace softmmu_ss -> system_ss in meson

Since v2:
Addressed review comment from Stefan Hajnoczi. The main fixes are as
follows.
- Use of SPDX licence identifiers
- fixed endianness error
- removed memory leak
- fixed DMA error handling logic

Since v1:
- use macros of "hw/registerfields.h" (Addressed Philippe's review
  comments)

This patch series adds support for a new PCI-based UFS device.

The UFS pci device id (PCI_DEVICE_ID_REDHAT_UFS) is not registered
in the Linux kernel yet, so it does not work right away, but I confirmed
that it works with Linux when the UFS pci device id is registered.

I have also verified that it works with Windows 10.
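
For anyone who wants to try the series, a minimal invocation looks roughly
like the following (property names are taken from this series; treat the
exact wiring as illustrative rather than as a reference):

  qemu-system-x86_64 -M q35 \
      -drive file=ufs.img,if=none,format=raw,id=lu0 \
      -device ufs,serial=deadbeef \
      -device ufs-lu,drive=lu0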

Jeuk Kim (4):
  hw/ufs: Initial commit for emulated Universal-Flash-Storage
  hw/ufs: Support for Query Transfer Requests
  hw/ufs: Support for UFS logical unit
  tests/qtest: Introduce tests for UFS

 MAINTAINERS  |7 +
 docs/specs/pci-ids.rst   |2 +
 hw/Kconfig   |1 +
 hw/meson.build   |1 +
 hw/ufs/Kconfig   |4 +
 hw/ufs/lu.c  | 1445 
 hw/ufs/meson.build   |1 +
 hw/ufs/trace-events  |   58 ++
 hw/ufs/trace.h   |1 +
 hw/ufs/ufs.c | 1494 ++
 hw/ufs/ufs.h |  131 
 include/block/ufs.h  | 1090 +++
 include/hw/pci/pci.h |1 +
 include/hw/pci/pci_ids.h |1 +
 include/scsi/constants.h |1 +
 meson.build  |1 +
 tests/qtest/meson.build  |1 +
 tests/qtest/ufs-test.c   |  575 +++
 18 files changed, 4815 insertions(+)
 create mode 100644 hw/ufs/Kconfig
 create mode 100644 hw/ufs/lu.c
 create mode 100644 hw/ufs/meson.build
 create mode 100644 hw/ufs/trace-events
 create mode 100644 hw/ufs/trace.h
 create mode 100644 hw/ufs/ufs.c
 create mode 100644 hw/ufs/ufs.h
 create mode 100644 include/block/ufs.h
 create mode 100644 tests/qtest/ufs-test.c

-- 
2.34.1




Re: [PATCH] semihosting/uaccess.c: Replaced a malloc call with g_malloc.

2023-07-25 Thread dinglimin
>On Tue, 25 Jul 2023 at 10:13, Michael Tokarev  wrote:
> >
> > 25.07.2023 12:00, dinglimin wrote:
> > > Replaced a call to malloc() and its respective call to free() with 
> > > g_malloc() and g_free().
> > >
> > > Signed-off-by: dinglimin 
> > >
> > > V1 -> V2:if cpu_memory_rw_debug failed, still need to set p=NULL
> > > ---
> > >   semihosting/uaccess.c | 4 ++--
> > >   1 file changed, 2 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/semihosting/uaccess.c b/semihosting/uaccess.c
> > > index 8018828069..2ac754cdb6 100644
> > > --- a/semihosting/uaccess.c
> > > +++ b/semihosting/uaccess.c
> > > @@ -14,10 +14,10 @@
> > >   void *softmmu_lock_user(CPUArchState *env, target_ulong addr,
> > >   target_ulong len, bool copy)
> > >   {
> > > -void *p = malloc(len);
> > > +void *p = g_malloc(len);
> > >   if (p && copy) {
> > >   if (cpu_memory_rw_debug(env_cpu(env), addr, p, len, 0)) {
> > > -free(p);
> > > +g_free(p);
> > >   p = NULL;
> > >   }
>> >   }
> >
>> Ok, that was the obvious part.  Now a next one, also obvious.
> >
> > You changed lock_user to use g_malloc(), but unlock_user
> > still uses free() instead of g_free().  At the very least
> > the other one needs to be changed too.  And I'd say the callers
> > should be analyzed to ensure they don't free() the result
> (they should not, think it is a bug if they do).
>
> We can be pretty sure the callers don't free() the returned
> value, because the calling code is also used in user-mode,
> where the lock/unlock implementation is entirely different
> and calling free() on the pointer will not work.
> 
> > lock_user/unlock_user (which #defines to softmmu_lock_user/
> > softmmu_unlock_user in softmmu mode) is used a *lot*.
> 
> The third part here, is that g_malloc() does not ever
> fail -- it will abort() on out of memory. However
> the code here is still handling g_malloc() returning NULL.
> The equivalent for "we expect this might fail" (which we want
> here, because the guest is passing us the length of memory
> to try to allocate) is g_try_malloc().
> 
> thanks
> -- PMM
g_malloc() is preferred over the g_try_*() functions (which return NULL
on error) when the requested allocation is small. This is because
allocating a few bytes should not be a problem on a healthy system;
otherwise, the system is already in a critical state.

I plan to delete the NULL checks after g_malloc().
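
For reference, Peter's g_try_malloc() suggestion would look roughly like
this (an illustrative sketch, not a submitted patch):

void *softmmu_lock_user(CPUArchState *env, target_ulong addr,
                        target_ulong len, bool copy)
{
    /* g_try_malloc() returns NULL on failure instead of aborting,
     * which is the behaviour we want for a guest-supplied length. */
    void *p = g_try_malloc(len);

    if (p && copy) {
        if (cpu_memory_rw_debug(env_cpu(env), addr, p, len, 0)) {
            g_free(p);
            p = NULL;
        }
    }
    return p;
}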





[PATCH v2 5/6] qtest: irq_intercept_[out/in]: return FAIL if no intercepts are installed

2023-07-25 Thread Chris Laplante
Returning FAIL is much better than silently reporting OK when no intercept was actually installed.
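
With this change a test that requests interception on a device without any
matching GPIOs now sees, for example (the QOM path is illustrative):

  irq_intercept_out /machine/some-device
  FAIL No intercepts installed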

Signed-off-by: Chris Laplante 
Reviewed-by: Peter Maydell 
---
 softmmu/qtest.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/softmmu/qtest.c b/softmmu/qtest.c
index 1719bbddc3..c9751f527f 100644
--- a/softmmu/qtest.c
+++ b/softmmu/qtest.c
@@ -399,6 +399,7 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 NamedGPIOList *ngl;
 bool is_named;
 bool is_outbound;
+bool interception_succeeded = false;
 
 g_assert(words[1]);
 is_named = words[2] != NULL;
@@ -431,6 +432,7 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 if (is_named) {
 if (ngl->name && strcmp(ngl->name, words[2]) == 0) {
 qtest_install_gpio_out_intercept(dev, ngl->name, 0);
+interception_succeeded = true;
 break;
 }
 } else if (!ngl->name) {
@@ -438,15 +440,22 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 for (i = 0; i < ngl->num_out; ++i) {
 qtest_install_gpio_out_intercept(dev, ngl->name, i);
 }
+interception_succeeded = true;
 }
 } else {
 qemu_irq_intercept_in(ngl->in, qtest_irq_handler,
   ngl->num_in);
+interception_succeeded = true;
 }
 }
-irq_intercept_dev = dev;
+
 qtest_send_prefix(chr);
-qtest_send(chr, "OK\n");
+if (interception_succeeded) {
+irq_intercept_dev = dev;
+qtest_send(chr, "OK\n");
+} else {
+qtest_send(chr, "FAIL No intercepts installed\n");
+}
 } else if (strcmp(words[0], "set_irq_in") == 0) {
 DeviceState *dev;
 qemu_irq irq;
-- 
2.41.0





[PATCH v2 4/6] qtest: bail from irq_intercept_in if name is specified

2023-07-25 Thread Chris Laplante
Named interception of in-GPIOs is not supported yet.

Signed-off-by: Chris Laplante 
Reviewed-by: Peter Maydell 
---
 softmmu/qtest.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/softmmu/qtest.c b/softmmu/qtest.c
index 7fd8546ed2..1719bbddc3 100644
--- a/softmmu/qtest.c
+++ b/softmmu/qtest.c
@@ -410,6 +410,12 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 return;
 }
 
+if (is_named && !is_outbound) {
+qtest_send_prefix(chr);
+qtest_send(chr, "FAIL Interception of named in-GPIOs not yet 
supported\n");
+return;
+}
+
 if (irq_intercept_dev) {
 qtest_send_prefix(chr);
 if (irq_intercept_dev != dev) {
@@ -421,7 +427,6 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 }
 
 QLIST_FOREACH(ngl, &dev->gpios, node) {
-/* We don't support inbound interception of named GPIOs yet */
 if (is_outbound) {
 if (is_named) {
 if (ngl->name && strcmp(ngl->name, words[2]) == 0) {
-- 
2.41.0





[PATCH v2 1/6] hw/gpio/nrf51: implement DETECT signal

2023-07-25 Thread Chris Laplante
Implement nRF51 DETECT signal in the GPIO peripheral.

The reference manual mentions per-pin DETECT signals, but these
are not exposed to the user. See
https://devzone.nordicsemi.com/f/nordic-q-a/39858/gpio-per-pin-detect-signal-available
for more information. Currently, I don't see a reason to model these.
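
For context, a guest arms DETECT through the SENSE field (bits 17:16) of
PIN_CNF[n]; a rough fragment of guest code, with the base address and
register offset taken from the nRF51 reference manual (listed here as
assumptions), would be:

#define GPIO_BASE        0x50000000u
#define GPIO_PIN_CNF(n)  (*(volatile uint32_t *)(GPIO_BASE + 0x700u + (n) * 4u))

/* SENSE = 2: DETECT asserts while the pin reads high,
 * SENSE = 3: DETECT asserts while the pin reads low. */
GPIO_PIN_CNF(3) = (GPIO_PIN_CNF(3) & ~(3u << 16)) | (2u << 16);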

Signed-off-by: Chris Laplante 
---
 hw/gpio/nrf51_gpio.c | 14 +-
 include/hw/gpio/nrf51_gpio.h |  1 +
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/hw/gpio/nrf51_gpio.c b/hw/gpio/nrf51_gpio.c
index b47fddf4ed..08396c69a4 100644
--- a/hw/gpio/nrf51_gpio.c
+++ b/hw/gpio/nrf51_gpio.c
@@ -78,6 +78,7 @@ static void update_state(NRF51GPIOState *s)
 int pull;
 size_t i;
 bool connected_out, dir, connected_in, out, in, input;
+bool assert_detect = false;
 
 for (i = 0; i < NRF51_GPIO_PINS; i++) {
 pull = pull_value(s->cnf[i]);
@@ -99,7 +100,15 @@ static void update_state(NRF51GPIOState *s)
 qemu_log_mask(LOG_GUEST_ERROR,
   "GPIO pin %zu short circuited\n", i);
 }
-if (!connected_in) {
+if (connected_in) {
+uint32_t detect_config = extract32(s->cnf[i], 16, 2);
+if ((detect_config == 2) && (in == 1)) {
+assert_detect = true;
+}
+if ((detect_config == 3) && (in == 0)) {
+assert_detect = true;
+}
+} else {
 /*
  * Floating input: the output stimulates IN if connected,
  * otherwise pull-up/pull-down resistors put a value on both
@@ -116,6 +125,8 @@ static void update_state(NRF51GPIOState *s)
 }
 update_output_irq(s, i, connected_out, out);
 }
+
+qemu_set_irq(s->detect, assert_detect);
 }
 
 /*
@@ -291,6 +302,7 @@ static void nrf51_gpio_init(Object *obj)
 
 qdev_init_gpio_in(DEVICE(s), nrf51_gpio_set, NRF51_GPIO_PINS);
 qdev_init_gpio_out(DEVICE(s), s->output, NRF51_GPIO_PINS);
+qdev_init_gpio_out_named(DEVICE(s), >detect, "detect", 1);
 }
 
 static void nrf51_gpio_class_init(ObjectClass *klass, void *data)
diff --git a/include/hw/gpio/nrf51_gpio.h b/include/hw/gpio/nrf51_gpio.h
index 8f9c2f86da..fcfa2bac17 100644
--- a/include/hw/gpio/nrf51_gpio.h
+++ b/include/hw/gpio/nrf51_gpio.h
@@ -64,6 +64,7 @@ struct NRF51GPIOState {
 uint32_t old_out_connected;
 
 qemu_irq output[NRF51_GPIO_PINS];
+qemu_irq detect;
 };
 
 
-- 
2.41.0





[PATCH v2 6/6] qtest: microbit-test: add tests for nRF51 DETECT

2023-07-25 Thread Chris Laplante
Exercise the DETECT mechanism of the GPIO peripheral.

Signed-off-by: Chris Laplante 
Reviewed-by: Peter Maydell 
---
 tests/qtest/microbit-test.c | 42 +
 1 file changed, 42 insertions(+)

diff --git a/tests/qtest/microbit-test.c b/tests/qtest/microbit-test.c
index 6022a92b6a..8f87810cd5 100644
--- a/tests/qtest/microbit-test.c
+++ b/tests/qtest/microbit-test.c
@@ -393,6 +393,47 @@ static void test_nrf51_gpio(void)
 qtest_quit(qts);
 }
 
+static void test_nrf51_gpio_detect(void) {
+QTestState *qts = qtest_init("-M microbit");
+int i;
+
+// Connect input buffer on pins 1-7, configure SENSE for high level
+for (i = 1; i <= 7; i++) {
+qtest_writel(qts, NRF51_GPIO_BASE + NRF51_GPIO_REG_CNF_START + i * 4, deposit32(0, 16, 2, 2));
+}
+
+qtest_irq_intercept_out_named(qts, "/machine/nrf51/gpio", "detect");
+
+for (i = 1; i <= 7; i++) {
+// Set pin high
+qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", i, 1);
+uint32_t actual = qtest_readl(qts, NRF51_GPIO_BASE + NRF51_GPIO_REG_IN);
+g_assert_cmpuint(actual, ==, 1 << i);
+
+// Check that DETECT is high
+g_assert_true(qtest_get_irq(qts, 0));
+
+// Set pin low, check that DETECT goes low.
+qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", i, 0);
+actual = qtest_readl(qts, NRF51_GPIO_BASE + NRF51_GPIO_REG_IN);
+g_assert_cmpuint(actual, ==, 0x0);
+g_assert_false(qtest_get_irq(qts, 0));
+}
+
+// Set pin 0 high, check that DETECT doesn't fire
+qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", 0, 1);
+g_assert_false(qtest_get_irq(qts, 0));
+qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", 0, 0);
+
+// Set pins 1, 2, and 3 high, then set 3 low. Check that DETECT is still high.
+for (i = 1; i <= 3; i++) {
+qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", i, 1);
+}
+g_assert_true(qtest_get_irq(qts, 0));
+qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", 3, 0);
+g_assert_true(qtest_get_irq(qts, 0));
+}
+
 static void timer_task(QTestState *qts, hwaddr task)
 {
 qtest_writel(qts, NRF51_TIMER_BASE + task, NRF51_TRIGGER_TASK);
@@ -499,6 +540,7 @@ int main(int argc, char **argv)
 
 qtest_add_func("/microbit/nrf51/uart", test_nrf51_uart);
 qtest_add_func("/microbit/nrf51/gpio", test_nrf51_gpio);
+qtest_add_func("/microbit/nrf51/gpio_detect", test_nrf51_gpio_detect);
 qtest_add_func("/microbit/nrf51/nvmc", test_nrf51_nvmc);
 qtest_add_func("/microbit/nrf51/timer", test_nrf51_timer);
 qtest_add_func("/microbit/microbit/i2c", test_microbit_i2c);
-- 
2.41.0





[PATCH v2 0/6] Add nRF51 DETECT signal with test

2023-07-25 Thread Chris Laplante
This patch series implements the nRF51 DETECT signal
in the GPIO peripheral. A qtest is added exercising the signal.

To implement the test, named out-GPIO IRQ interception had to be added
to the qtest framework. I also took the opportunity to improve IRQ
interception a bit by adding 'FAIL' responses when interception fails.
Otherwise, it is frustrating to troubleshoot why calls to
qtest_irq_intercept_out and friends appears to do nothing.

v1: https://patchwork.kernel.org/project/qemu-devel/list/?series=766078

Testing
===
Passes 'make check'

Changelog
=
v2: factor out qtest_install_gpio_out_intercept before usage (Peter)
renamed qtest_install_gpio_out_intercepts => qtest_install_gpio_out_intercept
don't pass DETECT to soc level (Peter)
change qtest to use DETECT at GPIO level (Peter)


Chris Laplante (6):
  hw/gpio/nrf51: implement DETECT signal
  qtest: factor out qtest_install_gpio_out_intercept
  qtest: implement named interception of out-GPIO
  qtest: bail from irq_intercept_in if name is specified
  qtest: irq_intercept_[out/in]: return FAIL if no intercepts are
installed
  qtest: microbit-test: add tests for nRF51 DETECT

 hw/gpio/nrf51_gpio.c | 14 -
 include/hw/gpio/nrf51_gpio.h |  1 +
 softmmu/qtest.c  | 56 ++--
 tests/qtest/libqtest.c   |  6 
 tests/qtest/libqtest.h   | 11 +++
 tests/qtest/microbit-test.c  | 42 +++
 6 files changed, 114 insertions(+), 16 deletions(-)

--
2.41.0





[PATCH v2 2/6] qtest: factor out qtest_install_gpio_out_intercept

2023-07-25 Thread Chris Laplante
Signed-off-by: Chris Laplante 
---
 softmmu/qtest.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/softmmu/qtest.c b/softmmu/qtest.c
index f8d764b719..1c92e5a6a3 100644
--- a/softmmu/qtest.c
+++ b/softmmu/qtest.c
@@ -365,6 +365,15 @@ void qtest_set_command_cb(bool (*pc_cb)(CharBackend *chr, 
gchar **words))
 process_command_cb = pc_cb;
 }
 
+static void qtest_install_gpio_out_intercept(DeviceState *dev, const char *name, int n)
+{
+qemu_irq *disconnected = g_new0(qemu_irq, 1);
+qemu_irq icpt = qemu_allocate_irq(qtest_irq_handler,
+  disconnected, n);
+
+*disconnected = qdev_intercept_gpio_out(dev, icpt, name, n);
+}
+
 static void qtest_process_command(CharBackend *chr, gchar **words)
 {
 const gchar *command;
@@ -415,12 +424,7 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 if (words[0][14] == 'o') {
 int i;
 for (i = 0; i < ngl->num_out; ++i) {
-qemu_irq *disconnected = g_new0(qemu_irq, 1);
-qemu_irq icpt = qemu_allocate_irq(qtest_irq_handler,
-  disconnected, i);
-
-*disconnected = qdev_intercept_gpio_out(dev, icpt,
-ngl->name, i);
+qtest_install_gpio_out_intercept(dev, ngl->name, i);
 }
 } else {
 qemu_irq_intercept_in(ngl->in, qtest_irq_handler,
-- 
2.41.0





[PATCH v2 3/6] qtest: implement named interception of out-GPIO

2023-07-25 Thread Chris Laplante
Adds qtest_irq_intercept_out_named method, which utilizes a new optional
name parameter to the irq_intercept_out qtest command.
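
For example, the exchange on the qtest control socket becomes (using the
nrf51 GPIO device exercised later in this series):

  irq_intercept_out /machine/nrf51/gpio detect
  OK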

Signed-off-by: Chris Laplante 
---
 softmmu/qtest.c| 24 
 tests/qtest/libqtest.c |  6 ++
 tests/qtest/libqtest.h | 11 +++
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/softmmu/qtest.c b/softmmu/qtest.c
index 1c92e5a6a3..7fd8546ed2 100644
--- a/softmmu/qtest.c
+++ b/softmmu/qtest.c
@@ -397,8 +397,12 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 || strcmp(words[0], "irq_intercept_in") == 0) {
 DeviceState *dev;
 NamedGPIOList *ngl;
+bool is_named;
+bool is_outbound;
 
 g_assert(words[1]);
+is_named = words[2] != NULL;
+is_outbound = words[0][14] == 'o';
 dev = DEVICE(object_resolve_path(words[1], NULL));
 if (!dev) {
 qtest_send_prefix(chr);
@@ -417,14 +421,18 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 }
 
 QLIST_FOREACH(ngl, &dev->gpios, node) {
-/* We don't support intercept of named GPIOs yet */
-if (ngl->name) {
-continue;
-}
-if (words[0][14] == 'o') {
-int i;
-for (i = 0; i < ngl->num_out; ++i) {
-qtest_install_gpio_out_intercept(dev, ngl->name, i);
+/* We don't support inbound interception of named GPIOs yet */
+if (is_outbound) {
+if (is_named) {
+if (ngl->name && strcmp(ngl->name, words[2]) == 0) {
+qtest_install_gpio_out_intercept(dev, ngl->name, 0);
+break;
+}
+} else if (!ngl->name) {
+int i;
+for (i = 0; i < ngl->num_out; ++i) {
+qtest_install_gpio_out_intercept(dev, ngl->name, i);
+}
 }
 } else {
 qemu_irq_intercept_in(ngl->in, qtest_irq_handler,
diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index c22dfc30d3..471529e6cc 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -993,6 +993,12 @@ void qtest_irq_intercept_out(QTestState *s, const char 
*qom_path)
 qtest_rsp(s);
 }
 
+void qtest_irq_intercept_out_named(QTestState *s, const char *qom_path, const char *name)
+{
+qtest_sendf(s, "irq_intercept_out %s %s\n", qom_path, name);
+qtest_rsp(s);
+}
+
 void qtest_irq_intercept_in(QTestState *s, const char *qom_path)
 {
 qtest_sendf(s, "irq_intercept_in %s\n", qom_path);
diff --git a/tests/qtest/libqtest.h b/tests/qtest/libqtest.h
index 3a71bc45fc..e53e350e3a 100644
--- a/tests/qtest/libqtest.h
+++ b/tests/qtest/libqtest.h
@@ -371,6 +371,17 @@ void qtest_irq_intercept_in(QTestState *s, const char 
*string);
  */
 void qtest_irq_intercept_out(QTestState *s, const char *string);
 
+/**
+ * qtest_irq_intercept_out_named:
+ * @s: #QTestState instance to operate on.
+ * @qom_path: QOM path of a device.
+ * @name: Name of the GPIO out pin
+ *
+ * Associate a qtest irq with the named GPIO-out pin of the device
+ * whose path is specified by @qom_path and whose name is @name.
+ */
+void qtest_irq_intercept_out_named(QTestState *s, const char *qom_path, const char *name);
+
 /**
  * qtest_set_irq_in:
  * @s: QTestState instance to operate on.
-- 
2.41.0





Re: [PULL 10/10] target/tricore: Rename tricore_feature

2023-07-25 Thread Michael Tokarev

25.07.2023 22:11, Bastian Koppelmann wrote:
...

Michael Tokarev has already picked it up. See 
https://lore.kernel.org/qemu-devel/20230725145829.37782-11-phi...@linaro.org/T/#u


I noticed that too, we did it almost at the same time.

But there's nothing wrong with that.  It doesn't matter
how a particular change enters the tree. When pulling
the same change for the 2nd time, git will notice the
change is already there and do nothing.

/mjt



Re: [PATCH 0/6] Add nRF51 DETECT signal with test

2023-07-25 Thread Chris Laplante
> > 2. I also have some implementations for pieces of CLOCK, namely the 
> > HFCLKSTART/HFCLKSTOP events and HFCLKSTARTED event. Should I include that 
> > in this patch series, or would you prefer it in a separate series? It is 
> > unrelated to DETECT and POWER.
> 
> 
> If you think they're ready to go in, and it doesn't
> make the series more than about 12-15 patches long,
> you can put them on the end of the series. If the
> patchset is starting to get a bit big then it might
> be easier to get the POWER/DETECT parts reviewed
> first.

I'm just going to send the POWER/DETECT bits first. There is quite a lot to 
emulate in CLOCK, POWER, and MPU, and I'd like to do a good job on it.

Thanks.
Chris



Re: [PATCH v2 3/4] vdpa: Restore vlan filtering state

2023-07-25 Thread Jason Wang
On Tue, Jul 25, 2023 at 3:48 PM Hawkins Jiawei  wrote:
>
> On 2023/7/25 14:47, Jason Wang wrote:
> > On Sun, Jul 23, 2023 at 5:28 PM Hawkins Jiawei  wrote:
> >>
> >> This patch introduces vhost_vdpa_net_load_single_vlan()
> >> and vhost_vdpa_net_load_vlan() to restore the vlan
> >> filtering state at device's startup.
> >>
> >> Co-developed-by: Eugenio Pérez 
> >> Signed-off-by: Eugenio Pérez 
> >> Signed-off-by: Hawkins Jiawei 
> >
> > Acked-by: Jason Wang 
> >
> > But this seems to be a latency killer, as it may send up to
> > 1024 commands.
> >
> > As discussed in the past, we need a better cvq command to do this: for
> > example, a single command to carry a bitmap.
>
> Hi Jason,
>
> Thanks for your review.
>
> You are right, we need some improvement here.
>
> Therefore, I have submitted another patch series titled "vdpa: Send all
> CVQ state load commands in parallel" at [1], which allows QEMU to delay
> polling and checking the device used buffer until either the SVQ is full
> or control commands shadow buffers are full, so that QEMU can send all
> the SVQ control commands in parallel, which has better performance
> improvement.
>
> To test that patch series, I created 4094 VLANS in guest to build an
> environment for sending multiple CVQ state load commands. According to
> the result on the real vdpa device at [2], this patch series can improve
> latency from 23296 us to 6539 us.
>
> Thanks!
>
> [1]. https://lists.gnu.org/archive/html/qemu-devel/2023-07/msg03726.html
> [2]. https://lists.gnu.org/archive/html/qemu-devel/2023-07/msg03947.html
>

That's great, and a single command to get/set the vid would still be
helpful (I mean we could extend the virtio spec).

Thanks

>
> >
> > Thanks
> >
> >> ---
> >> v2:
> >>   - remove the extra line pointed out by Eugenio
> >>
> >> v1: 
> >> https://lore.kernel.org/all/0a568cc8a8d2b750c2e09b2237e9f05cece07c3f.1689690854.git.yin31...@gmail.com/
> >>
> >>   net/vhost-vdpa.c | 48 
> >>   1 file changed, 48 insertions(+)
> >>
> >> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> >> index 9795306742..347241796d 100644
> >> --- a/net/vhost-vdpa.c
> >> +++ b/net/vhost-vdpa.c
> >> @@ -965,6 +965,50 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s,
> >>   return 0;
> >>   }
> >>
> >> +static int vhost_vdpa_net_load_single_vlan(VhostVDPAState *s,
> >> +   const VirtIONet *n,
> >> +   uint16_t vid)
> >> +{
> >> +const struct iovec data = {
> >> +.iov_base = &vid,
> >> +.iov_len = sizeof(vid),
> >> +};
> >> +ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_VLAN,
> >> +  VIRTIO_NET_CTRL_VLAN_ADD,
> >> +  &data, 1);
> >> +if (unlikely(dev_written < 0)) {
> >> +return dev_written;
> >> +}
> >> +if (unlikely(*s->status != VIRTIO_NET_OK)) {
> >> +return -EIO;
> >> +}
> >> +
> >> +return 0;
> >> +}
> >> +
> >> +static int vhost_vdpa_net_load_vlan(VhostVDPAState *s,
> >> +const VirtIONet *n)
> >> +{
> >> +int r;
> >> +
> >> +if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_VLAN)) {
> >> +return 0;
> >> +}
> >> +
> >> +for (int i = 0; i < MAX_VLAN >> 5; i++) {
> >> +for (int j = 0; n->vlans[i] && j <= 0x1f; j++) {
> >> +if (n->vlans[i] & (1U << j)) {
> >> +r = vhost_vdpa_net_load_single_vlan(s, n, (i << 5) + j);
> >> +if (unlikely(r != 0)) {
> >> +return r;
> >> +}
> >> +}
> >> +}
> >> +}
> >> +
> >> +return 0;
> >> +}
> >> +
> >>   static int vhost_vdpa_net_load(NetClientState *nc)
> >>   {
> >>   VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> >> @@ -995,6 +1039,10 @@ static int vhost_vdpa_net_load(NetClientState *nc)
> >>   if (unlikely(r)) {
> >>   return r;
> >>   }
> >> +r = vhost_vdpa_net_load_vlan(s, n);
> >> +if (unlikely(r)) {
> >> +return r;
> >> +}
> >>
> >>   return 0;
> >>   }
> >> --
> >> 2.25.1
> >>
> >
>




[ANNOUNCE] QEMU 8.1.0-rc1 is now available

2023-07-25 Thread Michael Roth
Hello,

On behalf of the QEMU Team, I'd like to announce the availability of the
second release candidate for the QEMU 8.1 release. This release is meant
for testing purposes and should not be used in a production environment.

  http://download.qemu.org/qemu-8.1.0-rc1.tar.xz
  http://download.qemu.org/qemu-8.1.0-rc1.tar.xz.sig

You can help improve the quality of the QEMU 8.1 release by testing this
release and reporting bugs using our GitLab issue tracker:

  https://gitlab.com/qemu-project/qemu/-/milestones/8#tab-issues

The release plan, as well as documented known issues for release
candidates, are available at:

  http://wiki.qemu.org/Planning/8.1

Please add entries to the ChangeLog for the 8.1 release below:

  http://wiki.qemu.org/ChangeLog/8.1

Thank you to everyone involved!

Changes since rc0:

6cb2011fed: Update version for v8.1.0-rc1 release (Peter Maydell)
ff62c21016: qapi: Correct "eg." to "e.g." in documentation (Markus Armbruster)
67d045a0ef: hw/pci: add comment to explain checking for available function 0 in 
pci hotplug (Ani Sinha)
8c0e8ed327: target/tricore: Rename tricore_feature (Bastian Koppelmann)
28cbbdd28e: hw/9pfs: spelling fixes (Michael Tokarev)
8b81968c1c: other architectures: spelling fixes (Michael Tokarev)
673d821541: arm: spelling fixes (Michael Tokarev)
cced0d6539: s390x: spelling fixes (Michael Tokarev)
d8b71d96b3: migration: spelling fixes (Michael Tokarev)
f8cfdd2038: target/tricore: Rename tricore_feature (Bastian Koppelmann)
ca4d5d862d: target/sparc: Handle FPRS correctly on big-endian hosts (Peter 
Maydell)
0fe4cac5dd: target/mips: Avoid shift by negative number in 
page_table_walk_refill() (Peter Maydell)
60a38a3a57: target/mips: Pass directory/leaf shift values to walk_directory() 
(Philippe Mathieu-Daudé)
fb51df0c8e: target/mips/mxu: Avoid overrun in gen_mxu_q8adde() (Philippe 
Mathieu-Daudé)
e37fdc7381: target/mips/mxu: Avoid overrun in gen_mxu_S32SLT() (Philippe 
Mathieu-Daudé)
d4eda549d2: target/mips/mxu: Replace magic array size by its definition 
(Philippe Mathieu-Daudé)
02388b5925: hw/char/escc: Implement loopback mode (Thomas Huth)
5fc1a68660: hw/mips: Improve the default USB settings in the loongson3-virt 
machine (Thomas Huth)
3b83079015: hw/sd/sdhci: Do not force sdhci_mmio_*_ops onto all SD controllers 
(Bernhard Beschow)
78cc90346e: tests/decode: Suppress "error: " string for expected-failure tests 
(Peter Maydell)
9b579543d7: For curses display, recognize a few more control keys (Sean 
Estabrooks)
5d78893f39: target/arm: Special case M-profile in debug_helper.c code (Peter 
Maydell)
f9540bb1b2: scripts/git-submodule.sh: Don't rely on non-POSIX 'read' behaviour 
(Peter Maydell)
c6445544d4: hw/arm/smmu: Handle big-endian hosts correctly (Peter Maydell)
bd39b7b5f3: tests/avocado/machine_s390_ccw_virtio: Skip the flaky virtio-gpu 
test by default (Thomas Huth)
c34ad45992: target/loongarch: Fix the CSRRD CPUID instruction on big endian 
hosts (Thomas Huth)
71a00a5bae: tests/avocado/migration: Remove the malfunctioning s390x tests 
(Thomas Huth)
241ab36c0a: tests/tcg/s390x: Test VCKSM (Ilya Leoshkevich)
e11e2fc6fb: tests/tcg/s390x: Test STPQ (Ilya Leoshkevich)
eacfe7cbbd: tests/tcg/s390x: Test MC (Ilya Leoshkevich)
f383b2f770: tests/tcg/s390x: Test ICM (Ilya Leoshkevich)
285a672d29: tests/tcg/s390x: Test CLM (Ilya Leoshkevich)
372886d2ae: tests/tcg/s390x: Test CLGEBR and CGEBRA (Ilya Leoshkevich)
f6044c994a: tests/tcg/s390x: Test CKSM (Ilya Leoshkevich)
ff537b0370: target/s390x: Fix assertion failure in VFMIN/VFMAX with type 13 
(Ilya Leoshkevich)
9c028c057a: target/s390x: Make MC raise specification exception when class >= 
16 (Ilya Leoshkevich)
a2025557ed: target/s390x: Fix ICM with M3=0 (Ilya Leoshkevich)
53684e344a: target/s390x: Fix CONVERT TO LOGICAL/FIXED with out-of-range inputs 
(Ilya Leoshkevich)
4b6e4c0b82: target/s390x: Fix CLM with M3=0 (Ilya Leoshkevich)
761b0aa938: target/s390x: Make CKSM raise an exception if R2 is odd (Ilya 
Leoshkevich)
32b120394c: accel/tcg: Fix type of 'last' for pageflags_{find,next} (Luca 
Bonissi)
8c605cf1d4: accel/tcg: Zero-pad vaddr in tlb_debug output (Anton Johansson)
22d2e5351a: tcg/{i386, s390x}: Add earlyclobber to the op_add2's first output 
(Ilya Leoshkevich)
2c8412d469: accel/tcg: Take mmap_lock in load_atomic*_or_exit (Richard 
Henderson)
f1ce0b8028: accel/tcg: Fix sense of read-only probes in ldst_atomicity (Richard 
Henderson)
990ef9182b: include/exec: Add WITH_MMAP_LOCK_GUARD (Richard Henderson)
736a1588c1: tcg/ppc: Fix race in goto_tb implementation (Jordan Niethe)
dcaaf2bf9b: roms/opensbi: Upgrade from v1.3 to v1.3.1 (Bin Meng)
4ea3fa99be: Revert "linux-user: Fix qemu-arm to run static armhf binaries" 
(Michael Tokarev)
bfe04d0a7d: nbd: Use enum for various negotiation modes (Eric Blake)
70fa99f445: nbd/client: Add safety check on chunk payload length (Eric Blake)
8cb98a725e: nbd/client: Simplify cookie vs. index computation (Eric Blake)
22efd81104: nbd: s/handle/cookie/ to match NBD spec (Eric 

Re: [PATCH] vdpa: set old virtio status at cvq isolation probing end

2023-07-25 Thread Jason Wang
On Wed, Jul 26, 2023 at 2:21 AM Eugenio Pérez  wrote:
>
> The device already has a virtio status set by vhost_vdpa_init by the
> time vhost_vdpa_probe_cvq_isolation is called. vhost_vdpa_init set
> S_ACKNOWLEDGE and S_DRIVER, so it is invalid to just reset it.
>
> It is invalid to start the device after that, but all devices seem to be
> fine with it.  Fix QEMU so that it follows the virtio start procedure.
>
> Fixes: 152128d64697 ("vdpa: move CVQ isolation check to net_init_vhost_vdpa")
> Reported-by: Dragos Tatulea 
> Signed-off-by: Eugenio Pérez 
> ---
>  net/vhost-vdpa.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 9795306742..d7e2b714b4 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -1333,6 +1333,8 @@ static int vhost_vdpa_probe_cvq_isolation(int 
> device_fd, uint64_t features,
>  out:
>  status = 0;
>  ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> +status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
> +ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);

So if we fail after FEATURES_OK, this basically clears that bit. The spec
doesn't say whether that is allowed; I wonder if a reset would be better?

Btw, spec requires a read of status after setting FEATURES_OK, this
seems to be missed in the current code.
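
Something along these lines (an untested sketch against the function above,
not part of this patch) is what the spec asks for:

status |= VIRTIO_CONFIG_S_FEATURES_OK;
ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);

/* The virtio spec requires re-reading the status here: the device may
 * refuse the negotiated features by clearing FEATURES_OK. */
ioctl(device_fd, VHOST_VDPA_GET_STATUS, &status);
if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
    r = -ENOTSUP;
    goto out;
}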

Thanks

>  return r;
>  }
>
> --
> 2.39.3
>




Re: [PATCH v1 0/9] gfxstream + rutabaga_gfx

2023-07-25 Thread Gurchetan Singh
On Mon, Jul 24, 2023 at 2:56 AM Alyssa Ross  wrote:
>
> Gurchetan Singh  writes:
>
> > In terms of API stability/versioning/packaging, once this series is
> > reviewed, the plan is to cut a "gfxstream upstream release branch".  We
> > will have the same API guarantees as any other QEMU project then, i.e no
> > breaking API changes for 5 years.
>
> What about Rutabaga?

Yes, rutabaga + gfxstream will both be versioned and maintain API
backwards compatibility in line with QEMU guidelines.



[PATCH v2 5/8] hw/arm/virt: Unsupported host CPU model on TCG

2023-07-25 Thread Gavin Shan
The 'host' CPU model isn't supported unless KVM or HVF is enabled.
For example, the following error message, seen when the guest is
started with option '-cpu cortex-a8', still lists 'host' as valid
even under TCG.

  qemu-system-aarch64: Invalid CPU type: cortex-a8-arm-cpu
  The valid models are: cortex-a7, cortex-a15, cortex-a35,
  cortex-a55, cortex-a72, cortex-a76, a64fx, neoverse-n1,
  neoverse-v1, cortex-a53, cortex-a57, host, max

Hide the 'host' CPU model unless KVM or HVF is enabled.

Signed-off-by: Gavin Shan 
---
 hw/arm/virt.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index debd85614e..2562ca0c1e 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -217,7 +217,9 @@ static const char * const valid_cpu_types[] = {
 #endif
 ARM_CPU_TYPE_NAME("cortex-a53"),
 ARM_CPU_TYPE_NAME("cortex-a57"),
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
 ARM_CPU_TYPE_NAME("host"),
+#endif
 ARM_CPU_TYPE_NAME("max"),
 NULL
 };
@@ -236,7 +238,9 @@ static const char * const valid_cpu_models[] = {
 #endif
 "cortex-a53",
 "cortex-a57",
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
 "host",
+#endif
 "max",
 NULL
 };
-- 
2.41.0




[PATCH v2 3/8] machine: Print supported CPU models instead of typenames

2023-07-25 Thread Gavin Shan
The supported CPU models, not the typenames, should be printed when
the user-specified CPU type isn't supported in is_cpu_type_supported(),
to be consistent with the CPU model the user specifies through the
'-cpu' option.

Correct the error messages to print CPU models, maintained in the newly
added mc->valid_cpu_models, because there is no fixed pattern for
converting between a CPU model and its typename. While here,
mc->valid_cpu_types and mc->valid_cpu_models are also constified.
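
With this change the hint printed for an invalid -cpu value switches from
typenames to user-facing models; on the q800 machine, for instance, the
output goes roughly from (the typename suffix follows the usual per-target
convention and is shown only for illustration):

  The valid types are: m68040-m68k-cpu

to:

  The valid models are: m68040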

Signed-off-by: Gavin Shan 
---
 hw/core/machine.c   | 10 ++
 hw/m68k/q800.c  |  8 +++-
 include/hw/boards.h |  3 ++-
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index fe110e9b0a..858f8ede89 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1362,6 +1362,8 @@ static void is_cpu_type_supported(MachineState *machine, 
Error **errp)
  * type is provided through '-cpu' option.
  */
 if (mc->valid_cpu_types && machine->cpu_type) {
+assert(mc->valid_cpu_models && mc->valid_cpu_models[0]);
+
 for (i = 0; mc->valid_cpu_types[i]; i++) {
 if (object_class_dynamic_cast(oc, mc->valid_cpu_types[i])) {
 break;
@@ -1371,10 +1373,10 @@ static void is_cpu_type_supported(MachineState 
*machine, Error **errp)
 /* The user specified CPU type isn't valid */
 if (!mc->valid_cpu_types[i]) {
 error_setg(errp, "Invalid CPU type: %s", machine->cpu_type);
-error_append_hint(errp, "The valid types are: %s",
-  mc->valid_cpu_types[0]);
-for (i = 1; mc->valid_cpu_types[i]; i++) {
-error_append_hint(errp, ", %s", mc->valid_cpu_types[i]);
+error_append_hint(errp, "The valid models are: %s",
+  mc->valid_cpu_models[0]);
+for (i = 1; mc->valid_cpu_models[i]; i++) {
+error_append_hint(errp, ", %s", mc->valid_cpu_models[i]);
 }
 error_append_hint(errp, "\n");
 
diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c
index b770b71d54..1e360674a7 100644
--- a/hw/m68k/q800.c
+++ b/hw/m68k/q800.c
@@ -596,11 +596,16 @@ static GlobalProperty hw_compat_q800[] = {
 };
 static const size_t hw_compat_q800_len = G_N_ELEMENTS(hw_compat_q800);
 
-static const char *q800_machine_valid_cpu_types[] = {
+static const char * const q800_machine_valid_cpu_types[] = {
 M68K_CPU_TYPE_NAME("m68040"),
 NULL
 };
 
+static const char * const q800_machine_valid_cpu_models[] = {
+"m68040",
+NULL
+};
+
 static void q800_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -609,6 +614,7 @@ static void q800_machine_class_init(ObjectClass *oc, void 
*data)
 mc->init = q800_machine_init;
 mc->default_cpu_type = M68K_CPU_TYPE_NAME("m68040");
 mc->valid_cpu_types = q800_machine_valid_cpu_types;
+mc->valid_cpu_models = q800_machine_valid_cpu_models;
 mc->max_cpus = 1;
 mc->block_default_type = IF_SCSI;
 mc->default_ram_id = "m68k_mac.ram";
diff --git a/include/hw/boards.h b/include/hw/boards.h
index ed83360198..81747b0788 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -268,7 +268,8 @@ struct MachineClass {
 bool has_hotpluggable_cpus;
 bool ignore_memory_transaction_failures;
 int numa_mem_align_shift;
-const char **valid_cpu_types;
+const char * const *valid_cpu_types;
+const char * const *valid_cpu_models;
 strList *allowed_dynamic_sysbus_devices;
 bool auto_enable_numa_with_memhp;
 bool auto_enable_numa_with_memdev;
-- 
2.41.0




[PATCH v2 2/8] machine: Introduce helper is_cpu_type_supported()

2023-07-25 Thread Gavin Shan
The logic that checks whether the specified CPU type is supported in
machine_run_board_init() is self-contained enough. Factor it out into
the helper is_cpu_type_supported(). With this, machine_run_board_init()
looks a bit cleaner. While here, @machine_class is renamed to @mc to
avoid code spanning multiple lines. The comments are also tweaked
slightly.

No functional change intended.

Signed-off-by: Gavin Shan 
---
 hw/core/machine.c | 82 +--
 1 file changed, 44 insertions(+), 38 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index d7e7f8f120..fe110e9b0a 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1349,12 +1349,50 @@ out:
 return r;
 }
 
+static void is_cpu_type_supported(MachineState *machine, Error **errp)
+{
+MachineClass *mc = MACHINE_GET_CLASS(machine);
+ObjectClass *oc = object_class_by_name(machine->cpu_type);
+CPUClass *cc;
+int i;
+
+/*
+ * Check if the user specified CPU type is supported when the valid
+ * CPU types have been determined. Note that the user specified CPU
+ * type is provided through '-cpu' option.
+ */
+if (mc->valid_cpu_types && machine->cpu_type) {
+for (i = 0; mc->valid_cpu_types[i]; i++) {
+if (object_class_dynamic_cast(oc, mc->valid_cpu_types[i])) {
+break;
+}
+}
+
+/* The user specified CPU type isn't valid */
+if (!mc->valid_cpu_types[i]) {
+error_setg(errp, "Invalid CPU type: %s", machine->cpu_type);
+error_append_hint(errp, "The valid types are: %s",
+  mc->valid_cpu_types[0]);
+for (i = 1; mc->valid_cpu_types[i]; i++) {
+error_append_hint(errp, ", %s", mc->valid_cpu_types[i]);
+}
+error_append_hint(errp, "\n");
+
+return;
+}
+}
+
+/* Check if CPU type is deprecated and warn if so */
+cc = CPU_CLASS(oc);
+if (cc && cc->deprecation_note) {
+warn_report("CPU model %s is deprecated -- %s", machine->cpu_type,
+cc->deprecation_note);
+}
+}
 
 void machine_run_board_init(MachineState *machine, const char *mem_path, Error 
**errp)
 {
 MachineClass *machine_class = MACHINE_GET_CLASS(machine);
-ObjectClass *oc = object_class_by_name(machine->cpu_type);
-CPUClass *cc;
 Error *local_err = NULL;
 
 /* This checkpoint is required by replay to separate prior clock
@@ -1406,42 +1444,10 @@ void machine_run_board_init(MachineState *machine, 
const char *mem_path, Error *
 machine->ram = machine_consume_memdev(machine, machine->memdev);
 }
 
-/* If the machine supports the valid_cpu_types check and the user
- * specified a CPU with -cpu check here that the user CPU is supported.
- */
-if (machine_class->valid_cpu_types && machine->cpu_type) {
-int i;
-
-for (i = 0; machine_class->valid_cpu_types[i]; i++) {
-if (object_class_dynamic_cast(oc,
-  machine_class->valid_cpu_types[i])) {
-/* The user specificed CPU is in the valid field, we are
- * good to go.
- */
-break;
-}
-}
-
-if (!machine_class->valid_cpu_types[i]) {
-/* The user specified CPU is not valid */
-error_setg(&local_err, "Invalid CPU type: %s", machine->cpu_type);
-error_append_hint(&local_err, "The valid types are: %s",
-  machine_class->valid_cpu_types[0]);
-for (i = 1; machine_class->valid_cpu_types[i]; i++) {
-error_append_hint(&local_err, ", %s",
-  machine_class->valid_cpu_types[i]);
-}
-error_append_hint(&local_err, "\n");
-
-error_propagate(errp, local_err);
-}
-}
-
-/* Check if CPU type is deprecated and warn if so */
-cc = CPU_CLASS(oc);
-if (cc && cc->deprecation_note) {
-warn_report("CPU model %s is deprecated -- %s", machine->cpu_type,
-cc->deprecation_note);
+/* Check if the CPU type is supported */
+is_cpu_type_supported(machine, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
 }
 
 if (machine->cgs) {
-- 
2.41.0




[PATCH v2 7/8] hw/arm: Check CPU type in machine_run_board_init()

2023-07-25 Thread Gavin Shan
Set mc->valid_cpu_{types, models} so that the specified CPU type
can be checked in machine_run_board_init(). We don't need to do
the check ourselves any more.

Signed-off-by: Gavin Shan 
---
 hw/arm/bananapi_m2u.c   | 18 +++--
 hw/arm/cubieboard.c | 18 +++--
 hw/arm/mps2-tz.c| 34 +--
 hw/arm/mps2.c   | 44 +++--
 hw/arm/msf2-som.c   | 18 +++--
 hw/arm/musca.c  | 19 +++---
 hw/arm/npcm7xx_boards.c | 19 +++---
 hw/arm/orangepi.c   | 18 +++--
 8 files changed, 138 insertions(+), 50 deletions(-)

diff --git a/hw/arm/bananapi_m2u.c b/hw/arm/bananapi_m2u.c
index 74121d8966..d6c9b90370 100644
--- a/hw/arm/bananapi_m2u.c
+++ b/hw/arm/bananapi_m2u.c
@@ -29,6 +29,16 @@
 
 static struct arm_boot_info bpim2u_binfo;
 
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a7"),
+NULL
+};
+
+static const char * const valid_cpu_models[] = {
+"cortex-a7",
+NULL
+};
+
 /*
  * R40 can boot from mmc0 and mmc2, and bpim2u has two mmc interface, one is
  * connected to sdcard and another mount an emmc media.
@@ -70,12 +80,6 @@ static void bpim2u_init(MachineState *machine)
 exit(1);
 }
 
-/* Only allow Cortex-A7 for this board */
-if (strcmp(machine->cpu_type, ARM_CPU_TYPE_NAME("cortex-a7")) != 0) {
-error_report("This board can only be used with cortex-a7 CPU");
-exit(1);
-}
-
 r40 = AW_R40(object_new(TYPE_AW_R40));
 object_property_add_child(OBJECT(machine), "soc", OBJECT(r40));
 object_unref(OBJECT(r40));
@@ -138,6 +142,8 @@ static void bpim2u_machine_init(MachineClass *mc)
 mc->max_cpus = AW_R40_NUM_CPUS;
 mc->default_cpus = AW_R40_NUM_CPUS;
 mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a7");
+mc->valid_cpu_types = valid_cpu_types;
+mc->valid_cpu_models = valid_cpu_models;
 mc->default_ram_size = 1 * GiB;
 mc->default_ram_id = "bpim2u.ram";
 }
diff --git a/hw/arm/cubieboard.c b/hw/arm/cubieboard.c
index 8c7fa91529..4a66a781a4 100644
--- a/hw/arm/cubieboard.c
+++ b/hw/arm/cubieboard.c
@@ -28,6 +28,16 @@ static struct arm_boot_info cubieboard_binfo = {
 .board_id = 0x1008,
 };
 
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a8"),
+NULL
+};
+
+static const char * const valid_cpu_models[] = {
+"cortex-a8",
+NULL
+};
+
 static void cubieboard_init(MachineState *machine)
 {
 AwA10State *a10;
@@ -51,12 +61,6 @@ static void cubieboard_init(MachineState *machine)
 exit(1);
 }
 
-/* Only allow Cortex-A8 for this board */
-if (strcmp(machine->cpu_type, ARM_CPU_TYPE_NAME("cortex-a8")) != 0) {
-error_report("This board can only be used with cortex-a8 CPU");
-exit(1);
-}
-
 a10 = AW_A10(object_new(TYPE_AW_A10));
 object_property_add_child(OBJECT(machine), "soc", OBJECT(a10));
 object_unref(OBJECT(a10));
@@ -115,6 +119,8 @@ static void cubieboard_machine_init(MachineClass *mc)
 {
 mc->desc = "cubietech cubieboard (Cortex-A8)";
 mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a8");
+mc->valid_cpu_types = valid_cpu_types;
+mc->valid_cpu_models = valid_cpu_models;
 mc->default_ram_size = 1 * GiB;
 mc->init = cubieboard_init;
 mc->block_default_type = IF_IDE;
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
index 5873107302..7eba212659 100644
--- a/hw/arm/mps2-tz.c
+++ b/hw/arm/mps2-tz.c
@@ -183,6 +183,26 @@ OBJECT_DECLARE_TYPE(MPS2TZMachineState, 
MPS2TZMachineClass, MPS2TZ_MACHINE)
 #define MPS3_DDR_SIZE (2 * GiB)
 #endif
 
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-m33"),
+NULL
+};
+
+static const char * const mps3tz_an547_valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-m55"),
+NULL
+};
+
+static const char * const valid_cpu_models[] = {
+"cortex-m33",
+NULL
+};
+
+static const char * const mps3tz_an547_valid_cpu_models[] = {
+"cortex-m55",
+NULL
+};
+
 static const uint32_t an505_oscclk[] = {
 4000,
 2458,
@@ -802,12 +822,6 @@ static void mps2tz_common_init(MachineState *machine)
 int num_ppcs;
 int i;
 
-if (strcmp(machine->cpu_type, mc->default_cpu_type) != 0) {
-error_report("This board can only be used with CPU %s",
- mc->default_cpu_type);
-exit(1);
-}
-
 if (machine->ram_size != mc->default_ram_size) {
 char *sz = size_to_str(mc->default_ram_size);
 error_report("Invalid RAM size, should be %s", sz);
@@ -1293,6 +1307,8 @@ static void mps2tz_an505_class_init(ObjectClass *oc, void 
*data)
 mc->max_cpus = mc->default_cpus;
 mmc->fpga_type = FPGA_AN505;
 mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-m33");
+mc->valid_cpu_types = valid_cpu_types;
+mc->valid_cpu_models = valid_cpu_models;
 mmc->scc_id = 0x41045050;
 mmc->sysclk_frq = 20 * 1000 * 

[PATCH v2 1/8] machine: Use error handling when CPU type is checked

2023-07-25 Thread Gavin Shan
QEMU is terminated if the specified CPU type isn't supported in
machine_run_board_init(). The list of supported CPU types is
maintained in mc->valid_cpu_types. The error handling can be used
to propagate error messages, to be consistent with how errors are
handled for other situations in the same function.

No functional change intended.

Suggested-by: Igor Mammedov 
Signed-off-by: Gavin Shan 
---
 hw/core/machine.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index f0d35c6401..d7e7f8f120 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1355,6 +1355,7 @@ void machine_run_board_init(MachineState *machine, const 
char *mem_path, Error *
 MachineClass *machine_class = MACHINE_GET_CLASS(machine);
 ObjectClass *oc = object_class_by_name(machine->cpu_type);
 CPUClass *cc;
+Error *local_err = NULL;
 
 /* This checkpoint is required by replay to separate prior clock
reading from the other reads, because timer polling functions query
@@ -1423,15 +1424,16 @@ void machine_run_board_init(MachineState *machine, 
const char *mem_path, Error *
 
 if (!machine_class->valid_cpu_types[i]) {
 /* The user specified CPU is not valid */
-error_report("Invalid CPU type: %s", machine->cpu_type);
-error_printf("The valid types are: %s",
- machine_class->valid_cpu_types[0]);
+error_setg(&local_err, "Invalid CPU type: %s", machine->cpu_type);
+error_append_hint(&local_err, "The valid types are: %s",
+  machine_class->valid_cpu_types[0]);
 for (i = 1; machine_class->valid_cpu_types[i]; i++) {
-error_printf(", %s", machine_class->valid_cpu_types[i]);
+error_append_hint(&local_err, ", %s",
+  machine_class->valid_cpu_types[i]);
 }
-error_printf("\n");
+error_append_hint(&local_err, "\n");
 
-exit(1);
+error_propagate(errp, local_err);
 }
 }
 
-- 
2.41.0




[PATCH v2 6/8] hw/arm/sbsa-ref: Check CPU type in machine_run_board_init()

2023-07-25 Thread Gavin Shan
Set mc->valid_cpu_{types, models} so that the specified CPU type
can be checked in machine_run_board_init(). We don't need to do
the check ourselves any more.

Signed-off-by: Gavin Shan 
---
 hw/arm/sbsa-ref.c | 29 -
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index bc89eb4806..66d171b745 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -149,25 +149,23 @@ static const int sbsa_ref_irqmap[] = {
 [SBSA_GWDT_WS0] = 16,
 };
 
-static const char * const valid_cpus[] = {
+static const char * const valid_cpu_types[] = {
 ARM_CPU_TYPE_NAME("cortex-a57"),
 ARM_CPU_TYPE_NAME("cortex-a72"),
 ARM_CPU_TYPE_NAME("neoverse-n1"),
 ARM_CPU_TYPE_NAME("neoverse-v1"),
 ARM_CPU_TYPE_NAME("max"),
+NULL,
 };
 
-static bool cpu_type_valid(const char *cpu)
-{
-int i;
-
-for (i = 0; i < ARRAY_SIZE(valid_cpus); i++) {
-if (strcmp(cpu, valid_cpus[i]) == 0) {
-return true;
-}
-}
-return false;
-}
+static const char * const valid_cpu_models[] = {
+"cortex-a57",
+"cortex-a72",
+"neoverse-n1",
+"neoverse-v1",
+"max",
+NULL,
+};
 
 static uint64_t sbsa_ref_cpu_mp_affinity(SBSAMachineState *sms, int idx)
 {
@@ -730,11 +728,6 @@ static void sbsa_ref_init(MachineState *machine)
 const CPUArchIdList *possible_cpus;
 int n, sbsa_max_cpus;
 
-if (!cpu_type_valid(machine->cpu_type)) {
-error_report("sbsa-ref: CPU type %s not supported", machine->cpu_type);
-exit(1);
-}
-
 if (kvm_enabled()) {
 error_report("sbsa-ref: KVM is not supported for this machine");
 exit(1);
@@ -899,6 +892,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data)
 mc->init = sbsa_ref_init;
 mc->desc = "QEMU 'SBSA Reference' ARM Virtual Machine";
 mc->default_cpu_type = ARM_CPU_TYPE_NAME("neoverse-n1");
+mc->valid_cpu_types = valid_cpu_types;
+mc->valid_cpu_models = valid_cpu_models;
 mc->max_cpus = 512;
 mc->pci_allow_0_address = true;
 mc->minimum_page_bits = 12;
-- 
2.41.0




[PATCH v2 8/8] hw/riscv/shakti_c: Check CPU type in machine_run_board_init()

2023-07-25 Thread Gavin Shan
Set mc->valid_cpu_{types, models} so that the specified CPU type
can be checked in machine_run_board_init(). We don't need to do
the check ourselves any more.

Signed-off-by: Gavin Shan 
---
 hw/riscv/shakti_c.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/hw/riscv/shakti_c.c b/hw/riscv/shakti_c.c
index 12ea74b032..0bd59d47cd 100644
--- a/hw/riscv/shakti_c.c
+++ b/hw/riscv/shakti_c.c
@@ -28,6 +28,15 @@
 #include "exec/address-spaces.h"
 #include "hw/riscv/boot.h"
 
+static const char * const valid_cpu_types[] = {
+RISCV_CPU_TYPE_NAME("shakti-c"),
+NULL
+};
+
+static const char * const valid_cpu_models[] = {
+"shakti-c",
+NULL
+};
 
 static const struct MemmapEntry {
 hwaddr base;
@@ -47,12 +56,6 @@ static void shakti_c_machine_state_init(MachineState *mstate)
 ShaktiCMachineState *sms = RISCV_SHAKTI_MACHINE(mstate);
 MemoryRegion *system_memory = get_system_memory();
 
-/* Allow only Shakti C CPU for this platform */
-if (strcmp(mstate->cpu_type, TYPE_RISCV_CPU_SHAKTI_C) != 0) {
-error_report("This board can only be used with Shakti C CPU");
-exit(1);
-}
-
 /* Initialize SoC */
 object_initialize_child(OBJECT(mstate), "soc", >soc,
 TYPE_RISCV_SHAKTI_SOC);
@@ -85,6 +88,8 @@ static void shakti_c_machine_class_init(ObjectClass *klass, 
void *data)
 mc->desc = "RISC-V Board compatible with Shakti SDK";
 mc->init = shakti_c_machine_state_init;
 mc->default_cpu_type = TYPE_RISCV_CPU_SHAKTI_C;
+mc->valid_cpu_types = valid_cpu_types;
+mc->valid_cpu_models = valid_cpu_models;
 mc->default_ram_id = "riscv.shakti.c.ram";
 }
 
-- 
2.41.0




[PATCH v2 4/8] hw/arm/virt: Check CPU type in machine_run_board_init()

2023-07-25 Thread Gavin Shan
Set mc->valid_cpu_{types, models} so that the specified CPU type
can be checked in machine_run_board_init(). We don't need to do
the check ourselves any more.

Signed-off-by: Gavin Shan 
---
 hw/arm/virt.c | 39 ++-
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 7d9dbc2663..debd85614e 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -203,7 +203,7 @@ static const int a15irqmap[] = {
 [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
 };
 
-static const char *valid_cpus[] = {
+static const char * const valid_cpu_types[] = {
 #ifdef CONFIG_TCG
 ARM_CPU_TYPE_NAME("cortex-a7"),
 ARM_CPU_TYPE_NAME("cortex-a15"),
@@ -219,19 +219,27 @@ static const char *valid_cpus[] = {
 ARM_CPU_TYPE_NAME("cortex-a57"),
 ARM_CPU_TYPE_NAME("host"),
 ARM_CPU_TYPE_NAME("max"),
+NULL
 };
 
-static bool cpu_type_valid(const char *cpu)
-{
-int i;
-
-for (i = 0; i < ARRAY_SIZE(valid_cpus); i++) {
-if (strcmp(cpu, valid_cpus[i]) == 0) {
-return true;
-}
-}
-return false;
-}
+static const char * const valid_cpu_models[] = {
+#ifdef CONFIG_TCG
+"cortex-a7",
+"cortex-a15",
+"cortex-a35",
+"cortex-a55",
+"cortex-a72",
+"cortex-a76",
+"a64fx",
+"neoverse-n1",
+"neoverse-v1",
+#endif
+"cortex-a53",
+"cortex-a57",
+"host",
+"max",
+NULL
+};
 
 static void create_randomness(MachineState *ms, const char *node)
 {
@@ -2030,11 +2038,6 @@ static void machvirt_init(MachineState *machine)
 unsigned int smp_cpus = machine->smp.cpus;
 unsigned int max_cpus = machine->smp.max_cpus;
 
-if (!cpu_type_valid(machine->cpu_type)) {
-error_report("mach-virt: CPU type %s not supported", 
machine->cpu_type);
-exit(1);
-}
-
 possible_cpus = mc->possible_cpu_arch_ids(machine);
 
 /*
@@ -2953,6 +2956,8 @@ static void virt_machine_class_init(ObjectClass *oc, void 
*data)
 #else
 mc->default_cpu_type = ARM_CPU_TYPE_NAME("max");
 #endif
+mc->valid_cpu_types = valid_cpu_types;
+mc->valid_cpu_models = valid_cpu_models;
 mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
 mc->kvm_type = virt_kvm_type;
 assert(!mc->get_hotplug_handler);
-- 
2.41.0




[PATCH v2 0/8] machine: Unified CPU type check

2023-07-25 Thread Gavin Shan
There are two places where the user-specified CPU type is checked to see
if it's supported or allowed by the board: machine_run_board_init() and
mc->init(). We don't have to maintain two duplicate sets of logic. This
series intends to move the check to machine_run_board_init().

PATCH[1-3] Improve the check in machine_run_board_init()
PATCH[4-8] Move the check from mc->init() to machine_run_board_init()

v1: https://lists.nongnu.org/archive/html/qemu-arm/2023-07/msg00302.html

Testing
===

With the following command lines, the output messages are varied before
and after the series is applied.

  /home/gshan/sandbox/src/qemu/main/build/qemu-system-aarch64 \
  -accel tcg -machine virt,gic-version=3,nvdimm=on\
  -cpu cortex-a8 -smp maxcpus=2,cpus=1\
:

Before the series is applied:

  qemu-system-aarch64: mach-virt: CPU type cortex-a8-arm-cpu not supported

After the series is applied:

  qemu-system-aarch64: Invalid CPU type: cortex-a8-arm-cpu
  The valid models are: cortex-a7, cortex-a15, cortex-a35, cortex-a55,
cortex-a72, cortex-a76, a64fx, neoverse-n1,
neoverse-v1, cortex-a53, cortex-a57, max

Changelog
=
v2:
  * Constify mc->valid_cpu_types (Richard)
  * Print the supported CPU models instead of typenames (Peter)
  * Misc improvements to the helper doing the check (Igor)
  * More patches to move the check (Marcin)

Gavin Shan (8):
  machine: Use error handling when CPU type is checked
  machine: Introduce helper is_cpu_type_supported()
  machine: Print supported CPU models instead of typenames
  hw/arm/virt: Check CPU type in machine_run_board_init()
  hw/arm/virt: Unsupported host CPU model on TCG
  hw/arm/sbsa-ref: Check CPU type in machine_run_board_init()
  hw/arm: Check CPU type in machine_run_board_init()
  hw/riscv/shakti_c: Check CPU type in machine_run_board_init()

 hw/arm/bananapi_m2u.c   | 18 ++---
 hw/arm/cubieboard.c | 18 ++---
 hw/arm/mps2-tz.c| 34 ++---
 hw/arm/mps2.c   | 44 ++---
 hw/arm/msf2-som.c   | 18 ++---
 hw/arm/musca.c  | 19 ++
 hw/arm/npcm7xx_boards.c | 19 ++
 hw/arm/orangepi.c   | 18 ++---
 hw/arm/sbsa-ref.c   | 29 ++
 hw/arm/virt.c   | 43 -
 hw/core/machine.c   | 84 +++--
 hw/m68k/q800.c  |  8 +++-
 hw/riscv/shakti_c.c | 17 ++---
 include/hw/boards.h |  3 +-
 14 files changed, 243 insertions(+), 129 deletions(-)

-- 
2.41.0




Re: [PATCH for-8.2 1/3] target/arm: Do all "ARM_FEATURE_X implies Y" checks in post_init

2023-07-25 Thread Richard Henderson

On 7/24/23 10:43, Peter Maydell wrote:

Where architecturally one ARM_FEATURE_X flag implies another
ARM_FEATURE_Y, we allow the CPU init function to only set X, and then
set Y for it.  Currently we do this in two places -- we set a few
flags in arm_cpu_post_init() because we need them to decide which
properties to create on the CPU object, and then we do the rest in
arm_cpu_realizefn().  However, this is fragile, because it's easy to
add a new property and not notice that this means that an X-implies-Y
check now has to move from realize to post-init.

As a specific example, the pmsav7-dregion property is conditional
on ARM_FEATURE_PMSA && ARM_FEATURE_V7, which means it won't appear
on the Cortex-M33 and -M55, because they set ARM_FEATURE_V8 and
rely on V8-implies-V7, which doesn't happen until the realizefn.

Move all of these X-implies-Y checks into a new function, which
we call at the top of arm_cpu_post_init(), so the feature bits
are available at that point.

This does now give us the reverse issue, that if there's a feature
bit which is enabled or disabled by the setting of a property, then
the X-implies-Y features that are dependent on that property need to
be in realize, not in this new function.  But the only one of those
is the "EL3 implies VBAR" which is already in the right place, so
putting things this way round seems better to me.

Signed-off-by: Peter Maydell 
---
  target/arm/cpu.c | 176 +--
  1 file changed, 94 insertions(+), 82 deletions(-)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 69e2bde3c2d..58301c4b7d8 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1356,17 +1356,105 @@ unsigned int gt_cntfrq_period_ns(ARMCPU *cpu)
NANOSECONDS_PER_SECOND / cpu->gt_cntfrq_hz : 1;
  }
  
+static void arm_cpu_propagate_feature_implications(ARMCPU *cpu)

+{
+CPUARMState *env = &cpu->env;
+bool no_aa32 = false;
+/*
+ * Some features automatically imply others: set the feature


Spacing after local vars.


+if (arm_feature(env, ARM_FEATURE_V7VE)) {
+/* v7 Virtualization Extensions. In real hardware this implies


Should fix the comment formatting.
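
i.e. a blank line after the local variable declarations, and the usual
multi-line comment style; roughly this shape (illustrative fragment only,
not the actual patch):

    static void arm_cpu_propagate_feature_implications(ARMCPU *cpu)
    {
        CPUARMState *env = &cpu->env;
        bool no_aa32 = false;

        /*
         * Some features automatically imply others: set the feature
         * flags here accordingly.
         */
        if (arm_feature(env, ARM_FEATURE_V7VE)) {
            /*
             * v7 Virtualization Extensions imply v7, among other
             * things (exact set as in the original realizefn code).
             */
            set_feature(env, ARM_FEATURE_V7);
        }
    }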

Otherwise,
Reviewed-by: Richard Henderson 

I thought I had tried this myself at some point, and ran into a problem.  But I can't 
recall the specifics now.



r~



Re: [RFC PATCH] vfio-user: add live migration to vfio-user protocol specification

2023-07-25 Thread Stefan Hajnoczi
On Tue, 18 Jul 2023 at 05:42, William Henderson
 wrote:
>
> This patch adds live migration to the vfio-user specification, based on the 
> new
> VFIO migration interface introduced in the kernel here:
>
> https://lore.kernel.org/all/20220224142024.147653-10-yish...@nvidia.com/

Hi,
This is not an in-depth review, but here is what I have:

>
> We differ from the VFIO protocol in that, while VFIO transfers migration data
> using a file descriptor, we simply use the already-established vfio-user 
> socket
> with two additional commands, VFIO_USER_MIG_DATA_READ and
> VFIO_USER_MIG_DATA_WRITE, which have stream semantics.

Transferring migration data over a separate fd eliminates the risk of
blocking the vfio-user socket and might make zero-copy easier. Are you
sure you want to transfer migration data over the vfio-user socket?

> We also don't use P2P
> states as we don't yet have a use-case for them, although this may change in 
> the
> future.
>
> This patch should be applied on the previous pending patch which introduces
> the vfio-user protocol:
>
> https://lists.nongnu.org/archive/html/qemu-devel/2023-06/msg06567.html
> Signed-off-by: William Henderson 
> ---
>  docs/devel/vfio-user.rst | 413 +--
>  1 file changed, 396 insertions(+), 17 deletions(-)
>
> diff --git a/docs/devel/vfio-user.rst b/docs/devel/vfio-user.rst
> index 0d96477a68..f433579db0 100644
> --- a/docs/devel/vfio-user.rst
> +++ b/docs/devel/vfio-user.rst
> @@ -4,7 +4,7 @@ vfio-user Protocol Specification
>  
>
>  --
> -Version_ 0.9.1
> +Version_ 0.9.2
>  --
>
>  .. contents:: Table of Contents
> @@ -366,6 +366,9 @@ NameCommand
> Request Direction
>  ``VFIO_USER_DMA_WRITE`` 12 server -> client
>  ``VFIO_USER_DEVICE_RESET``  13 client -> server
>  ``VFIO_USER_REGION_WRITE_MULTI``15 client -> server
> +``VFIO_USER_DEVICE_FEATURE``16 client -> server
> +``VFIO_USER_MIG_DATA_READ`` 17 client -> server
> +``VFIO_USER_MIG_DATA_WRITE``18 client -> server
>  ==  =  =
>
>  Header
> @@ -508,26 +511,10 @@ Capabilities:
>  || | valid simultaneously.  Optional, with a 
>|
>  || | value of 65535 (64k-1). 
>|
>  
> ++-++
> -| migration  | object  | Migration capability parameters. If missing 
>|
> -|| | then migration is not supported by the 
> sender. |
> -++-++
>  | write_multiple | boolean | ``VFIO_USER_REGION_WRITE_MULTI`` messages   
>|
>  || | are supported if the value is ``true``. 
>|
>  
> ++-++
>
> -The migration capability contains the following name/value pairs:
> -
> -+-++--+
> -| Name| Type   | Description 
>  |
> -+=++==+
> -| pgsize  | number | Page size of dirty pages bitmap. The smallest   
>  |
> -| || between the client and the server is used.  
>  |
> -+-++--+
> -| max_bitmap_size | number | Maximum bitmap size in 
> ``VFIO_USER_DIRTY_PAGES`` |
> -| || and ``VFIO_DMA_UNMAP`` messages.  Optional, 
>  |
> -| || with a default value of 256MB.  
>  |
> -+-++--+
> -

Why are existing spec features being deleted? Are you sure there are
no existing implementations of the old migration interface and there
is no need to keep the spec backwards compatible?

>  Reply
>  ^
>
> @@ -1468,6 +1455,398 @@ Reply
>
>  * *wr_cnt* is the number of device writes completed.
>
> +``VFIO_USER_DEVICE_FEATURE``
> +
> +
> +This command is analogous to ``VFIO_DEVICE_FEATURE``. It is used to get, 
> set, or
> +probe feature data of the device.
> +
> +Request
> +^^^
> +
> +The request payload for this message is a structure of the following format.
> +
> ++---+++
> +| Name  | Offset | Size   |
> ++===+++
> +| argsz | 0  | 4  |
> ++---+++
> +| flags | 4  | 4  |
> 

Re: [PATCH 2/2] block/blkio: use blkio_set_int("fd") to check fd support

2023-07-25 Thread Stefan Hajnoczi
On Mon, Jul 24, 2023 at 05:46:11PM +0200, Stefano Garzarella wrote:
> Because of the way the virtio-blk driver is implemented in libblkio,
> it's much easier to use blkio_set_int() instead of blkio_get_int(),
> and have it fail right away, to see whether `fd` is supported by the
> transport. See https://gitlab.com/libblkio/libblkio/-/merge_requests/208

The commit description is vague about what's going on here. My
understanding is:

  Setting the `fd` property fails with virtio-blk-* libblkio drivers
  that do not support fd passing since
  https://gitlab.com/libblkio/libblkio/-/merge_requests/208.

  Getting the `fd` property, on the other hand, always succeeds for
  virtio-blk-* libblkio drivers even when they don't support fd passing.

  This patch switches to setting the `fd` property because it is a
  better mechanism for probing fd passing support than getting the `fd`
  property.

Please update the commit description. Thanks!

> 
> Signed-off-by: Stefano Garzarella 
> ---
>  block/blkio.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/block/blkio.c b/block/blkio.c
> index ca1149042a..719b19324b 100644
> --- a/block/blkio.c
> +++ b/block/blkio.c
> @@ -665,7 +665,7 @@ static int blkio_virtio_blk_common_open(BlockDriverState 
> *bs,
>  const char *blkio_driver = bs->drv->protocol_name;
>  BDRVBlkioState *s = bs->opaque;
>  bool fd_supported = false;
> -int fd, ret;
> +int ret;
>  
>  if (!path) {
>  error_setg(errp, "missing 'path' option");
> @@ -678,7 +678,7 @@ static int blkio_virtio_blk_common_open(BlockDriverState 
> *bs,
>  }
>  
>  if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0 &&
> -blkio_get_int(s->blkio, "fd", &fd) == 0) {
> +blkio_set_int(s->blkio, "fd", -1) == 0) {
>  fd_supported = true;
>  }
>  
> @@ -688,7 +688,7 @@ static int blkio_virtio_blk_common_open(BlockDriverState 
> *bs,
>   * layer through the "/dev/fdset/N" special path.
>   */
>  if (fd_supported) {
> -int open_flags;
> +int open_flags, fd;
>  
>  if (flags & BDRV_O_RDWR) {
>  open_flags = O_RDWR;
> -- 
> 2.41.0
> 


signature.asc
Description: PGP signature


Re: [PATCH 1/2] block/blkio: fix opening virtio-blk drivers

2023-07-25 Thread Stefan Hajnoczi
On Mon, Jul 24, 2023 at 05:46:10PM +0200, Stefano Garzarella wrote:
> libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa
> driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use
> qemu_open() to support fd passing for virtio-blk") we are using
> `blkio_get_int(..., "fd")` to check if the "fd" property is supported
> for all the virtio-blk-* driver.
> 
> Unfortunately that property is also available for those drivers that do
> not support it, such as virtio-blk-vhost-user. Indeed QEMU now
> fails if used with virtio-blk-vhost-user in this way:
> 
>-blockdev 
> node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on:
>  Could not open 'vhost-user-blk.sock': No such device or address
> 
> So, `blkio_get_int()` is not enough to check whether the driver supports
> the `fd` property or not. This is because the common virtio-blk libblkio
> driver only checks whether or not `fd` is set during `blkio_connect()`
> and fails for those transports that do not support it (all except
> vhost-vdpa for now).
> 
> So for now let's also check that the driver is virtio-blk-vhost-vdpa,
> since that's the only one that supports it.

What happens when more virtio-blk-* libblkio drivers gain support for
`fd`? I think we'll be back to the same problem because QEMU will be
unable to distinguish between old and new libraries.

How about retrying with `path` if opening with `fd` fails?
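
Something along these lines, perhaps (untested sketch; error paths and the
read-only case are omitted, and the real code may also need to clear the
stale "fd" property or recreate the blkio instance before the retry):

    fd = qemu_open(path, O_RDWR, NULL);
    if (fd >= 0 && blkio_set_int(s->blkio, "fd", fd) == 0 &&
        blkio_connect(s->blkio) == 0) {
        return 0;                       /* fd passing worked */
    }
    if (fd >= 0) {
        qemu_close(fd);                 /* fall back to "path" */
    }
    if (blkio_set_str(s->blkio, "path", path) < 0 ||
        blkio_connect(s->blkio) < 0) {
        error_setg(errp, "failed to connect to '%s'", path);
        return -EINVAL;
    }
    return 0;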

> 
> Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for 
> virtio-blk")
> Reported-by: Qing Wang 
> Signed-off-by: Stefano Garzarella 
> ---
>  block/blkio.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/block/blkio.c b/block/blkio.c
> index 1798648134..ca1149042a 100644
> --- a/block/blkio.c
> +++ b/block/blkio.c
> @@ -662,6 +662,7 @@ static int blkio_virtio_blk_common_open(BlockDriverState 
> *bs,
>  QDict *options, int flags, Error **errp)
>  {
>  const char *path = qdict_get_try_str(options, "path");
> +const char *blkio_driver = bs->drv->protocol_name;
>  BDRVBlkioState *s = bs->opaque;
>  bool fd_supported = false;
>  int fd, ret;
> @@ -676,7 +677,8 @@ static int blkio_virtio_blk_common_open(BlockDriverState 
> *bs,
>  return -EINVAL;
>  }
>  
> -if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
> +if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0 &&
> +blkio_get_int(s->blkio, "fd", &fd) == 0) {
>  fd_supported = true;
>  }
>  
> -- 
> 2.41.0
> 


signature.asc
Description: PGP signature


Re: [PATCH v9 06/10] migration: New migrate and migrate-incoming argument 'channels'

2023-07-25 Thread Het Gala
Sorry, my last reply on this patch was accidentally sent only to Daniel.
Pasting the reply again so it is received by all the active maintainers
here. Apologies for the error.


On 26/07/23 12:07 am, Daniel P. Berrangé wrote:

On Tue, Jul 25, 2023 at 07:34:09PM +0100, Daniel P. Berrangé wrote:

On Fri, Jul 21, 2023 at 02:49:31PM +, Het Gala wrote:

MigrateChannelList allows connecting across multiple interfaces.
Add the MigrateChannelList struct as an argument to the migration QAPIs.

We plan to include multiple channels in the future, to connect
multiple interfaces. Hence, we choose 'MigrateChannelList'
as the new argument over 'MigrateChannel' to make the migration
QAPIs future proof.

Suggested-by: Aravind Retnakaran 
Signed-off-by: Het Gala 
Acked-by: Markus Armbruster 
---
  migration/migration-hmp-cmds.c |   6 +-
  migration/migration.c  |  34 --
  qapi/migration.json| 109 -
  softmmu/vl.c   |   2 +-
  4 files changed, 139 insertions(+), 12 deletions(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 9885d7c9f7..49b150f33f 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -424,7 +424,7 @@ void hmp_migrate_incoming(Monitor *mon, const QDict *qdict)
  Error *err = NULL;
  const char *uri = qdict_get_str(qdict, "uri");
  
-qmp_migrate_incoming(uri, );

+qmp_migrate_incoming(uri, false, NULL, );
  
  hmp_handle_error(mon, err);

  }
@@ -705,8 +705,8 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
  const char *uri = qdict_get_str(qdict, "uri");
  Error *err = NULL;
  
-qmp_migrate(uri, !!blk, blk, !!inc, inc,

-false, false, true, resume, );
+qmp_migrate(uri, false, NULL, !!blk, blk, !!inc, inc,
+ false, false, true, resume, );
  if (hmp_handle_error(mon, err)) {
  return;
  }
diff --git a/migration/migration.c b/migration/migration.c
index f37b388876..bd3a93fc8c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -466,10 +466,22 @@ static bool migrate_uri_parse(const char *uri,
  return true;
  }
  
-static void qemu_start_incoming_migration(const char *uri, Error **errp)

+static void qemu_start_incoming_migration(const char *uri, bool has_channels,
+  MigrationChannelList *channels,
+  Error **errp)
  {
  g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
  
+/*

+ * Having preliminary checks for uri and channel
+ */
+if (uri && has_channels) {
+error_setg(errp, "'uri' and 'channels' arguments are mutually "
+   "exclusive; exactly one of the two should be present in "
+   "'migrate-incoming' qmp command ");
+return;
+}

This checks if both are present.

Also needs a check if neither is present, as that's invalid.

Also it should (temporarily) raise an error if "has_channels" is
set, as while we've added the parameter in QAPI, we've not
implemented it yet. IOW, raise an error now, and remove the
error in a later patch.
Ack. So in total there should be 3 checks, right? 1) if 'has_channels' is
set, 2) if 'uri' and 'channels' are both present, 3) if 'uri' and
'channels' are both absent. Basically, right now only uri should be allowed
and should at least be present.
I think overall only 1) would be enough, and it should be checked before
'migration_channels_and_uri_compatible()'; if 'has_channels' is set,
just return for now. With this, 2) would not be necessary or would not come
into play in this patch. 3) will be taken care of by
'migration_channels_and_uri_compatible()' itself IMO.

Let me know if I am missing something here.
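
In code, what I have in mind is roughly this (sketch only, not the final
patch):

    if (has_channels) {
        /* QAPI argument exists but is not implemented yet */
        error_setg(errp, "'channels' argument is not supported yet");
        return;
    }

    if (uri && !migrate_uri_parse(uri, &channel, errp)) {
        return;
    }

    /*
     * The "neither uri nor channels" case then falls through to the
     * existing transport compatibility check below.
     */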



@@ -1694,6 +1708,16 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
  MigrationState *s = migrate_get_current();
  g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
  
+/*

+ * Having preliminary checks for uri and channel
+ */
+if (uri && has_channels) {
+error_setg(errp, "'uri' and 'channels' arguments are mutually "
+   "exclusive; exactly one of the two should be present in "
+   "'migrate' qmp command ");
+return;
+}

Same here


With regards,
Daniel
--
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH v9 09/10] migration: Implement MigrateChannelList to hmp migration flow.

2023-07-25 Thread Het Gala



On 26/07/23 12:45 am, Daniel P. Berrangé wrote:

On Fri, Jul 21, 2023 at 02:49:35PM +, Het Gala wrote:

Integrate MigrateChannelList with all transport backends
(socket, exec and rdma) for both src and dest migration
endpoints for hmp migration.

Suggested-by: Aravind Retnakaran 
Signed-off-by: Het Gala 
---
  migration/migration-hmp-cmds.c | 16 +---
  migration/migration.c  |  5 ++---
  migration/migration.h  |  3 ++-
  3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 49b150f33f..25f51ec99c 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -423,10 +423,14 @@ void hmp_migrate_incoming(Monitor *mon, const QDict 
*qdict)
  {
  Error *err = NULL;
  const char *uri = qdict_get_str(qdict, "uri");
+MigrationChannelList *caps = NULL;
+g_autoptr(MigrationChannel) channel = g_new0(MigrationChannel, 1);
  
-qmp_migrate_incoming(uri, false, NULL, );

+migrate_uri_parse(uri, , );
+QAPI_LIST_PREPEND(caps, channel);
  
-hmp_handle_error(mon, err);

+qmp_migrate_incoming(NULL, true, caps, );
+qapi_free_MigrationChannelList(caps);

IIRC, you still need the hmp_handle_error call to print any
error message.
Yes, sorry, I missed that while adding the statements. Will add the
hmp_handle_error call here.

  }
  
  void hmp_migrate_recover(Monitor *mon, const QDict *qdict)

@@ -704,9 +708,15 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
  bool resume = qdict_get_try_bool(qdict, "resume", false);
  const char *uri = qdict_get_str(qdict, "uri");
  Error *err = NULL;
+MigrationChannelList *caps = NULL;
+g_autoptr(MigrationChannel) channel = g_new0(MigrationChannel, 1);
+
+migrate_uri_parse(uri, , );
+QAPI_LIST_PREPEND(caps, channel);
  
-qmp_migrate(uri, false, NULL, !!blk, blk, !!inc, inc,

+qmp_migrate(NULL, true, caps, !!blk, blk, !!inc, inc,
   false, false, true, resume, );
+qapi_free_MigrationChannelList(caps);
  if (hmp_handle_error(mon, err)) {
  return;
  }
diff --git a/migration/migration.c b/migration/migration.c
index acf80b3590..cf063a76df 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -425,9 +425,8 @@ void migrate_add_address(SocketAddress *address)
QAPI_CLONE(SocketAddress, address));
  }
  
-static bool migrate_uri_parse(const char *uri,

-  MigrationChannel **channel,
-  Error **errp)
+bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
+   Error **errp)
  {
  g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1);
  g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
diff --git a/migration/migration.h b/migration/migration.h
index b7c8b67542..a8268394ca 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -501,7 +501,8 @@ bool check_dirty_bitmap_mig_alias_map(const 
BitmapMigrationNodeAliasList *bbm,
Error **errp);
  
  void migrate_add_address(SocketAddress *address);

-
+bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
+   Error **errp);
  int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);
  
  #define qemu_ram_foreach_block \

--
2.22.3


With regards,
Daniel

Regards,
Het Gala



Re: [PATCH v9 08/10] migration: Implement MigrateChannelList to qmp migration flow.

2023-07-25 Thread Het Gala



On 26/07/23 12:08 am, Daniel P. Berrangé wrote:

On Fri, Jul 21, 2023 at 02:49:34PM +, Het Gala wrote:

Integrate MigrateChannelList with all transport backends
(socket, exec and rdma) for both src and dest migration
endpoints for qmp migration.

For the current series, limit the size of MigrateChannelList
to a single element (single interface) with a runtime check.

Suggested-by: Aravind Retnakaran 
Signed-off-by: Het Gala 
---
  migration/migration.c | 77 ---
  1 file changed, 50 insertions(+), 27 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 6e0a8beaf2..acf80b3590 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -426,9 +426,10 @@ void migrate_add_address(SocketAddress *address)
  }
  
  static bool migrate_uri_parse(const char *uri,

-  MigrationAddress **channel,
+  MigrationChannel **channel,
Error **errp)
  {
+g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1);
  g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
  SocketAddress *saddr = >u.socket;
  InetSocketAddress *isock = >u.rdma;
@@ -465,7 +466,9 @@ static bool migrate_uri_parse(const char *uri,
  return false;
  }
  
-*channel = addr;

+val->channel_type = MIGRATION_CHANNEL_TYPE_MAIN;
+val->addr = addr;
+*channel = val;
  return true;
  }
  
@@ -473,7 +476,8 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,

MigrationChannelList *channels,
Error **errp)
  {
-g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
+g_autoptr(MigrationChannel) channel = g_new0(MigrationChannel, 1);
+g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
  
  /*

   * Having preliminary checks for uri and channel
@@ -483,20 +487,29 @@ static void qemu_start_incoming_migration(const char 
*uri, bool has_channels,
 "exclusive; exactly one of the two should be present in "
 "'migrate-incoming' qmp command ");
  return;
+} else if (channels) {
+/* To verify that Migrate channel list has only item */
+if (channels->next) {
+error_setg(errp, "Channel list has more than one entries");
+return;
+}
+channel = channels->value;
+} else {
+/* caller uses the old URI syntax */
+if (uri && !migrate_uri_parse(uri, , errp)) {
+return;
+}
  }
-
-if (uri && !migrate_uri_parse(uri, , errp)) {
-return;
-}
+addr = channel->addr;
  
  /* transport mechanism not suitable for migration? */

-if (!migration_channels_and_transport_compatible(channel, errp)) {
+if (!migration_channels_and_transport_compatible(addr, errp)) {
  return;
  }
  
  qapi_event_send_migration(MIGRATION_STATUS_SETUP);

-if (channel->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
-SocketAddress *saddr = >u.socket;
+if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
+SocketAddress *saddr = >u.socket;
  if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
  saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
  saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
@@ -505,11 +518,11 @@ static void qemu_start_incoming_migration(const char 
*uri, bool has_channels,
  fd_start_incoming_migration(saddr->u.fd.str, errp);
  }
  #ifdef CONFIG_RDMA
-} else if (channel->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
-rdma_start_incoming_migration(>u.rdma, errp);
-#endif
-} else if (channel->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
-exec_start_incoming_migration(channel->u.exec.args, errp);
+} else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
+rdma_start_incoming_migration(>u.rdma, errp);
+ #endif
+} else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
+exec_start_incoming_migration(addr->u.exec.args, errp);
  } else {
  error_setg(errp, "unknown migration protocol: %s", uri);
  }
@@ -1709,7 +1722,8 @@ void qmp_migrate(const char *uri, bool has_channels,
  bool resume_requested;
  Error *local_err = NULL;
  MigrationState *s = migrate_get_current();
-g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
+g_autoptr(MigrationChannel) channel = g_new0(MigrationChannel, 1);
+g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
  
  /*

   * Having preliminary checks for uri and channel
@@ -1719,14 +1733,23 @@ void qmp_migrate(const char *uri, bool has_channels,
 "exclusive; exactly one of the two should be present in "
 "'migrate' qmp command ");
  return;
+} else if (channels) {
+/* To 

Re: [PATCH] block/blkio: enable the completion eventfd

2023-07-25 Thread Stefan Hajnoczi
On Tue, Jul 25, 2023 at 12:37:44PM +0200, Stefano Garzarella wrote:
> Until libblkio 1.3.0, virtio-blk drivers had completion eventfd
> notifications enabled from the start, but from the next releases
> this is no longer the case, so we have to explicitly enable them.
> 
> In fact, the libblkio documentation says they could be disabled,
> so we should always enable them at the start if we want to be
> sure to get completion eventfd notifications:
> 
> By default, the driver might not generate completion events for
> requests so it is necessary to explicitly enable the completion
> file descriptor before use:
> 
> void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable);
> 
> I discovered this while trying a development version of libblkio:
> the guest kernel hangs during boot, while probing the device.
> 
> Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
> Signed-off-by: Stefano Garzarella 
> ---
>  block/blkio.c | 1 +
>  1 file changed, 1 insertion(+)
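
For reference, since the diff body isn't quoted above: the change boils
down to a single call along these lines (my paraphrase, the exact spot in
block/blkio.c may differ), made where the queue's completion fd is set up:

    blkioq_set_completion_fd_enabled(s->blkioq, true);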

Thanks, applied to my block tree:
https://gitlab.com/stefanha/qemu/commits/block

Stefan


signature.asc
Description: PGP signature


Re: [PATCH v9 06/10] migration: New migrate and migrate-incoming argument 'channels'

2023-07-25 Thread Het Gala



On 26/07/23 12:07 am, Daniel P. Berrangé wrote:

On Tue, Jul 25, 2023 at 07:34:09PM +0100, Daniel P. Berrangé wrote:

On Fri, Jul 21, 2023 at 02:49:31PM +, Het Gala wrote:

MigrateChannelList allows connecting across multiple interfaces.
Add the MigrateChannelList struct as an argument to the migration QAPIs.

We plan to include multiple channels in the future, to connect
multiple interfaces. Hence, we choose 'MigrateChannelList'
as the new argument over 'MigrateChannel' to make the migration
QAPIs future proof.

Suggested-by: Aravind Retnakaran 
Signed-off-by: Het Gala 
Acked-by: Markus Armbruster 
---
  migration/migration-hmp-cmds.c |   6 +-
  migration/migration.c  |  34 --
  qapi/migration.json| 109 -
  softmmu/vl.c   |   2 +-
  4 files changed, 139 insertions(+), 12 deletions(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 9885d7c9f7..49b150f33f 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -424,7 +424,7 @@ void hmp_migrate_incoming(Monitor *mon, const QDict *qdict)
  Error *err = NULL;
  const char *uri = qdict_get_str(qdict, "uri");
  
-qmp_migrate_incoming(uri, );

+qmp_migrate_incoming(uri, false, NULL, );
  
  hmp_handle_error(mon, err);

  }
@@ -705,8 +705,8 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
  const char *uri = qdict_get_str(qdict, "uri");
  Error *err = NULL;
  
-qmp_migrate(uri, !!blk, blk, !!inc, inc,

-false, false, true, resume, );
+qmp_migrate(uri, false, NULL, !!blk, blk, !!inc, inc,
+ false, false, true, resume, );
  if (hmp_handle_error(mon, err)) {
  return;
  }
diff --git a/migration/migration.c b/migration/migration.c
index f37b388876..bd3a93fc8c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -466,10 +466,22 @@ static bool migrate_uri_parse(const char *uri,
  return true;
  }
  
-static void qemu_start_incoming_migration(const char *uri, Error **errp)

+static void qemu_start_incoming_migration(const char *uri, bool has_channels,
+  MigrationChannelList *channels,
+  Error **errp)
  {
  g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
  
+/*

+ * Having preliminary checks for uri and channel
+ */
+if (uri && has_channels) {
+error_setg(errp, "'uri' and 'channels' arguments are mutually "
+   "exclusive; exactly one of the two should be present in "
+   "'migrate-incoming' qmp command ");
+return;
+}

This checks if both are present.

Also needs a check if neither is present, as that's invalid.

Also it should (temporarily) raise an error if "has_channels" is
set, as while we've added the parameter in QAPI, we've not
implemented it yet. IOW, raise an error now, and remove the
error in a later patch.
Ack. So in total there should be 3 checks, right? 1) if 'has_channels' is
set, 2) if 'uri' and 'channels' are both present, 3) if 'uri' and
'channels' are both absent. Basically, right now only uri should be allowed
and should at least be present.
I think overall only 1) would be enough, and it should be checked before
'migration_channels_and_uri_compatible()'; if 'has_channels' is set,
just return for now. With this, 2) would not be necessary or would not come
into play in this patch. 3) will be taken care of by
'migration_channels_and_uri_compatible()' itself IMO.

Let me know if I am missing something here.



@@ -1694,6 +1708,16 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
  MigrationState *s = migrate_get_current();
  g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
  
+/*

+ * Having preliminary checks for uri and channel
+ */
+if (uri && has_channels) {
+error_setg(errp, "'uri' and 'channels' arguments are mutually "
+   "exclusive; exactly one of the two should be present in "
+   "'migrate' qmp command ");
+return;
+}

Same here


With regards,
Daniel
--
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH v9 10/10] migration: modify test_multifd_tcp_none() to use new QAPI syntax.

2023-07-25 Thread Daniel P . Berrangé
On Fri, Jul 21, 2023 at 02:49:37PM +, Het Gala wrote:
Modify the multifd tcp common test to incorporate the newly
defined QAPI syntax.
> 
> Suggested-by: Aravind Retnakaran 
> Signed-off-by: Het Gala 
> ---
>  tests/qtest/migration-test.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)

Reviewed-by: Daniel P. Berrangé 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v9 09/10] migration: Implement MigrateChannelList to hmp migration flow.

2023-07-25 Thread Daniel P . Berrangé
On Fri, Jul 21, 2023 at 02:49:35PM +, Het Gala wrote:
> Integrate MigrateChannelList with all transport backends
> (socket, exec and rdma) for both src and dest migration
> endpoints for hmp migration.
> 
> Suggested-by: Aravind Retnakaran 
> Signed-off-by: Het Gala 
> ---
>  migration/migration-hmp-cmds.c | 16 +---
>  migration/migration.c  |  5 ++---
>  migration/migration.h  |  3 ++-
>  3 files changed, 17 insertions(+), 7 deletions(-)
> 
> diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
> index 49b150f33f..25f51ec99c 100644
> --- a/migration/migration-hmp-cmds.c
> +++ b/migration/migration-hmp-cmds.c
> @@ -423,10 +423,14 @@ void hmp_migrate_incoming(Monitor *mon, const QDict 
> *qdict)
>  {
>  Error *err = NULL;
>  const char *uri = qdict_get_str(qdict, "uri");
> +MigrationChannelList *caps = NULL;
> +g_autoptr(MigrationChannel) channel = g_new0(MigrationChannel, 1);
>  
> -qmp_migrate_incoming(uri, false, NULL, );
> +migrate_uri_parse(uri, , );
> +QAPI_LIST_PREPEND(caps, channel);
>  
> -hmp_handle_error(mon, err);
> +qmp_migrate_incoming(NULL, true, caps, );
> +qapi_free_MigrationChannelList(caps);

IIRC, you still need the hmp_handle_error call to print any
error message.

>  }
>  
>  void hmp_migrate_recover(Monitor *mon, const QDict *qdict)
> @@ -704,9 +708,15 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
>  bool resume = qdict_get_try_bool(qdict, "resume", false);
>  const char *uri = qdict_get_str(qdict, "uri");
>  Error *err = NULL;
> +MigrationChannelList *caps = NULL;
> +g_autoptr(MigrationChannel) channel = g_new0(MigrationChannel, 1);
> +
> +migrate_uri_parse(uri, , );
> +QAPI_LIST_PREPEND(caps, channel);
>  
> -qmp_migrate(uri, false, NULL, !!blk, blk, !!inc, inc,
> +qmp_migrate(NULL, true, caps, !!blk, blk, !!inc, inc,
>   false, false, true, resume, );
> +qapi_free_MigrationChannelList(caps);
>  if (hmp_handle_error(mon, err)) {
>  return;
>  }
> diff --git a/migration/migration.c b/migration/migration.c
> index acf80b3590..cf063a76df 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -425,9 +425,8 @@ void migrate_add_address(SocketAddress *address)
>QAPI_CLONE(SocketAddress, address));
>  }
>  
> -static bool migrate_uri_parse(const char *uri,
> -  MigrationChannel **channel,
> -  Error **errp)
> +bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
> +   Error **errp)
>  {
>  g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1);
>  g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
> diff --git a/migration/migration.h b/migration/migration.h
> index b7c8b67542..a8268394ca 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -501,7 +501,8 @@ bool check_dirty_bitmap_mig_alias_map(const 
> BitmapMigrationNodeAliasList *bbm,
>Error **errp);
>  
>  void migrate_add_address(SocketAddress *address);
> -
> +bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
> +   Error **errp);
>  int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);
>  
>  #define qemu_ram_foreach_block \
> -- 
> 2.22.3
> 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PULL 10/10] target/tricore: Rename tricore_feature

2023-07-25 Thread Bastian Koppelmann
Hi Phil,

On Tue, Jul 25, 2023 at 04:58:29PM +0200, Philippe Mathieu-Daudé wrote:
> From: Bastian Koppelmann 
> 
> this name is used by capstone and will lead to a build failure of QEMU,
> when capstone is enabled. So we rename it to tricore_has_feature(), to
> match has_feature() in translate.c.
> 
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1774
> Signed-off-by: Bastian Koppelmann 
> Reviewed-by: Philippe Mathieu-Daudé 
> Reviewed-by: Thomas Huth 
> Message-Id: <20230721060605.76636-1-kbast...@mail.uni-paderborn.de>
> ---
>  target/tricore/cpu.h   | 2 +-
>  target/tricore/cpu.c   | 8 
>  target/tricore/helper.c| 4 ++--
>  target/tricore/op_helper.c | 4 ++--
>  4 files changed, 9 insertions(+), 9 deletions(-)

+CC: m...@tls.msk.ru

Michael Tokarev has already picked it up. See 
https://lore.kernel.org/qemu-devel/20230725145829.37782-11-phi...@linaro.org/T/#u

Cheers,
Bastian



Re: [PULL 0/2] Misc next patches

2023-07-25 Thread Peter Maydell
On Tue, 25 Jul 2023 at 17:26, Daniel P. Berrangé  wrote:
>
> The following changes since commit a279ca4ea07383314b2d2b2f1d550be9482f148e:
>
>   Merge tag 'pull-target-arm-20230725' of 
> https://git.linaro.org/people/pmaydell/qemu-arm into staging (2023-07-25 
> 12:44:39 +0100)
>
> are available in the Git repository at:
>
>   https://gitlab.com/berrange/qemu tags/misc-next-pull-request
>
> for you to fetch changes up to 0e6b20b9656174e815751cf8b21f5e326148bb99:
>
>   hw/usb/canokey: change license to GPLv2+ (2023-07-25 17:24:12 +0100)
>
> 
> Miscellaneous fixes
>
>  * Switch canokey license from Apache to GPLv2+
>  * Fix uninitialized variable in LUKS driver

FYI, this just missed rc1, but will go into rc2.

-- PMM



Re: [PULL 0/8] trivial-patches 25-07-2023

2023-07-25 Thread Peter Maydell
On Tue, 25 Jul 2023 at 15:57, Michael Tokarev  wrote:
>
> The following changes since commit 3ee44ec72753ec0ff05ad1569dfa609203d722b2:
>
>   Merge tag 'pull-request-2023-07-24' of https://gitlab.com/thuth/qemu into 
> staging (2023-07-24 18:06:36 +0100)
>
> are available in the Git repository at:
>
>   https://gitlab.com/mjt0k/qemu.git/ tags/pull-trivial-patches
>
> for you to fetch changes up to ff62c210165cf61b15f18c8a9835a5a5ce6c5a53:
>
>   qapi: Correct "eg." to "e.g." in documentation (2023-07-25 17:20:32 +0300)
>
> 
> trivial-patches 25-07-2023
> 
>
> Ani Sinha (1):
>   hw/pci: add comment to explain checking for available function 0 in pci 
> hotplug
>
> Bastian Koppelmann (1):
>   target/tricore: Rename tricore_feature
>
> Markus Armbruster (1):
>   qapi: Correct "eg." to "e.g." in documentation
>
> Michael Tokarev (5):
>   migration: spelling fixes
>   s390x: spelling fixes
>   arm: spelling fixes
>   other architectures: spelling fixes
>   hw/9pfs: spelling fixes


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/8.1
for any user-visible changes.

-- PMM



Re: [PULL 00/10] Misc fixes for 2023-07-25

2023-07-25 Thread Peter Maydell
On Tue, 25 Jul 2023 at 15:58, Philippe Mathieu-Daudé  wrote:
>
> The following changes since commit 3ee44ec72753ec0ff05ad1569dfa609203d722b2:
>
>   Merge tag 'pull-request-2023-07-24' of https://gitlab.com/thuth/qemu into 
> staging (2023-07-24 18:06:36 +0100)
>
> are available in the Git repository at:
>
>   https://github.com/philmd/qemu.git tags/misc-fixes-20230725
>
> for you to fetch changes up to f8cfdd2038c1823301e6df753242e465b1dc8539:
>
>   target/tricore: Rename tricore_feature (2023-07-25 14:42:00 +0200)
>
> 
> Misc patches queue
>
> hw/sd/sdhci: Default I/O ops to little endian
> hw/mips/loongson3-virt: Only use default USB if available
> hw/char/escc: Implement loopback mode to allow self-testing
> target/mips: Avoid overruns and shifts by negative number
> target/sparc: Handle FPRS correctly on big-endian hosts
> target/tricore: Rename tricore_feature to avoid clash with libcapstone
>


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/8.1
for any user-visible changes.

-- PMM



Re: [PATCH v3 0/8] misc AHCI cleanups

2023-07-25 Thread John Snow
On Tue, Jul 25, 2023 at 9:04 AM Philippe Mathieu-Daudé
 wrote:
>
> Hi Niklas, John, Paolo, Kevin,
>
> On 19/7/23 12:47, Niklas Cassel wrote:
>
> >> Niklas Cassel (8):
> >>hw/ide/ahci: remove stray backslash
> >>hw/ide/core: set ERR_STAT in unsupported command completion
> >>hw/ide/ahci: write D2H FIS when processing NCQ command
> >>hw/ide/ahci: simplify and document PxCI handling
> >>hw/ide/ahci: PxSACT and PxCI is cleared when PxCMD.ST is cleared
> >>hw/ide/ahci: PxCI should not get cleared when ERR_STAT is set
> >>hw/ide/ahci: fix ahci_write_fis_sdb()
> >>hw/ide/ahci: fix broken SError handling
> >>
> >>   hw/ide/ahci.c | 112 +++---
> >>   hw/ide/core.c |   2 +-
> >>   tests/qtest/libqos/ahci.c | 106 +++-
> >>   tests/qtest/libqos/ahci.h |   8 +--
> >>   4 files changed, 164 insertions(+), 64 deletions(-)
> >>
> >> --
> >> 2.40.1
> >>
> >>
> >
> > Hello Philippe,
> >
> > Considering that you picked up my patch,
> > "hw/ide/ahci: remove stray backslash" (patch 1/8 in this series),
> > and since John seems to have gone silent for 40+ days,
> > could you please consider taking this series through your misc tree?
>

40 days, ouch. I kept thinking it had been a week. Don't trust me with time.

> (First patch was a cleanup)
>
> Niklas, I don't feel confident enough :/
>
> John, Paolo, Kevin, do you Ack?
>
> Regards,
>
> Phil.

I'm staging it, but it's for next release. We'll get it in early and
it gives us a chance to fix anything that's amiss before the next RC
window.




Re: [RFC] risc-v vector (RVV) emulation performance issues

2023-07-25 Thread Richard Henderson

On 7/24/23 06:40, Daniel Henrique Barboza wrote:

Hi,

As some of you are already aware, the current RVV emulation could be faster.
We have at least one commit (bc0ec52eb2, "target/riscv/vector_helper.c:
skip set tail when vta is zero") that tried to address at least part of the
problem.

Running a simple program like this:

---

#include <stdlib.h>   /* for malloc() */

#define SZ 1000

int main(void)
{
    int *a = malloc(SZ * sizeof(int));
    int *b = malloc(SZ * sizeof(int));
    int *c = malloc(SZ * sizeof(int));

    for (int i = 0; i < SZ; i++)
        c[i] = a[i] + b[i];
    return c[SZ - 1];
}

---

And then compiling it without RVV support, it runs in 50 ms or so:

$ time ~/work/qemu/build/qemu-riscv64 -cpu rv64,debug=false,vext_spec=v1.0,v=true,vlen=128 
./foo-novect.out


real    0m0.043s
user    0m0.025s
sys    0m0.018s

Building the same program with RVV support slows it down 4-5 times:

$ time ~/work/qemu/build/qemu-riscv64 -cpu 
rv64,debug=false,vext_spec=v1.0,v=true,vlen=1024 ./foo.out


real    0m0.196s
user    0m0.177s
sys    0m0.018s

Using the lowest 'vlen' value allowed (128) slows things down even further,
taking it to ~0.260s.


'perf record' shows the following profile on the aforementioned binary:

   23.27%  qemu-riscv64  qemu-riscv64 [.] do_ld4_mmu
   21.11%  qemu-riscv64  qemu-riscv64 [.] vext_ldst_us
   14.05%  qemu-riscv64  qemu-riscv64 [.] cpu_ldl_le_data_ra
   11.51%  qemu-riscv64  qemu-riscv64 [.] cpu_stl_le_data_ra
    8.18%  qemu-riscv64  qemu-riscv64 [.] cpu_mmu_lookup
    8.04%  qemu-riscv64  qemu-riscv64 [.] do_st4_mmu
    2.04%  qemu-riscv64  qemu-riscv64 [.] ste_w
    1.15%  qemu-riscv64  qemu-riscv64 [.] lde_w
    1.02%  qemu-riscv64  [unknown]    [k] 0xb3001260
    0.90%  qemu-riscv64  qemu-riscv64 [.] cpu_get_tb_cpu_state
    0.64%  qemu-riscv64  qemu-riscv64 [.] tb_lookup
    0.64%  qemu-riscv64  qemu-riscv64 [.] riscv_cpu_mmu_index
    0.39%  qemu-riscv64  qemu-riscv64 [.] object_dynamic_cast_assert


The first thing that caught my attention is vext_ldst_us from
target/riscv/vector_helper.c:

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

Given that this is a unit-stride load that accesses contiguous elements in
memory, it seems that this loop could be optimized/removed, since it's
loading/storing bytes one by one. I didn't find any TCG op to do that, though.
I assume that ARM SVE might have something of the sort. Richard, care to
comment?


Yes, SVE optimizes this case -- see

https://gitlab.com/qemu-project/qemu/-/blob/master/target/arm/tcg/sve_helper.c?ref_type=heads#L5651

It's not possible to do this generically, due to the predication. There's quite a lot of 
machinery that goes into expanding this such that each helper uses the correct host 
load/store insn in the fast case.
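
To make the idea concrete, here is a minimal, self-contained sketch (plain C,
not QEMU code; the function names and the boolean mask representation are
illustrative assumptions) of what a unit-stride fast path buys: a single bulk
copy when every element is active, and a per-element loop otherwise. The real
helpers additionally have to deal with vstart, page crossings and MMU faults,
which is where the machinery described above comes in.

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Per-element copy, analogous to calling ldst_elem() once per element. */
static void copy_elements_slow(uint32_t *dst, const uint32_t *src,
                               size_t n, const bool *mask)
{
    for (size_t i = 0; i < n; i++) {
        if (mask[i]) {
            dst[i] = src[i];
        }
    }
}

/* Unit-stride fast path: one bulk transfer when no element is masked off. */
static void copy_elements(uint32_t *dst, const uint32_t *src,
                          size_t n, const bool *mask)
{
    bool all_active = true;

    for (size_t i = 0; i < n; i++) {
        all_active &= mask[i];
    }
    if (all_active) {
        memcpy(dst, src, n * sizeof(*dst));
    } else {
        copy_elements_slow(dst, src, n, mask);
    }
}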



r~



Re: [PATCH 3/6] vhost: Do not reset suspended devices on stop

2023-07-25 Thread Eugenio Perez Martin
On Tue, Jul 25, 2023 at 3:09 PM Hanna Czenczek  wrote:
>
> On 25.07.23 12:03, Eugenio Perez Martin wrote:
> > On Tue, Jul 25, 2023 at 9:53 AM Hanna Czenczek  wrote:
> >> On 24.07.23 17:48, Eugenio Perez Martin wrote:
> >>> On Fri, Jul 21, 2023 at 6:07 PM Hanna Czenczek  wrote:
>  On 21.07.23 17:25, Eugenio Perez Martin wrote:
> > On Tue, Jul 11, 2023 at 5:52 PM Hanna Czenczek  
> > wrote:
> >> Move the `suspended` field from vhost_vdpa into the global vhost_dev
> >> struct, so vhost_dev_stop() can check whether the back-end has been
> >> suspended by `vhost_ops->vhost_dev_start(hdev, false)`.  If it has,
> >> there is no need to reset it; the reset is just a fall-back to stop
> >> device operations for back-ends that do not support suspend.
> >>
> >> Unfortunately, for vDPA specifically, RESUME is not yet implemented, so
> >> when the device is re-started, we still have to do the reset to have it
> >> un-suspend.
> >>
> >> Signed-off-by: Hanna Czenczek 
> >> ---
> >> include/hw/virtio/vhost-vdpa.h |  2 --
> >> include/hw/virtio/vhost.h  |  8 
> >> hw/virtio/vhost-vdpa.c | 11 +++
> >> hw/virtio/vhost.c  |  8 +++-
> >> 4 files changed, 22 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/include/hw/virtio/vhost-vdpa.h 
> >> b/include/hw/virtio/vhost-vdpa.h
> >> index e64bfc7f98..72c3686b7f 100644
> >> --- a/include/hw/virtio/vhost-vdpa.h
> >> +++ b/include/hw/virtio/vhost-vdpa.h
> >> @@ -42,8 +42,6 @@ typedef struct vhost_vdpa {
> >> bool shadow_vqs_enabled;
> >> /* Vdpa must send shadow addresses as IOTLB key for data 
> >> queues, not GPA */
> >> bool shadow_data;
> >> -/* Device suspended successfully */
> >> -bool suspended;
> >> /* IOVA mapping used by the Shadow Virtqueue */
> >> VhostIOVATree *iova_tree;
> >> GPtrArray *shadow_vqs;
> >> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> >> index 6a173cb9fa..69bf59d630 100644
> >> --- a/include/hw/virtio/vhost.h
> >> +++ b/include/hw/virtio/vhost.h
> >> @@ -120,6 +120,14 @@ struct vhost_dev {
> >> uint64_t backend_cap;
> >> /* @started: is the vhost device started? */
> >> bool started;
> >> +/**
> >> + * @suspended: Whether the vhost device is currently suspended.  
> >> Set
> >> + * and reset by implementations (vhost-user, vhost-vdpa, ...), 
> >> which
> >> + * are supposed to automatically suspend/resume in their
> >> + * vhost_dev_start handlers as required.  Must also be cleared 
> >> when
> >> + * the device is reset.
> >> + */
> >> +bool suspended;
> >> bool log_enabled;
> >> uint64_t log_size;
> >> Error *migration_blocker;
> >> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> >> index 7b7dee468e..f7fd19a203 100644
> >> --- a/hw/virtio/vhost-vdpa.c
> >> +++ b/hw/virtio/vhost-vdpa.c
> >> @@ -858,13 +858,12 @@ static int vhost_vdpa_get_device_id(struct 
> >> vhost_dev *dev,
> >>
> >> static int vhost_vdpa_reset_device(struct vhost_dev *dev)
> >> {
> >> -struct vhost_vdpa *v = dev->opaque;
> >> int ret;
> >> uint8_t status = 0;
> >>
> >> ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
> >> trace_vhost_vdpa_reset_device(dev);
> >> -v->suspended = false;
> >> +dev->suspended = false;
> >> return ret;
> >> }
> >>
> >> @@ -1278,7 +1277,7 @@ static void vhost_vdpa_suspend(struct vhost_dev 
> >> *dev)
> >> if (unlikely(r)) {
> >> error_report("Cannot suspend: %s(%d)", 
> >> g_strerror(errno), errno);
> >> } else {
> >> -v->suspended = true;
> >> +dev->suspended = true;
> >> return;
> >> }
> >> }
> >> @@ -1313,6 +1312,10 @@ static int vhost_vdpa_dev_start(struct 
> >> vhost_dev *dev, bool started)
> >> return -1;
> >> }
> >> vhost_vdpa_set_vring_ready(dev);
> >> +if (dev->suspended) {
> >> +/* TODO: When RESUME is available, use it instead of 
> >> resetting */
> >> +vhost_vdpa_reset_status(dev);
> > How is that we reset the status at each vhost_vdpa_dev_start? That
> > will clean all the vqs configured, features negotiated, etc. in the
> > vDPA device. Or am I missing something?
>  What alternative do you propose?  We don’t have RESUME for vDPA in qemu,
>  but we somehow need to lift the previous SUSPEND so the device will
>  again respond to guest requests, do we not?
> 

[Qemu PATCH v2 2/9] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support

2023-07-25 Thread Fan Ni
From: Fan Ni 

Per CXL spec 3.0, add a dynamic capacity region representative based on
Table 8-126 and extend the CXL type3 device definition to include DC region
information. Also, based on the info in 8.2.9.8.9.1, add 'Get Dynamic Capacity
Configuration' mailbox support.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 72 +
 hw/mem/cxl_type3.c  |  6 
 include/hw/cxl/cxl_device.h | 17 +
 3 files changed, 95 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index b013e30314..0fe9f3eb5d 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -81,6 +81,8 @@ enum {
 #define GET_POISON_LIST0x0
 #define INJECT_POISON  0x1
 #define CLEAR_POISON   0x2
+DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
+#define GET_DC_CONFIG  0x0
 PHYSICAL_SWITCH = 0x51
 #define IDENTIFY_SWITCH_DEVICE  0x0
 };
@@ -939,6 +941,71 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd 
*cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * cxl spec 3.0: 8.2.9.8.9.1
+ * Get Dynamic Capacity Configuration
+ **/
+static CXLRetCode cmd_dcd_get_dyn_cap_config(struct cxl_cmd *cmd,
+CXLDeviceState *cxl_dstate,
+uint16_t *len)
+{
+struct get_dyn_cap_config_in_pl {
+uint8_t region_cnt;
+uint8_t start_region_id;
+} QEMU_PACKED;
+
+struct get_dyn_cap_config_out_pl {
+uint8_t num_regions;
+uint8_t rsvd1[7];
+struct {
+uint64_t base;
+uint64_t decode_len;
+uint64_t region_len;
+uint64_t block_size;
+uint32_t dsmadhandle;
+uint8_t flags;
+uint8_t rsvd2[3];
+} QEMU_PACKED records[];
+} QEMU_PACKED;
+
+struct get_dyn_cap_config_in_pl *in = (void *)cmd->payload;
+struct get_dyn_cap_config_out_pl *out = (void *)cmd->payload;
+struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
+cxl_dstate);
+uint16_t record_count = 0, i;
+uint16_t out_pl_len;
+uint8_t start_region_id = in->start_region_id;
+
+if (start_region_id >= ct3d->dc.num_regions) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(ct3d->dc.num_regions - in->start_region_id,
+in->region_cnt);
+
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+memset(out, 0, out_pl_len);
+out->num_regions = record_count;
+for (i = 0; i < record_count; i++) {
+stq_le_p(&out->records[i].base,
+ct3d->dc.regions[start_region_id + i].base);
+stq_le_p(&out->records[i].decode_len,
+ct3d->dc.regions[start_region_id + i].decode_len);
+stq_le_p(&out->records[i].region_len,
+ct3d->dc.regions[start_region_id + i].len);
+stq_le_p(&out->records[i].block_size,
+ct3d->dc.regions[start_region_id + i].block_size);
+stl_le_p(&out->records[i].dsmadhandle,
+ct3d->dc.regions[start_region_id + i].dsmadhandle);
+out->records[i].flags = ct3d->dc.regions[start_region_id + i].flags;
+}
+
+*len = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -977,6 +1044,8 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
 cmd_media_inject_poison, 8, 0 },
 [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON",
 cmd_media_clear_poison, 72, 0 },
+[DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
+cmd_dcd_get_dyn_cap_config, 2, 0 },
 };
 
 static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
@@ -1164,6 +1233,9 @@ void cxl_initialize_mailbox(CXLDeviceState *cxl_dstate, 
bool switch_cci)
 }
 for (int set = 0; set < 256; set++) {
 for (int cmd = 0; cmd < 256; cmd++) {
+if (!cxl_dstate->is_dcd && set == DCD_CONFIG) {
+continue;
+}
 if (cxl_dstate->cxl_cmd_set[set][cmd].handler) {
 struct cxl_cmd *c = &cxl_dstate->cxl_cmd_set[set][cmd];
 struct cel_log *log =
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 862107c5ef..4d68824dfe 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1046,6 +1046,12 @@ static void ct3d_reset(DeviceState *dev)
 uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
 uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
 
+if (ct3d->dc.num_regions) {
+ct3d->cxl_dstate.is_dcd = true;
+} else {
+ct3d->cxl_dstate.is_dcd = false;
+}
+
 cxl_component_register_init_common(reg_state, write_msk, 
CXL2_TYPE3_DEVICE);
 cxl_device_register_init_common(>cxl_dstate);
 }
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 

[Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions

2023-07-25 Thread Fan Ni
From: Fan Ni 

Not all DPA ranges in the DC regions are valid to access until an extent
covering the range has been added. Add a bitmap for each region to
record whether a DC block in the region has been backed by a DC extent.
In the bitmap, a bit represents a DC block. When a DC extent is
added, all the bits of the blocks in the extent will be set, and they
will be cleared when the extent is released.
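
(Illustration only, not part of the patch: the bookkeeping described above
boils down to one bit per block, set on add, tested on access, cleared on
release. Below is a self-contained sketch using plain C bit operations rather
than QEMU's bitmap helpers; the block size and the 64-block region size are
arbitrary assumptions.)

#include <stdbool.h>
#include <stdint.h>

#define BLOCK_SIZE (2 * 1024 * 1024ULL)  /* arbitrary block size */
static uint64_t blk_bitmap;              /* one bit per block, max 64 blocks */

/* Assumes dpa and len are block-aligned and fit within the 64-block region. */
static void backed_set(uint64_t dpa, uint64_t len)      /* extent added */
{
    for (uint64_t b = dpa / BLOCK_SIZE; b < (dpa + len) / BLOCK_SIZE; b++) {
        blk_bitmap |= 1ULL << b;
    }
}

static void backed_clear(uint64_t dpa, uint64_t len)    /* extent released */
{
    for (uint64_t b = dpa / BLOCK_SIZE; b < (dpa + len) / BLOCK_SIZE; b++) {
        blk_bitmap &= ~(1ULL << b);
    }
}

static bool backed_test(uint64_t dpa, uint64_t len)     /* access validation */
{
    for (uint64_t b = dpa / BLOCK_SIZE; b < (dpa + len) / BLOCK_SIZE; b++) {
        if (!(blk_bitmap & (1ULL << b))) {
            return false;
        }
    }
    return true;
}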

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c  | 155 
 include/hw/cxl/cxl_device.h |   1 +
 2 files changed, 156 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 41a828598a..51943a36fc 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
 /* dsmad_handle is set when creating cdat table entries */
 region->flags = 0;
 
+region->blk_bitmap = bitmap_new(region->len / region->block_size);
+if (!region->blk_bitmap) {
+break;
+}
+
 region_base += region->len;
 }
+
+if (i < ct3d->dc.num_regions) {
+while (--i >= 0) {
+g_free(ct3d->dc.regions[i].blk_bitmap);
+}
+return -1;
+}
+
 QTAILQ_INIT(&ct3d->dc.extents);
 
 return 0;
 }
 
+static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
+{
+int i;
+struct CXLDCD_Region *region;
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = &ct3d->dc.regions[i];
+g_free(region->blk_bitmap);
+}
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
 DeviceState *ds = DEVICE(ct3d);
@@ -1021,6 +1045,7 @@ err_free_special_ops:
 g_free(regs->special_ops);
 err_address_space_free:
 if (ct3d->dc.host_dc) {
+cxl_destroy_dc_regions(ct3d);
 address_space_destroy(>dc.host_dc_as);
 }
 if (ct3d->hostpmem) {
@@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
 spdm_sock_fini(ct3d->doe_spdm.socket);
 g_free(regs->special_ops);
 if (ct3d->dc.host_dc) {
+cxl_destroy_dc_regions(ct3d);
 address_space_destroy(&ct3d->dc.host_dc_as);
 }
 if (ct3d->hostpmem) {
@@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
 }
 }
 
+/*
+ * This function marks the dpa range [dpa, dpa + len) as backed and
+ * accessible; this happens when a dc extent is added and accepted by the
+ * host.
+ */
+static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+uint64_t len)
+{
+int i;
+CXLDCD_Region *region = &ct3d->dc.regions[0];
+
+if (dpa < region->base
+|| dpa >= region->base + ct3d->dc.total_capacity)
+return;
+
+/*
+ * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+ * Region 0 being used for the lowest DPA of Dynamic Capacity and
+ * Region 7 for the highest DPA.
+ * So we check from the last region to find where the dpa belongs.
+ * access across multiple regions is not allowed.
+ **/
+for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+region = &ct3d->dc.regions[i];
+if (dpa >= region->base) {
+break;
+}
+}
+
+bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
+len / region->block_size);
+}
+
+/*
+ * This function checks whether a dpa range [dpa, dpa + len) has been backed
+ * with dc extents; it is used when validating reads/writes to dc regions.
+ */
+static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+uint64_t len)
+{
+int i;
+CXLDCD_Region *region = &ct3d->dc.regions[0];
+uint64_t nbits;
+long nr;
+
+if (dpa < region->base
+|| dpa >= region->base + ct3d->dc.total_capacity)
+return false;
+
+/*
+ * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+ * Region 0 being used for the lowest DPA of Dynamic Capacity and
+ * Region 7 for the highest DPA.
+ * So we check from the last region to find where the dpa belongs.
+ * access across multiple regions is not allowed.
+ */
+for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+region = &ct3d->dc.regions[i];
+if (dpa >= region->base) {
+break;
+}
+}
+
+nr = (dpa - region->base) / region->block_size;
+nbits = len / region->block_size;
+return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
+}
+
+/*
+ * This function marks the dpa range [dpa, dpa + len) as unbacked and
+ * inaccessible; this happens when a dc extent is released by the
+ * host.
+ */
+static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+uint64_t len)
+{
+int i;
+CXLDCD_Region *region = &ct3d->dc.regions[0];
+uint64_t nbits;
+long nr;
+
+if (dpa < region->base
+|| dpa >= region->base + ct3d->dc.total_capacity)
+return;
+
+/*
+ * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+   

[Qemu PATCH v2 3/9] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices

2023-07-25 Thread Fan Ni
From: Fan Ni 

Rename mem_size to static_mem_size for the type3 memdev to cover static RAM and
pmem capacity, preparing for the introduction of dynamic capacity to support
dynamic capacity devices.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 5 +++--
 hw/mem/cxl_type3.c  | 8 
 include/hw/cxl/cxl_device.h | 2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 0fe9f3eb5d..dd5ea95af8 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -540,7 +540,8 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd 
*cmd,
 
 snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
-stq_le_p(&id->total_capacity, cxl_dstate->mem_size / 
CXL_CAPACITY_MULTIPLIER);
+stq_le_p(&id->total_capacity,
+cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / 
CXL_CAPACITY_MULTIPLIER);
 stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / 
CXL_CAPACITY_MULTIPLIER);
 stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d));
@@ -879,7 +880,7 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd 
*cmd,
 struct clear_poison_pl *in = (void *)cmd->payload;
 
 dpa = ldq_le_p(&in->dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 4d68824dfe..3d7acffcb7 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -748,7 +748,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(&ct3d->hostvmem_as, vmr, v_name);
 ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(vmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
 g_free(v_name);
 }
 
@@ -771,7 +771,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(&ct3d->hostpmem_as, pmr, p_name);
 ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(pmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
 g_free(p_name);
 }
 
@@ -984,7 +984,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
 return -EINVAL;
 }
 
-if (*dpa_offset > ct3d->cxl_dstate.mem_size) {
+if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
 return -EINVAL;
 }
 
@@ -1148,7 +1148,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t 
dpa_offset, uint8_t *data)
 return false;
 }
 
-if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
+if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
 return false;
 }
 
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index dae39da438..503c344326 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -209,7 +209,7 @@ typedef struct cxl_device_state {
 } timestamp;
 
 /* memory region size, HDM */
-uint64_t mem_size;
+uint64_t static_mem_size;
 uint64_t pmem_size;
 uint64_t vmem_size;
 bool is_dcd;
-- 
2.25.1



[Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions

2023-07-25 Thread Fan Ni
From: Fan Ni 

Add a (file/memory backed) host backend; all the dynamic capacity regions
will share a single, large enough host backend. Set up an address space for
the DC regions to support read/write operations to dynamic capacity for DCD.

With this change, the following support is added:
1. add a new property "nonvolatile-dc-memdev" to the type3 device to point to
   the host memory backend for dynamic capacity;
2. add a namespace for dynamic capacity for read/write support;
3. create cdat entries for each dynamic capacity region;
4. fix dvsec range registers to include DC regions.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  19 +++-
 hw/mem/cxl_type3.c  | 203 +---
 include/hw/cxl/cxl_device.h |   4 +
 3 files changed, 185 insertions(+), 41 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index dd5ea95af8..0511b8e6f7 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -388,9 +388,11 @@ static CXLRetCode cmd_firmware_update_get_info(struct 
cxl_cmd *cmd,
 char fw_rev4[0x10];
 } QEMU_PACKED *fw_info;
 QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
+CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
 if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
-(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
+(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
+(ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -531,7 +533,8 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd 
*cmd,
 CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -566,9 +569,11 @@ static CXLRetCode cmd_ccls_get_partition_info(struct 
cxl_cmd *cmd,
 uint64_t next_pmem;
 } QEMU_PACKED *part_info = (void *)cmd->payload;
 QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
+CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -880,7 +885,13 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd 
*cmd,
 struct clear_poison_pl *in = (void *)cmd->payload;
 
 dpa = ldq_le_p(&in->dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE >= cxl_dstate->static_mem_size
+&& ct3d->dc.num_regions == 0) {
+return CXL_MBOX_INVALID_PA;
+}
+
+if (ct3d->dc.num_regions && dpa + CXL_CACHE_LINE_SIZE >=
+cxl_dstate->static_mem_size + ct3d->dc.total_capacity) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b29bb2309a..76bbd9f785 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -20,6 +20,7 @@
 #include "hw/pci/spdm.h"
 
 #define DWORD_BYTE 4
+#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 
 /* Default CDAT entries for a memory region */
 enum {
@@ -33,8 +34,8 @@ enum {
 };
 
 static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
- int dsmad_handle, MemoryRegion *mr,
- bool is_pmem, uint64_t dpa_base)
+int dsmad_handle, uint8_t flags,
+uint64_t dpa_base, uint64_t size)
 {
 g_autofree CDATDsmas *dsmas = NULL;
 g_autofree CDATDslbis *dslbis0 = NULL;
@@ -53,9 +54,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .length = sizeof(*dsmas),
 },
 .DSMADhandle = dsmad_handle,
-.flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
+.flags = flags,
 .DPA_base = dpa_base,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* For now, no memory side cache, plausiblish numbers */
@@ -137,9 +138,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
  * NV: Reserved - the non volatile from DSMAS matters
  * V: EFI_MEMORY_SP
  */
-.EFI_memory_type_attr = is_pmem ? 2 : 1,
+.EFI_memory_type_attr = flags ? 2 : 1,
 .DPA_offset = 0,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* Header always at start of structure */
@@ -158,21 +159,28 @@ static int ct3_build_cdat_table(CDATSubHeader 

[Qemu PATCH v2 8/9] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2023-07-25 Thread Fan Ni
From: Fan Ni 

Since fabric manager emulation is not supported yet, the change implements
the functions to add/release dynamic capacity extents as QMP interfaces.

1. Add dynamic capacity extents:

For example, the command to add two contiguous extents (each 128MB long)
to region 0 (starting at dpa offsets 0 and 128MB) looks like this:

{ "execute": "qmp_capabilities" }

{ "execute": "cxl-add-dynamic-capacity-event",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "extents": [
  {
  "region-id": 0,
  "dpa": 0,
  "len": 128
  },
  {
  "region-id": 0,
  "dpa": 128,
  "len": 128
  }
  ]
  }
}

2. Release dynamic capacity extents:

For example, the command to release an extent of size 128MB from region 0
(starting at dpa offset 128MB) looks like this:

{ "execute": "cxl-release-dynamic-capacity-event",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "extents": [
  {
  "region-id": 0,
  "dpa": 128,
  "len": 128
  }
  ]
  }
}

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c  | 145 
 hw/mem/cxl_type3_stubs.c|   6 ++
 include/hw/cxl/cxl_events.h |  16 
 qapi/cxl.json   |  49 
 4 files changed, 216 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index f1170b8047..41a828598a 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1817,6 +1817,151 @@ void qmp_cxl_inject_memory_module_event(const char 
*path, CxlEventLog log,
 }
 }
 
+static const QemuUUID dynamic_capacity_uuid = {
+.data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
+0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
+};
+
+/*
+ * cxl r3.0: Table 8-47
+ * 00h: add capacity
+ * 01h: release capacity
+ * 02h: forced capacity release
+ * 03h: region configuration updated
+ * 04h: Add capacity response
+ * 05h: capacity released
+ */
+enum DC_Event_Type {
+DC_EVENT_ADD_CAPACITY,
+DC_EVENT_RELEASE_CAPACITY,
+DC_EVENT_FORCED_RELEASE_CAPACITY,
+DC_EVENT_REGION_CONFIG_UPDATED,
+DC_EVENT_ADD_CAPACITY_RSP,
+DC_EVENT_CAPACITY_RELEASED,
+DC_EVENT_NUM
+};
+
+#define MEM_BLK_SIZE_MB 128
+static void qmp_cxl_process_dynamic_capacity_event(const char *path,
+CxlEventLog log, enum DC_Event_Type type,
+uint16_t hid, CXLDCExtentRecordList *records, Error **errp)
+{
+Object *obj = object_resolve_path(path, NULL);
+CXLEventDynamicCapacity dCap;
+CXLEventRecordHdr *hdr = &dCap.hdr;
+CXLDeviceState *cxlds;
+CXLType3Dev *dcd;
+uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
+uint32_t num_extents = 0;
+CXLDCExtentRecordList *list = records;
+CXLDCExtent_raw *extents;
+uint64_t dpa, len;
+uint8_t rid = 0;
+int i;
+
+if (!obj) {
+error_setg(errp, "Unable to resolve path");
+return;
+}
+if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+error_setg(errp, "Path not point to a valid CXL type3 device");
+return;
+}
+
+dcd = CXL_TYPE3(obj);
+cxlds = &dcd->cxl_dstate;
+memset(&dCap, 0, sizeof(dCap));
+
+if (!dcd->dc.num_regions) {
+error_setg(errp, "No dynamic capacity support from the device");
+return;
+}
+
+while (list) {
+dpa = list->value->dpa * 1024 * 1024;
+len = list->value->len * 1024 * 1024;
+rid = list->value->region_id;
+
+if (rid >= dcd->dc.num_regions) {
+error_setg(errp, "region id is too large");
+return;
+}
+
+if (dpa % dcd->dc.regions[rid].block_size
+|| len % dcd->dc.regions[rid].block_size) {
+error_setg(errp, "dpa or len is not aligned to region block size");
+return;
+}
+
+if (dpa + len > dcd->dc.regions[rid].decode_len * 256 * 1024 * 1024) {
+error_setg(errp, "extent range is beyond the region end");
+return;
+}
+
+num_extents++;
+list = list->next;
+}
+
+i = 0;
+list = records;
+extents = g_new0(CXLDCExtent_raw, num_extents);
+while (list) {
+dpa = list->value->dpa * 1024 * 1024;
+len = list->value->len * 1024 * 1024;
+rid = list->value->region_id;
+
+extents[i].start_dpa = dpa + dcd->dc.regions[rid].base;
+extents[i].len = len;
+memset(extents[i].tag, 0, 0x10);
+extents[i].shared_seq = 0;
+
+list = list->next;
+i++;
+}
+
+/*
+ * 8.2.9.1.5
+ * All Dynamic Capacity event records shall set the Event Record
+ * Severity field in the Common Event Record Format to Informational
+ * Event. All Dynamic Capacity related events shall be logged in the
+ * Dynamic Capacity Event Log.
+ */
+cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
+cxl_device_get_timestamp(&dcd->cxl_dstate));
+
+dCap.type = type;
+stw_le_p(&dCap.host_id, hid);
+/* only 

[Qemu PATCH v2 0/9] Enabling DCD emulation support in Qemu

2023-07-25 Thread Fan Ni
v1[1]->v2:

1. fix a regression issue reported by Ira[2]:
2. fix a compile warning due to uninitialized 'rip' in qmp processing function.


[1] 
https://lore.kernel.org/linux-cxl/20230724162313.34196-1-fan...@samsung.com/T/#t
[2] 
https://lore.kernel.org/linux-cxl/64bfe7b090843_12757b2945b@iweiny-mobl.notmuch/T/#m09983a3dbaa9135a850e345d86714bf2ab957ef6

Fan Ni (9):
  hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output
payload of identify memory device command
  hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative
and mailbox command support
  include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for
type3 memory devices
  hw/mem/cxl_type3: Add support to create DC regions to type3 memory
devices
  hw/mem/cxl_type3: Add host backend and address space handling for DC
regions
  hw/mem/cxl_type3: Add DC extent list representative and get DC extent
list mailbox support
  hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release
dynamic capacity response
  hw/cxl/events: Add qmp interfaces to add/release dynamic capacity
extents
  hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions

 hw/cxl/cxl-mailbox-utils.c  | 424 +++-
 hw/mem/cxl_type3.c  | 545 +---
 hw/mem/cxl_type3_stubs.c|   6 +
 include/hw/cxl/cxl_device.h |  50 +++-
 include/hw/cxl/cxl_events.h |  16 ++
 qapi/cxl.json   |  49 
 6 files changed, 1044 insertions(+), 46 deletions(-)

-- 
2.25.1



[Qemu PATCH v2 7/9] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response

2023-07-25 Thread Fan Ni
From: Fan Ni 

Per CXL spec 3.0, two mailbox commands are implemented:
Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.8.9.3, and
Release Dynamic Capacity (Opcode 4803h) 8.2.9.8.9.4.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 253 
 include/hw/cxl/cxl_device.h |   3 +-
 2 files changed, 255 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 3d25a9697e..1e4944da95 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -84,6 +84,8 @@ enum {
 DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
 #define GET_DC_CONFIG  0x0
 #define GET_DYN_CAP_EXT_LIST   0x1
+#define ADD_DYN_CAP_RSP0x2
+#define RELEASE_DYN_CAP0x3
 PHYSICAL_SWITCH = 0x51
 #define IDENTIFY_SWITCH_DEVICE  0x0
 };
@@ -1086,6 +1088,251 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct 
cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * Check whether the bits at addr in the range [nr, nr+size) are all set;
+ * return 1 if they are all set, else return 0
+ */
+static inline int test_bits(const unsigned long *addr, int nr, int size)
+{
+unsigned long res = find_next_zero_bit(addr, size + nr, nr);
+
+return (res >= nr + size) ? 1 : 0;
+}
+
+/*
+ * Find dynamic capacity region id based on dpa range [dpa, dpa+len)
+ */
+static uint8_t find_region_id(struct CXLType3Dev *dev, uint64_t dpa,
+uint64_t len)
+{
+int8_t i = dev->dc.num_regions - 1;
+
+while (i > 0 && dpa < dev->dc.regions[i].base) {
+i--;
+}
+
+if (dpa < dev->dc.regions[i].base
+|| dpa + len > dev->dc.regions[i].base + dev->dc.regions[i].len) {
+return dev->dc.num_regions;
+}
+
+return i;
+}
+
+static void insert_extent_to_extent_list(CXLDCDExtentList *list, uint64_t dpa,
+uint64_t len, uint8_t *tag, uint16_t shared_seq)
+{
+CXLDCD_Extent *extent;
+extent = g_new0(CXLDCD_Extent, 1);
+extent->start_dpa = dpa;
+extent->len = len;
+if (tag) {
+memcpy(extent->tag, tag, 0x10);
+} else {
+memset(extent->tag, 0, 0x10);
+}
+extent->shared_seq = shared_seq;
+
+QTAILQ_INSERT_TAIL(list, extent, node);
+}
+
+typedef struct updated_dc_extent_list_in_pl {
+uint32_t num_entries_updated;
+uint8_t rsvd[4];
+struct { /* r3.0: Table 8-130 */
+uint64_t start_dpa;
+uint64_t len;
+uint8_t rsvd[8];
+} QEMU_PACKED updated_entries[];
+} QEMU_PACKED updated_dc_extent_list_in_pl;
+
+/*
+ * The function only checks the input extent list against itself.
+ */
+static CXLRetCode detect_malformed_extent_list(CXLType3Dev *dev,
+const updated_dc_extent_list_in_pl *in)
+{
+unsigned long *blk_bitmap;
+uint64_t min_block_size = dev->dc.regions[0].block_size;
+struct CXLDCD_Region *region = &dev->dc.regions[0];
+uint32_t i;
+uint64_t dpa, len;
+uint8_t rid;
+CXLRetCode ret;
+
+for (i = 1; i < dev->dc.num_regions; i++) {
+region = &dev->dc.regions[i];
+if (min_block_size > region->block_size) {
+min_block_size = region->block_size;
+}
+}
+
+blk_bitmap = bitmap_new((region->len + region->base
+- dev->dc.regions[0].base) / min_block_size);
+
+for (i = 0; i < in->num_entries_updated; i++) {
+dpa = in->updated_entries[i].start_dpa;
+len = in->updated_entries[i].len;
+
+rid = find_region_id(dev, dpa, len);
+if (rid == dev->dc.num_regions) {
+ret = CXL_MBOX_INVALID_PA;
+goto out;
+}
+
+region = &dev->dc.regions[rid];
+if (dpa % region->block_size || len % region->block_size) {
+ret = CXL_MBOX_INVALID_EXTENT_LIST;
+goto out;
+}
+/* the dpa range already covered by some other extents in the list */
+if (test_bits(blk_bitmap, dpa / min_block_size, len / min_block_size)) 
{
+ret = CXL_MBOX_INVALID_EXTENT_LIST;
+goto out;
+}
+bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
+   }
+
+ret = CXL_MBOX_SUCCESS;
+
+out:
+g_free(blk_bitmap);
+return ret;
+}
+
+/*
+ * cxl spec 3.0: 8.2.9.8.9.3
+ * Add Dynamic Capacity Response (opcode 4802h)
+ * Assume an extent is added only after the response is processed successfully
+ * TODO: for better extent list validation, a better solution would be
+ * maintaining a pending extent list and use it to verify the extent list in
+ * the response.
+ */
+static CXLRetCode cmd_dcd_add_dyn_cap_rsp(struct cxl_cmd *cmd,
+CXLDeviceState *cxl_dstate, uint16_t *len_unused)
+{
+updated_dc_extent_list_in_pl *in = (void *)cmd->payload;
+struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
+cxl_dstate);
+CXLDCDExtentList *extent_list = &ct3d->dc.extents;
+CXLDCD_Extent *ent;
+uint32_t i;
+uint64_t dpa, len;
+CXLRetCode ret;
+ 
+if 

[Qemu PATCH v2 6/9] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support

2023-07-25 Thread Fan Ni
From: Fan Ni 

Add dynamic capacity extent list representative to the definition of
CXLType3Dev and add get DC extent list mailbox command per
CXL.spec.3.0:.8.2.9.8.9.2.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 71 +
 hw/mem/cxl_type3.c  |  1 +
 include/hw/cxl/cxl_device.h | 23 
 3 files changed, 95 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 0511b8e6f7..3d25a9697e 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -83,6 +83,7 @@ enum {
 #define CLEAR_POISON   0x2
 DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
 #define GET_DC_CONFIG  0x0
+#define GET_DYN_CAP_EXT_LIST   0x1
 PHYSICAL_SWITCH = 0x51
 #define IDENTIFY_SWITCH_DEVICE  0x0
 };
@@ -1018,6 +1019,73 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(struct 
cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * cxl spec 3.0: 8.2.9.8.9.2
+ * Get Dynamic Capacity Extent List (Opcode 4810h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct cxl_cmd *cmd,
+CXLDeviceState *cxl_dstate,
+uint16_t *len)
+{
+struct get_dyn_cap_ext_list_in_pl {
+uint32_t extent_cnt;
+uint32_t start_extent_id;
+} QEMU_PACKED;
+
+struct get_dyn_cap_ext_list_out_pl {
+uint32_t count;
+uint32_t total_extents;
+uint32_t generation_num;
+uint8_t rsvd[4];
+CXLDCExtent_raw records[];
+} QEMU_PACKED;
+
+struct get_dyn_cap_ext_list_in_pl *in = (void *)cmd->payload;
+struct get_dyn_cap_ext_list_out_pl *out = (void *)cmd->payload;
+struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
+cxl_dstate);
+uint16_t record_count = 0, i = 0, record_done = 0;
+CXLDCDExtentList *extent_list = &ct3d->dc.extents;
+CXLDCD_Extent *ent;
+uint16_t out_pl_len;
+uint32_t start_extent_id = in->start_extent_id;
+
+if (start_extent_id > ct3d->dc.total_extent_count) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(in->extent_cnt,
+ct3d->dc.total_extent_count - start_extent_id);
+
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+/* May need more processing here in the future */
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+memset(out, 0, out_pl_len);
+stl_le_p(&out->count, record_count);
+stl_le_p(&out->total_extents, ct3d->dc.total_extent_count);
+stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq);
+
+if (record_count > 0) {
+QTAILQ_FOREACH(ent, extent_list, node) {
+if (i++ < start_extent_id) {
+continue;
+}
+stq_le_p(&out->records[record_done].start_dpa, ent->start_dpa);
+stq_le_p(&out->records[record_done].len, ent->len);
+memcpy(&out->records[record_done].tag, ent->tag, 0x10);
+stw_le_p(&out->records[record_done].shared_seq, ent->shared_seq);
+record_done++;
+if (record_done == record_count) {
+break;
+}
+}
+}
+
+*len = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1058,6 +1126,9 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
 cmd_media_clear_poison, 72, 0 },
 [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
 cmd_dcd_get_dyn_cap_config, 2, 0 },
+[DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
+"DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
+8, 0 },
 };
 
 static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 76bbd9f785..f1170b8047 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -789,6 +789,7 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
 
 region_base += region->len;
 }
+QTAILQ_INIT(&ct3d->dc.extents);
 
 return 0;
 }
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 1c99b05a66..3a338b3b37 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -385,6 +385,25 @@ typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
 
 #define DCD_MAX_REGION_NUM 8
 
+typedef struct CXLDCD_Extent_raw {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t tag[0x10];
+uint16_t shared_seq;
+uint8_t rsvd[0x6];
+} QEMU_PACKED CXLDCExtent_raw;
+
+typedef struct CXLDCD_Extent {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t tag[0x10];
+uint16_t shared_seq;
+uint8_t rsvd[0x6];
+
+QTAILQ_ENTRY(CXLDCD_Extent) node;
+} CXLDCD_Extent;
+typedef QTAILQ_HEAD(, CXLDCD_Extent) CXLDCDExtentList;
+
 typedef struct CXLDCD_Region {
 uint64_t base;
 uint64_t decode_len; /* in multiples of 256MB */
@@ -433,6 +452,10 @@ struct CXLType3Dev {
 
 uint8_t num_regions; /* 0-8 regions */
 struct 

[Qemu PATCH v2 1/9] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command

2023-07-25 Thread Fan Ni
From: Fan Ni 

Based on CXL spec 3.0 Table 8-94 (Identify Memory Device Output
Payload), the dynamic capacity event log size should be part of the
output of the Identify command.
Add dc_event_log_size to the output payload for the host to get the info.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index ad7a6116e4..b013e30314 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -21,6 +21,8 @@
 #include "sysemu/hostmem.h"
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
+/* Experimental value: dynamic capacity event log size */
+#define CXL_DC_EVENT_LOG_SIZE 8
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -519,8 +521,9 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd 
*cmd,
 uint16_t inject_poison_limit;
 uint8_t poison_caps;
 uint8_t qos_telemetry_caps;
+uint16_t dc_event_log_size;
 } QEMU_PACKED *id;
-QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
+QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45);
 
 CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
@@ -543,6 +546,7 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd 
*cmd,
 st24_le_p(id->poison_list_max_mer, 256);
 /* No limit - so limited by main poison record limit */
 stw_le_p(&id->inject_poison_limit, 0);
+stw_le_p(&id->dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
 
 *len = sizeof(*id);
 return CXL_MBOX_SUCCESS;
-- 
2.25.1



[Qemu PATCH v2 4/9] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices

2023-07-25 Thread Fan Ni
From: Fan Ni 

With this change, when setting up memory for a type3 memory device, we can
create DC regions.
A property 'num-dc-regions' is added to ct3_props to allow users to pass the
number of DC regions to create. To keep things simple, other region parameters
like region base, length, and block size are hard-coded. If needed,
these parameters can easily be made configurable later.

With the change, we can create DC regions with proper kernel side
support as below:

region=$(cat /sys/bus/cxl/devices/decoder0.0/create_dc_region)
echo $region> /sys/bus/cxl/devices/decoder0.0/create_dc_region
echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
echo 1 > /sys/bus/cxl/devices/$region/interleave_ways

echo "dc0" >/sys/bus/cxl/devices/decoder2.0/mode
echo 0x4000 >/sys/bus/cxl/devices/decoder2.0/dpa_size

echo 0x4000 > /sys/bus/cxl/devices/$region/size
echo  "decoder2.0" > /sys/bus/cxl/devices/$region/target0
echo 1 > /sys/bus/cxl/devices/$region/commit
echo $region > /sys/bus/cxl/drivers/cxl_region/bind

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 33 +
 1 file changed, 33 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 3d7acffcb7..b29bb2309a 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -707,6 +707,34 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, 
uint64_t value,
 }
 }
 
+/*
+ * Create a dc region to test "Get Dynamic Capacity Configuration" command.
+ */
+static int cxl_create_dc_regions(CXLType3Dev *ct3d)
+{
+int i;
+uint64_t region_base = (ct3d->hostvmem ? ct3d->hostvmem->size : 0)
++ (ct3d->hostpmem ? ct3d->hostpmem->size : 0);
+uint64_t region_len = (uint64_t)2 * 1024 * 1024 * 1024;
+uint64_t decode_len = 4; /* 4*256MB */
+uint64_t blk_size = 2 * 1024 * 1024;
+struct CXLDCD_Region *region;
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = &ct3d->dc.regions[i];
+region->base = region_base;
+region->decode_len = decode_len;
+region->len = region_len;
+region->block_size = blk_size;
+/* dsmad_handle is set when creating cdat table entries */
+region->flags = 0;
+
+region_base += region->len;
+}
+
+return 0;
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
 DeviceState *ds = DEVICE(ct3d);
@@ -775,6 +803,10 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 g_free(p_name);
 }
 
+if (cxl_create_dc_regions(ct3d)) {
+return false;
+}
+
 return true;
 }
 
@@ -1068,6 +1100,7 @@ static Property ct3_props[] = {
 DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
 DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
 DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
+DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.25.1



Re: [PATCH v9 08/10] migration: Implement MigrateChannelList to qmp migration flow.

2023-07-25 Thread Daniel P . Berrangé
On Fri, Jul 21, 2023 at 02:49:34PM +, Het Gala wrote:
> Integrate MigrateChannelList with all transport backends
> (socket, exec and rdma) for both src and dest migration
> endpoints for qmp migration.
> 
> For current series, limit the size of MigrateChannelList
> to single element (single interface) as runtime check.
> 
> Suggested-by: Aravind Retnakaran 
> Signed-off-by: Het Gala 
> ---
>  migration/migration.c | 77 ---
>  1 file changed, 50 insertions(+), 27 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 6e0a8beaf2..acf80b3590 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -426,9 +426,10 @@ void migrate_add_address(SocketAddress *address)
>  }
>  
>  static bool migrate_uri_parse(const char *uri,
> -  MigrationAddress **channel,
> +  MigrationChannel **channel,
>Error **errp)
>  {
> +g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1);
>  g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
>  SocketAddress *saddr = &addr->u.socket;
>  InetSocketAddress *isock = &addr->u.rdma;
> @@ -465,7 +466,9 @@ static bool migrate_uri_parse(const char *uri,
>  return false;
>  }
>  
> -*channel = addr;
> +val->channel_type = MIGRATION_CHANNEL_TYPE_MAIN;
> +val->addr = addr;
> +*channel = val;
>  return true;
>  }
>  
> @@ -473,7 +476,8 @@ static void qemu_start_incoming_migration(const char 
> *uri, bool has_channels,
>MigrationChannelList *channels,
>Error **errp)
>  {
> -g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
> +g_autoptr(MigrationChannel) channel = g_new0(MigrationChannel, 1);
> +g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
>  
>  /*
>   * Having preliminary checks for uri and channel
> @@ -483,20 +487,29 @@ static void qemu_start_incoming_migration(const char 
> *uri, bool has_channels,
> "exclusive; exactly one of the two should be present in "
> "'migrate-incoming' qmp command ");
>  return;
> +} else if (channels) {
> +/* To verify that Migrate channel list has only item */
> +if (channels->next) {
> +error_setg(errp, "Channel list has more than one entries");
> +return;
> +}
> +channel = channels->value;
> +} else {
> +/* caller uses the old URI syntax */
> +if (uri && !migrate_uri_parse(uri, &channel, errp)) {
> +return;
> +}
>  }
> -
> -if (uri && !migrate_uri_parse(uri, &channel, errp)) {
> -return;
> -}
> +addr = channel->addr;
>  
>  /* transport mechanism not suitable for migration? */
> -if (!migration_channels_and_transport_compatible(channel, errp)) {
> +if (!migration_channels_and_transport_compatible(addr, errp)) {
>  return;
>  }
>  
>  qapi_event_send_migration(MIGRATION_STATUS_SETUP);
> -if (channel->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
> -SocketAddress *saddr = &channel->u.socket;
> +if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
> +SocketAddress *saddr = &addr->u.socket;
>  if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
>  saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
>  saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
> @@ -505,11 +518,11 @@ static void qemu_start_incoming_migration(const char 
> *uri, bool has_channels,
>  fd_start_incoming_migration(saddr->u.fd.str, errp);
>  }
>  #ifdef CONFIG_RDMA
> -} else if (channel->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
> -rdma_start_incoming_migration(&channel->u.rdma, errp);
> -#endif
> -} else if (channel->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
> -exec_start_incoming_migration(channel->u.exec.args, errp);
> +} else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
> +rdma_start_incoming_migration(&addr->u.rdma, errp);
> + #endif
> +} else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
> +exec_start_incoming_migration(addr->u.exec.args, errp);
>  } else {
>  error_setg(errp, "unknown migration protocol: %s", uri);
>  }
> @@ -1709,7 +1722,8 @@ void qmp_migrate(const char *uri, bool has_channels,
>  bool resume_requested;
>  Error *local_err = NULL;
>  MigrationState *s = migrate_get_current();
> -g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
> +g_autoptr(MigrationChannel) channel = g_new0(MigrationChannel, 1);
> +g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
>  
>  /*
>   * Having preliminary checks for uri and channel
> @@ -1719,14 +1733,23 @@ void qmp_migrate(const char *uri, bool has_channels,
> "exclusive; exactly one of the two should 

Re: [PATCH v9 06/10] migration: New migrate and migrate-incoming argument 'channels'

2023-07-25 Thread Daniel P . Berrangé
On Tue, Jul 25, 2023 at 07:34:09PM +0100, Daniel P. Berrangé wrote:
> On Fri, Jul 21, 2023 at 02:49:31PM +, Het Gala wrote:
> > MigrateChannelList allows to connect across multiple interfaces.
> > Add MigrateChannelList struct as argument to migration QAPIs.
> > 
> > We plan to include multiple channels in future, to connect
> > multiple interfaces. Hence, we choose 'MigrateChannelList'
> > as the new argument over 'MigrateChannel' to make migration
> > QAPIs future proof.
> > 
> > Suggested-by: Aravind Retnakaran 
> > Signed-off-by: Het Gala 
> > Acked-by: Markus Armbruster 
> > ---
> >  migration/migration-hmp-cmds.c |   6 +-
> >  migration/migration.c  |  34 --
> >  qapi/migration.json| 109 -
> >  softmmu/vl.c   |   2 +-
> >  4 files changed, 139 insertions(+), 12 deletions(-)
> > 
> > diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
> > index 9885d7c9f7..49b150f33f 100644
> > --- a/migration/migration-hmp-cmds.c
> > +++ b/migration/migration-hmp-cmds.c
> > @@ -424,7 +424,7 @@ void hmp_migrate_incoming(Monitor *mon, const QDict 
> > *qdict)
> >  Error *err = NULL;
> >  const char *uri = qdict_get_str(qdict, "uri");
> >  
> > -qmp_migrate_incoming(uri, &err);
> > +qmp_migrate_incoming(uri, false, NULL, &err);
> >  
> >  hmp_handle_error(mon, err);
> >  }
> > @@ -705,8 +705,8 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
> >  const char *uri = qdict_get_str(qdict, "uri");
> >  Error *err = NULL;
> >  
> > -qmp_migrate(uri, !!blk, blk, !!inc, inc,
> > -false, false, true, resume, &err);
> > +qmp_migrate(uri, false, NULL, !!blk, blk, !!inc, inc,
> > + false, false, true, resume, &err);
> >  if (hmp_handle_error(mon, err)) {
> >  return;
> >  }
> > diff --git a/migration/migration.c b/migration/migration.c
> > index f37b388876..bd3a93fc8c 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -466,10 +466,22 @@ static bool migrate_uri_parse(const char *uri,
> >  return true;
> >  }
> >  
> > -static void qemu_start_incoming_migration(const char *uri, Error **errp)
> > +static void qemu_start_incoming_migration(const char *uri, bool 
> > has_channels,
> > +  MigrationChannelList *channels,
> > +  Error **errp)
> >  {
> >  g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
> >  
> > +/*
> > + * Having preliminary checks for uri and channel
> > + */
> > +if (uri && has_channels) {
> > +error_setg(errp, "'uri' and 'channels' arguments are mutually "
> > +   "exclusive; exactly one of the two should be present in 
> > "
> > +   "'migrate-incoming' qmp command ");
> > +return;
> > +}
> 
This checks if both are present.
> 
> Also needs a check if neither are present as that's invalid.

Also it should (temporarily) raise an error if "has_channels" is
set, as while we've added the parameter in QAPI, we've not
implemented it yet. IOW, raise an error now, and remove the
error in a later patch.
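
Untested sketch of the shape I have in mind for qemu_start_incoming_migration()
and qmp_migrate() (names match the patch context; treat it as illustrative
only, not a drop-in hunk):

    if (uri && has_channels) {
        error_setg(errp, "'uri' and 'channels' arguments are mutually "
                   "exclusive; exactly one of the two should be present in "
                   "'migrate-incoming' qmp command");
        return;
    } else if (!uri && !has_channels) {
        error_setg(errp, "neither 'uri' nor 'channels' was specified in "
                   "'migrate-incoming' qmp command");
        return;
    } else if (has_channels) {
        /* Temporary until 'channels' support is implemented in a later patch */
        error_setg(errp, "'channels' argument is not implemented yet");
        return;
    }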

> 
> 
> > @@ -1694,6 +1708,16 @@ void qmp_migrate(const char *uri, bool has_blk, bool 
> > blk,
> >  MigrationState *s = migrate_get_current();
> >  g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
> >  
> > +/*
> > + * Having preliminary checks for uri and channel
> > + */
> > +if (uri && has_channels) {
> > +error_setg(errp, "'uri' and 'channels' arguments are mutually "
> > +   "exclusive; exactly one of the two should be present in 
> > "
> > +   "'migrate' qmp command ");
> > +return;
> > +}
> 
> Same here 
> 
> 
> With regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
> 
> 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v2 6/6] python/machine: remove unused sock_dir argument

2023-07-25 Thread Daniel P . Berrangé
On Tue, Jul 25, 2023 at 02:33:36PM -0400, John Snow wrote:
> On Tue, Jul 25, 2023 at 2:26 PM Daniel P. Berrangé  
> wrote:
> >
> > On Tue, Jul 25, 2023 at 02:03:37PM -0400, John Snow wrote:
> > > By using a socketpair for all of the sockets managed by the VM class and
> > > its extensions, we don't need the sock_dir argument anymore, so remove
> > > it.
> > >
> > > We only added this argument so that we could specify a second, shorter
> > > temporary directory for cases where the temp/log dirs were "too long" as
> > > a socket name on macOS. We don't need it for this class now. In one
> > > case, avocado testing takes over responsibility for creating an
> > > appropriate sockdir.
> > >
> > > Signed-off-by: John Snow 
> > > ---
> > >  python/qemu/machine/machine.py | 18 --
> > >  python/qemu/machine/qtest.py   |  5 +
> > >  tests/avocado/acpi-bits.py |  5 +
> > >  tests/avocado/avocado_qemu/__init__.py |  2 +-
> > >  tests/avocado/machine_aspeed.py|  5 -
> > >  tests/qemu-iotests/iotests.py  |  2 +-
> > >  tests/qemu-iotests/tests/copy-before-write |  3 +--
> > >  7 files changed, 9 insertions(+), 31 deletions(-)
> >
> > Reviewed-by: Daniel P. Berrangé 
> >
> 
> Thanks!
> 
> I don't know if we want this for *this* cycle or not, it's "only
> testing code" and it should hopefully be harmless. If it makes the
> tests more reliable, it might be worth it. I don't have strong
> feelings one way or the other, we've lived without it for so long
> as-is.
> 
> I'll see what Peter says.

Although it does affect end users, the biggest impact is our own
CI. Once the release is out it would help CI on stable trees,
but the big win is CI on master.

I'd verge towards skipping this during freeze and applying to
master after release. Then propose cherry-pick to stable once
it has had some soak time in our real CI.


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH 06/14] target/arm/ptw: Pass an ARMSecuritySpace to arm_hcr_el2_eff_secstate()

2023-07-25 Thread Richard Henderson

On 7/24/23 07:38, Peter Maydell wrote:

Does it? HCR_EL2 says "behaves as 0 if EL2 is not enabled in the
current Security state". If the current Security state is Root then
EL2 isn't enabled (because there's no such thing as EL2 Root), so the
function should return 0, shouldn't it?


I guess there's an argument that what the spec really means is
"the security state described by the current effective value
of SCR_EL3.{NSE,NS}" (to steal language from the docs of the
AT operations), though.


Yes, that's how I read it.


r~



Re: [PATCH v2 6/6] python/machine: remove unused sock_dir argument

2023-07-25 Thread John Snow
On Tue, Jul 25, 2023 at 2:26 PM Daniel P. Berrangé  wrote:
>
> On Tue, Jul 25, 2023 at 02:03:37PM -0400, John Snow wrote:
> > By using a socketpair for all of the sockets managed by the VM class and
> > its extensions, we don't need the sock_dir argument anymore, so remove
> > it.
> >
> > We only added this argument so that we could specify a second, shorter
> > temporary directory for cases where the temp/log dirs were "too long" as
> > a socket name on macOS. We don't need it for this class now. In one
> > case, avocado testing takes over responsibility for creating an
> > appropriate sockdir.
> >
> > Signed-off-by: John Snow 
> > ---
> >  python/qemu/machine/machine.py | 18 --
> >  python/qemu/machine/qtest.py   |  5 +
> >  tests/avocado/acpi-bits.py |  5 +
> >  tests/avocado/avocado_qemu/__init__.py |  2 +-
> >  tests/avocado/machine_aspeed.py|  5 -
> >  tests/qemu-iotests/iotests.py  |  2 +-
> >  tests/qemu-iotests/tests/copy-before-write |  3 +--
> >  7 files changed, 9 insertions(+), 31 deletions(-)
>
> Reviewed-by: Daniel P. Berrangé 
>

Thanks!

I don't know if we want this for *this* cycle or not, it's "only
testing code" and it should hopefully be harmless. If it makes the
tests more reliable, it might be worth it. I don't have strong
feelings one way or the other, we've lived without it for so long
as-is.

I'll see what Peter says.

--js

> With regards,
> Daniel
> --
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
>




Re: [PATCH v9 06/10] migration: New migrate and migrate-incoming argument 'channels'

2023-07-25 Thread Daniel P . Berrangé
On Fri, Jul 21, 2023 at 02:49:31PM +, Het Gala wrote:
> MigrateChannelList allows to connect across multiple interfaces.
> Add MigrateChannelList struct as argument to migration QAPIs.
> 
> We plan to include multiple channels in future, to connect
> multiple interfaces. Hence, we choose 'MigrateChannelList'
> as the new argument over 'MigrateChannel' to make migration
> QAPIs future proof.
> 
> Suggested-by: Aravind Retnakaran 
> Signed-off-by: Het Gala 
> Acked-by: Markus Armbruster 
> ---
>  migration/migration-hmp-cmds.c |   6 +-
>  migration/migration.c  |  34 --
>  qapi/migration.json| 109 -
>  softmmu/vl.c   |   2 +-
>  4 files changed, 139 insertions(+), 12 deletions(-)
> 
> diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
> index 9885d7c9f7..49b150f33f 100644
> --- a/migration/migration-hmp-cmds.c
> +++ b/migration/migration-hmp-cmds.c
> @@ -424,7 +424,7 @@ void hmp_migrate_incoming(Monitor *mon, const QDict 
> *qdict)
>  Error *err = NULL;
>  const char *uri = qdict_get_str(qdict, "uri");
>  
> -qmp_migrate_incoming(uri, &err);
> +qmp_migrate_incoming(uri, false, NULL, &err);
>  
>  hmp_handle_error(mon, err);
>  }
> @@ -705,8 +705,8 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
>  const char *uri = qdict_get_str(qdict, "uri");
>  Error *err = NULL;
>  
> -qmp_migrate(uri, !!blk, blk, !!inc, inc,
> -false, false, true, resume, &err);
> +qmp_migrate(uri, false, NULL, !!blk, blk, !!inc, inc,
> + false, false, true, resume, &err);
>  if (hmp_handle_error(mon, err)) {
>  return;
>  }
> diff --git a/migration/migration.c b/migration/migration.c
> index f37b388876..bd3a93fc8c 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -466,10 +466,22 @@ static bool migrate_uri_parse(const char *uri,
>  return true;
>  }
>  
> -static void qemu_start_incoming_migration(const char *uri, Error **errp)
> +static void qemu_start_incoming_migration(const char *uri, bool has_channels,
> +  MigrationChannelList *channels,
> +  Error **errp)
>  {
>  g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
>  
> +/*
> + * Having preliminary checks for uri and channel
> + */
> +if (uri && has_channels) {
> +error_setg(errp, "'uri' and 'channels' arguments are mutually "
> +   "exclusive; exactly one of the two should be present in "
> +   "'migrate-incoming' qmp command ");
> +return;
> +}

This checks if both are present.

Also needs a check for when neither is present, as that's invalid too.
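
A minimal sketch of the combined check being asked for (the wording of
the second error message is only illustrative):

    if (uri && has_channels) {
        error_setg(errp, "'uri' and 'channels' arguments are mutually "
                   "exclusive; exactly one of the two should be present in "
                   "'migrate-incoming' qmp command");
        return;
    } else if (!uri && !has_channels) {
        /* neither was given, which is just as invalid */
        error_setg(errp, "neither 'uri' nor 'channels' was specified; "
                   "exactly one of the two is required in "
                   "'migrate-incoming' qmp command");
        return;
    }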


> @@ -1694,6 +1708,16 @@ void qmp_migrate(const char *uri, bool has_blk, bool 
> blk,
>  MigrationState *s = migrate_get_current();
>  g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
>  
> +/*
> + * Having preliminary checks for uri and channel
> + */
> +if (uri && has_channels) {
> +error_setg(errp, "'uri' and 'channels' arguments are mutually "
> +   "exclusive; exactly one of the two should be present in "
> +   "'migrate' qmp command ");
> +return;
> +}

Same here 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v2 6/6] python/machine: remove unused sock_dir argument

2023-07-25 Thread Daniel P . Berrangé
On Tue, Jul 25, 2023 at 02:03:37PM -0400, John Snow wrote:
> By using a socketpair for all of the sockets managed by the VM class and
> its extensions, we don't need the sock_dir argument anymore, so remove
> it.
> 
> We only added this argument so that we could specify a second, shorter
> temporary directory for cases where the temp/log dirs were "too long" as
> a socket name on macOS. We don't need it for this class now. In one
> case, avocado testing takes over responsibility for creating an
> appropriate sockdir.
> 
> Signed-off-by: John Snow 
> ---
>  python/qemu/machine/machine.py | 18 --
>  python/qemu/machine/qtest.py   |  5 +
>  tests/avocado/acpi-bits.py |  5 +
>  tests/avocado/avocado_qemu/__init__.py |  2 +-
>  tests/avocado/machine_aspeed.py|  5 -
>  tests/qemu-iotests/iotests.py  |  2 +-
>  tests/qemu-iotests/tests/copy-before-write |  3 +--
>  7 files changed, 9 insertions(+), 31 deletions(-)

Reviewed-by: Daniel P. Berrangé 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v2 5/6] python/machine: use socketpair() for qtest connection

2023-07-25 Thread Daniel P . Berrangé
On Tue, Jul 25, 2023 at 02:03:36PM -0400, John Snow wrote:
> Like the QMP and console sockets, begin using socketpairs for the qtest
> connection, too. After this patch, we'll be able to remove the vestigial
> sock_dir argument, but that cleanup is best done in its own patch.
> 
> Signed-off-by: John Snow 
> ---
>  python/qemu/machine/qtest.py | 49 +---
>  1 file changed, 40 insertions(+), 9 deletions(-)

Reviewed-by: Daniel P. Berrangé 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v2 0/7] migration: Better error handling in return path thread

2023-07-25 Thread Fabiano Rosas
Fabiano Rosas  writes:

> Peter Xu  writes:
>
>> v2:
>> - Patch "migration: Provide explicit error message for file shutdowns"
>>   - Touched up qapi doc [Fabiano]
>>   - Added Bugzilla link to commit which I didn't even notice that I was
>> fixing a bug.. but rightfully pointed out by Laszlo.
>>   - Moved it to the 1st patch because it fixes a bug, please consider
>> review and merge it earlier.
>>
>> This is a small series that reworks error handling of postcopy return path
>> threads.
>>
>> We used to contain a bunch of error_report(), converting them into
>> error_setg() properly and deliver any of those errors to migration generic
>> error reports (via migrate_set_error()).  Then these errors can also be
>> observed in query-migrate after postcopy is paused.
>>
>> Dropped the return-path specific error reporting: mark_source_rp_bad(),
>> because it's a duplication if we can always use migrate_set_error().
>>
>> Please have a look, thanks.
>>
>> Peter Xu (7):
>>   migration: Display error in query-migrate irrelevant of status
>>   migration: Let migrate_set_error() take ownership
>>   migration: Introduce migrate_has_error()
>>   migration: Refactor error handling in source return path
>>   migration: Deliver return path file error to migrate state too
>>   qemufile: Always return a verbose error
>>   migration: Provide explicit error message for file shutdowns
>>
>>  qapi/migration.json  |   5 +-
>>  migration/migration.h|   8 +-
>>  migration/ram.h  |   5 +-
>>  migration/channel.c  |   1 -
>>  migration/migration.c| 168 +++
>>  migration/multifd.c  |  10 +--
>>  migration/postcopy-ram.c |   1 -
>>  migration/qemu-file.c|  20 -
>>  migration/ram.c  |  42 +-
>>  migration/trace-events   |   2 +-
>>  10 files changed, 149 insertions(+), 113 deletions(-)
>
> Hi Peter,
>
> Were you aiming at solving any specific bug with this series? I'm seeing
> a bug on master (361d5397355) with the
> /x86_64/migration/postcopy/preempt/recovery/plain test around the areas
> that this series touches.
>
> It happens very rarely and I'm still investigating, but in case you have
> any thoughts:
>
> 
> It seems there's a race condition between postcopy resume and the return
> path cleanup.
>
> It is possible for open_return_path_on_source() to setup the new
> QEMUFile *before* the cleanup path at source_return_path_thread() has
> had a chance to run, so we end up calling migration_release_dst_files()
> on the new file and ms->rp_state.from_dst_file gets set to NULL again,
> leading to a SIGSEGV at qemu_file_get_error(rp) due to rp being NULL.

I did some more digging and this is indeed what happens. When we pause
on the incoming side, the to_src_file is closed and the source return
path sees an error (EBADFD) which leads to the cleanup (from_dst_file =
NULL). This happens independently and without any synchronization with a
potential concurrent resume operation.

Is there a reason for not closing the return path thread and starting a
new one for resume? The from_dst_file is the only thing being changed
anyway. It would allow us to remove the retry logic along with the
problematic cleanup path and not need another synchronization point
between qmp_migrate() and the return path.

Here's the race (important bit is open_return_path happening before
migration_release_dst_files):

  [qmp]          qmp_migrate_pause()
  [qmp]           shutdown(ms->to_dst_file)
  [qmp]            f->last_error = -EIO
  [migration]    migrate_detect_error()
  [migration]     postcopy_pause()
  [migration]      set_state(PAUSED)
  [migration]      wait(postcopy_pause_sem)
  [qmp]          qmp_migrate(resume)
  [qmp]          migrate_fd_connect()
  [qmp]           resume = state == PAUSED
  [qmp]           open_return_path  <-- TOO SOON!
  [qmp]           set_state(RECOVER)
  [qmp]           post(postcopy_pause_sem)
                 (incoming closes to_src_file)
  [return path]  res = qemu_file_get_error(rp)
  [return path]  migration_release_dst_files()
  [return path]   ms->rp_state.from_dst_file = NULL
  [migration]      post(postcopy_pause_rp_sem)
  [return path]  postcopy_pause_return_path_thread()
  [return path]   wait(postcopy_pause_rp_sem)
  [return path]  rp = ms->rp_state.from_dst_file
  [return path]  goto retry
  [return path]  qemu_file_get_error(rp)
  [return path]  SIGSEGV

Re: [PATCH v2 4/6] python/machine: use socketpair() for console connections

2023-07-25 Thread Daniel P . Berrangé
On Tue, Jul 25, 2023 at 02:03:35PM -0400, John Snow wrote:
> Create a socketpair for the console output. This should help eliminate
> race conditions around console text early in the boot process that might
> otherwise have been dropped on the floor before being able to connect to
> QEMU under "server,nowait".
> 
> Signed-off-by: John Snow 
> ---
>  python/qemu/machine/machine.py | 30 +++---
>  1 file changed, 27 insertions(+), 3 deletions(-)

Reviewed-by: Daniel P. Berrangé 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v2 3/6] python/console_socket: accept existing FD in initializer

2023-07-25 Thread Daniel P . Berrangé
On Tue, Jul 25, 2023 at 02:03:34PM -0400, John Snow wrote:
> Useful if we want to use ConsoleSocket() for a socket created by
> socketpair().
> 
> Signed-off-by: John Snow 
> ---
>  python/qemu/machine/console_socket.py | 29 +++
>  1 file changed, 21 insertions(+), 8 deletions(-)

Reviewed-by: Daniel P. Berrangé 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v2 2/6] python/machine: close sock_pair in cleanup path

2023-07-25 Thread Daniel P . Berrangé
On Tue, Jul 25, 2023 at 02:03:33PM -0400, John Snow wrote:
> If everything has gone smoothly, we'll already have closed the socket we
> gave to the child during post_launch. The other half of the pair that we
> gave to the QMP connection should, likewise, be definitively closed by
> now.
> 
> However, in the cleanup path, it's possible we've created the socketpair
> but flubbed the launch and need to clean up resources. These resources
> *would* be handled by the garbage collector, but that can happen at
> unpredictable times. Nicer to just clean them up synchronously on the
> exit path, here.
> 
> Signed-off-by: John Snow 
> ---
>  python/qemu/machine/machine.py | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
> index 8be0f684fe..26f0fb8a81 100644
> --- a/python/qemu/machine/machine.py
> +++ b/python/qemu/machine/machine.py
> @@ -395,6 +395,11 @@ def _post_shutdown(self) -> None:
>  finally:
>  assert self._qmp_connection is None
>  
> +if self._sock_pair:
> +self._sock_pair[0].close()
> +self._sock_pair[1].close()
> +self._sock_pair = None
> +

Reviewed-by: Daniel P. Berrangé 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v2 1/6] python/machine: move socket setup out of _base_args property

2023-07-25 Thread Daniel P . Berrangé
On Tue, Jul 25, 2023 at 02:03:32PM -0400, John Snow wrote:
> This property isn't meant to do much else besides return a list of
> strings, so move this setup back out into _pre_launch().
> 
> Signed-off-by: John Snow 
> ---
>  python/qemu/machine/machine.py | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v6 4/6] qapi: Add HvBalloonDeviceInfo sub-type to MemoryDeviceInfo

2023-07-25 Thread Maciej S. Szmigiero

On 25.07.2023 10:25, Markus Armbruster wrote:

"Maciej S. Szmigiero"  writes:


From: "Maciej S. Szmigiero" 

Used by the hv-balloon driver to report its provided memory state
information.

Co-developed-by: David Hildenbrand 
Signed-off-by: Maciej S. Szmigiero 
---
  hw/core/machine-hmp-cmds.c | 15 +++
  qapi/machine.json  | 39 --
  2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index c3e55ef9e9cd..7b06ed35decb 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -247,6 +247,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict 
*qdict)
  MemoryDeviceInfo *value;
  PCDIMMDeviceInfo *di;
  SgxEPCDeviceInfo *se;
+HvBalloonDeviceInfo *hi;
  
  for (info = info_list; info; info = info->next) {

  value = info->value;
@@ -304,6 +305,20 @@ void hmp_info_memory_devices(Monitor *mon, const QDict 
*qdict)
  monitor_printf(mon, "  node: %" PRId64 "\n", se->node);
  monitor_printf(mon, "  memdev: %s\n", se->memdev);
  break;
+case MEMORY_DEVICE_INFO_KIND_HV_BALLOON:
+hi = value->u.hv_balloon.data;
+monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
+   MemoryDeviceInfoKind_str(value->type),
+   hi->id ? hi->id : "");
+if (hi->has_memaddr) {
+monitor_printf(mon, "  memaddr: 0x%" PRIx64 "\n",
+   hi->memaddr);
+}
+monitor_printf(mon, "  max-size: %" PRIu64 "\n", hi->max_size);
+if (hi->memdev) {
+monitor_printf(mon, "  memdev: %s\n", hi->memdev);
+}
+break;
  default:
  g_assert_not_reached();
  }
diff --git a/qapi/machine.json b/qapi/machine.json
index a08b6576cac6..5ede977cf2bc 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1265,6 +1265,29 @@
}
  }
  
+##

+# @HvBalloonDeviceInfo:
+#
+# hv-balloon provided memory state information
+#
+# @id: device's ID
+#
+# @memaddr: physical address in memory, where device is mapped
+#
+# @max-size: the maximum size of memory that the device can provide
+#
+# @memdev: memory backend linked with device
+#
+# Since: TBD


I understand why you put in TBD here (aiming for a moving target is a
hassle), but patches not marked RFC should have no known issues that
should be fixed before merging them.


Will change TBD to 8.2 then.


+##
+{ 'struct': 'HvBalloonDeviceInfo',
+  'data': { '*id': 'str',
+'*memaddr': 'size',
+'max-size': 'size',
+'*memdev': 'str'
+  }
+}
+
  ##
  # @MemoryDeviceInfoKind:
  #
@@ -1276,10 +1299,13 @@
  #
  # @sgx-epc: since 6.2.
  #
+# @hv-balloon: since TBD.
+#
  # Since: 2.1
  ##
  { 'enum': 'MemoryDeviceInfoKind',
-  'data': [ 'dimm', 'nvdimm', 'virtio-pmem', 'virtio-mem', 'sgx-epc' ] }
+  'data': [ 'dimm', 'nvdimm', 'virtio-pmem', 'virtio-mem', 'sgx-epc',
+'hv-balloon' ] }
  
  ##

  # @PCDIMMDeviceInfoWrapper:
@@ -1313,6 +1339,14 @@
  { 'struct': 'SgxEPCDeviceInfoWrapper',
'data': { 'data': 'SgxEPCDeviceInfo' } }
  
+##

+# @HvBalloonDeviceInfoWrapper:
+#
+# Since: TBD
+##
+{ 'struct': 'HvBalloonDeviceInfoWrapper',
+  'data': { 'data': 'HvBalloonDeviceInfo' } }
+
  ##
  # @MemoryDeviceInfo:
  #
@@ -1327,7 +1361,8 @@
  'nvdimm': 'PCDIMMDeviceInfoWrapper',
  'virtio-pmem': 'VirtioPMEMDeviceInfoWrapper',
  'virtio-mem': 'VirtioMEMDeviceInfoWrapper',
-'sgx-epc': 'SgxEPCDeviceInfoWrapper'
+'sgx-epc': 'SgxEPCDeviceInfoWrapper',
+'hv-balloon': 'HvBalloonDeviceInfoWrapper'
}
  }
  


The organization of the series feels a bit awkward.

In this patch, you define QAPI types and add a bit of code reading them,
but the code creating them is left for later.

In the next patch, you define a QMP event, but the code sending it is
left for later.

In the final, huge patch, you fill in the blanks.

Adding definitions before their uses can be the least awkward solution.
But then the commit messages should point out that uses come later.
Describing these future uses briefly may be necessary to help the reader
understand the patch on its own.

Perhaps you can restructure the series instead.


Will make use of your suggestion in the other e-mail to refactor
it like this:

0. The driver with QMP stuff omitted / stubbed out

1. Enable query-memory-devices

2. Add HV_BALLOON_STATUS_REPORT event


Thanks,
Maciej




Re: [PATCH v6 5/6] qapi: Add HV_BALLOON_STATUS_REPORT event

2023-07-25 Thread Maciej S. Szmigiero

On 25.07.2023 10:04, Markus Armbruster wrote:

"Maciej S. Szmigiero"  writes:


From: "Maciej S. Szmigiero" 

Used by the hv-balloon driver for (optional) guest memory status reports.


Inhowfar optional? What enables / triggers it?


They are enabled by "status-report=on" device property, hence they don't
need to be enabled if unwanted.
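
For illustration, that is something like
"-device hv-balloon,status-report=on" on the QEMU command line (assuming
the device and property names used by this series).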

As you have written below, each status report is generated by the guest
sending a DM_STATUS_REPORT message (which guests do periodically).



Use case for the event?


To monitor memory state in the guest, for example for some QEMU
auto-ballooning controller.


Could a status event make sense for other balloon drivers as well?


virtio-balloon has some guest memory stats support, too, but
with important differences, because in virtio-balloon:
1) Stats retrieval is driven by the QEMU process (essentially
polling the guest),

2) There's no notification mechanism for QEMU controller to know
that new stats have arrived from the guest,

3) The list of available individual stats is not constant,
rather it's an array of (TAG, VALUE) pairs.


Signed-off-by: Maciej S. Szmigiero 
---
  qapi/machine.json | 25 +
  1 file changed, 25 insertions(+)

diff --git a/qapi/machine.json b/qapi/machine.json
index 5ede977cf2bc..9649616b9ed2 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1113,6 +1113,31 @@
  { 'event': 'BALLOON_CHANGE',
'data': { 'actual': 'int' } }
  
+##

+# @HV_BALLOON_STATUS_REPORT:
+#
+# Emitted when the hv-balloon driver receives a "STATUS" message from
+# the guest.


Aha, the event is triggered by the guest.  It must therefore be
rate-limited, just like BALLOON_CHANGE.  To do that, add it to
monitor_qapi_event_conf[] in monitor/monitor.c, and document it as noted
below.
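
For illustration, a minimal sketch of such an entry (the 1000 ms
interval mirrors BALLOON_CHANGE and is an assumption here):

    [QAPI_EVENT_HV_BALLOON_STATUS_REPORT] = { 1000 * SCALE_MS },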


Ack.


+#
+# @commited: the amount of memory in use inside the guest plus the amount
+#of the memory unusable inside the guest (ballooned out,
+#offline, etc.)
+#
+# @available: the amount of the memory inside the guest available for new
+# allocations ("free")


Spelling: committed.  Remember to update the example, too.


Ack.


Please format like

# @committed: the amount of memory in use inside the guest plus the
# amount of the memory unusable inside the guest (ballooned out,
# offline, etc.)
#
# @available: the amount of the memory inside the guest available for
# new allocations ("free")

to blend in with recent commit a937b6aa739 (qapi: Reformat doc comments
to conform to current conventions).


Ack.


+#


To document rate-limiting, add:

# Note: this event is rate-limited.
#


Ack.


+# Since: TBD
+#
+# Example:
+#
+# <- { "event": "HV_BALLOON_STATUS_REPORT",
+#  "data": { "commited": 81664, "available": 054464 },
+#  "timestamp": { "seconds": 1600295492, "microseconds": 661044 } }
+#
+##
+{ 'event': 'HV_BALLOON_STATUS_REPORT',
+  'data': { 'commited': 'size', 'available': 'size' } }
+
  ##
  # @MemoryInfo:
  #


An event is commonly paired with a query command, so that QMP clients
can resynchronize state after missing events, e.g. when reconnecting
after a client restart.

query-balloon isn't such a query: it returns less than the event.

If a paired query doesn't make sense, explain why.



Will add a query command that returns the last STATUS
event data.
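
For illustration, such a query could look roughly like this (command and
type names are placeholders here, not the final interface):

    { 'struct': 'HvBalloonInfo',
      'data': { 'committed': 'size', 'available': 'size' } }

    { 'command': 'query-hv-balloon-status-report',
      'returns': 'HvBalloonInfo' }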

Thanks,
Maciej




Re: [PATCH v6 0/6] Hyper-V Dynamic Memory Protocol driver (hv-balloon ️)

2023-07-25 Thread David Hildenbrand

That commit would be called something like "pc: Support hv-balloon".


If I remove the driver from Kconfig in the initial patch then AFAIK
this initial patch will add a dead driver file that cannot be built
yet, right?


Yes, that's also what we did for virtio-mem:

(bottom to top)

0ed48fd32e pc: Support for virtio-mem-pci
16647a8224 numa: Handle virtio-mem in NUMA stats
2e70874b16 hmp: Handle virtio-mem when printing memory device info
751c7bdd04 MAINTAINERS: Add myself as virtio-mem maintainer
0b9a2443a4 virtio-pci: Proxy for virtio-mem
910b25766b virtio-mem: Paravirtualized memory hot(un)plug

And virtio-pmem:

(bottom to top)

a0a49813f7 pc: Support for virtio-pmem-pci
cae02c3480 numa: Handle virtio-pmem in NUMA stats
d766b22bbd hmp: Handle virtio-pmem when printing memory device infos
adf0748a49 virtio-pci: Proxy for virtio-pmem
9f583bdd47 virtio-pmem: sync linux headers
5f503cd9f3 virtio-pmem: add virtio device


As you're adding all in a single series, that's perfectly fine.

--
Cheers,

David / dhildenb




Re: [PATCH v6 0/6] Hyper-V Dynamic Memory Protocol driver (hv-balloon ️)

2023-07-25 Thread Maciej S. Szmigiero

On 24.07.2023 16:42, David Hildenbrand wrote:

On 20.07.23 12:12, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

This is a continuation of the v5 of the patch series located here:
https://lore.kernel.org/qemu-devel/cover.1686577753.git.maciej.szmigi...@oracle.com/



We're now in QEMU soft-freeze, which means the memslot series might take a bit 
to land. I'm going to follow-up on that soonish.


Ack, [1] even says that we're in a hard-freeze already.



Changes from v5:
* Incorporate David's rework of the driver on top of his virtio-mem-memslots
patches (specifically, commit 6769107d1a4f), making use of a memory region
container created upfront to avoid calling memory_device{,_pre}_plug()
functions from the driver and introducing a driver-specific MemoryDeviceInfo
sub-type.

* Include two additional David's memory-device patches necessary for the
aforementioned conversion in this patch set.

* Use multiple memslots to cover the hot-add memory backend in order to
reduce metadata size for the not-yet-hot-added part of the memory backend.

* Add David's "Co-developed-by:" to patches where he contributed some changes.

* Use OBJECT_DEFINE_TYPE_WITH_INTERFACES() and OBJECT_DECLARE_SIMPLE_TYPE()
macros instead of open-coding the equivalent functionality.

* Drop no longer necessary patch adding g_autoptr() cleanup function for the
Error type.


David Hildenbrand (2):
   memory-device: Support empty memory devices
   memory-device: Drop size alignment check

Maciej S. Szmigiero (4):
   Add Hyper-V Dynamic Memory Protocol definitions
   qapi: Add HvBalloonDeviceInfo sub-type to MemoryDeviceInfo
   qapi: Add HV_BALLOON_STATUS_REPORT event
   Add a Hyper-V Dynamic Memory Protocol driver (hv-balloon)


That is still a gigantic patch. Is there any way to split that into reasonable 
chunks? For example, move the whole hotplug/memslot part into
a dedicated patch?


Will move hot-add support from the initial driver patch to a separate one.


See below on splitting off the PC changes.



  Kconfig.host |    3 +
  hw/core/machine-hmp-cmds.c   |   15 +
  hw/hyperv/Kconfig    |    5 +
  hw/hyperv/hv-balloon.c   | 2246 ++
  hw/hyperv/meson.build    |    1 +
  hw/hyperv/trace-events   |   18 +
  hw/i386/pc.c |   22 +
  hw/mem/memory-device.c   |   45 +-
  include/hw/hyperv/dynmem-proto.h |  423 ++
  include/hw/hyperv/hv-balloon.h   |   18 +
  include/hw/mem/memory-device.h   |    7 +-
  meson.build  |   28 +-
  meson_options.txt    |    2 +
  qapi/machine.json    |   64 +-
  scripts/meson-buildoptions.sh    |    3 +


It's probably best to separate the actual device implementation from wiring up 
the machine. That is, have a HV_BALLOON_SUPPORTED kconfig
(like VIRTIO_MEM_SUPPORTED), and activate that in a single commit for
PC, where you also modify hw/i386/pc.c.

That commit would be called something like "pc: Support hv-balloon".
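
For illustration, the Kconfig split could look roughly like this,
mirroring the VIRTIO_MEM pattern (option names are assumptions):

    # hw/hyperv/Kconfig
    config HV_BALLOON_SUPPORTED
        bool

    config HV_BALLOON
        bool
        default y
        depends on HV_BALLOON_SUPPORTED

    # hw/i386/Kconfig, under the PC machine entry
        select HV_BALLOON_SUPPORTED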


If I remove the driver from Kconfig in the initial patch then AFAIK
this initial patch will add a dead driver file that cannot be built
yet, right?

Or is there some configure-time override for lack of specific Kconfig option?

Thanks,
Maciej

[1]: https://wiki.qemu.org/Planning/8.1




[PATCH v2 0/6] python/machine: use socketpair() for console socket

2023-07-25 Thread John Snow
Like we did for the QMP socket, use socketpair() for the console socket
so that hopefully there isn't a race condition during early boot where
data might get dropped on the floor.

May or may not help with various race conditions where early console
output is not showing up in the logs and/or potentially being missed by
wait_for_console_pattern.
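
For illustration only, the basic shape of the idea (not code from this
series):

    import os
    import socket

    # A connected pair exists before QEMU starts, so nothing written to
    # the console early in boot can be dropped while waiting for a
    # listening socket to be accepted.
    ours, theirs = socket.socketpair()
    os.set_inheritable(theirs.fileno(), True)

    # The inheritable end is handed to QEMU as a chardev fd, e.g.
    #   -chardev socket,id=console,fd=<fd> -serial chardev:console
    # and early console output is then read from `ours`.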

V2:
  - Fixed some Socket ownership/garbage collection problems
  - Fixed callers of now-dropped VM arguments/properties
  - added a dedicated sock_fd arg to ConsoleSocket()
  - now using socketpair() for qtest console, too.
  - dropped sock_dir arg from *all* machine.py classes
  - Tested quite a bit more thoroughly ...

CI: https://gitlab.com/jsnow/qemu/-/pipelines/945067498

John Snow (6):
  python/machine: move socket setup out of _base_args property
  python/machine: close sock_pair in cleanup path
  python/console_socket: accept existing FD in initializer
  python/machine: use socketpair() for console connections
  python/machine: use socketpair() for qtest connection
  python/machine: remove unused sock_dir argument

 python/qemu/machine/console_socket.py  | 29 ---
 python/qemu/machine/machine.py | 58 +-
 python/qemu/machine/qtest.py   | 54 +++-
 tests/avocado/acpi-bits.py |  5 +-
 tests/avocado/avocado_qemu/__init__.py |  2 +-
 tests/avocado/machine_aspeed.py|  5 +-
 tests/qemu-iotests/iotests.py  |  2 +-
 tests/qemu-iotests/tests/copy-before-write |  3 +-
 8 files changed, 104 insertions(+), 54 deletions(-)

-- 
2.41.0





[PATCH v2 4/6] python/machine: use socketpair() for console connections

2023-07-25 Thread John Snow
Create a socketpair for the console output. This should help eliminate
race conditions around console text early in the boot process that might
otherwise have been dropped on the floor before being able to connect to
QEMU under "server,nowait".

Signed-off-by: John Snow 
---
 python/qemu/machine/machine.py | 30 +++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index 26f0fb8a81..09f214c95c 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -159,6 +159,8 @@ def __init__(self,
 
 self._name = name or f"{id(self):x}"
 self._sock_pair: Optional[Tuple[socket.socket, socket.socket]] = None
+self._cons_sock_pair: Optional[
+Tuple[socket.socket, socket.socket]] = None
 self._temp_dir: Optional[str] = None
 self._base_temp_dir = base_temp_dir
 self._sock_dir = sock_dir
@@ -315,8 +317,9 @@ def _base_args(self) -> List[str]:
 for _ in range(self._console_index):
 args.extend(['-serial', 'null'])
 if self._console_set:
-chardev = ('socket,id=console,path=%s,server=on,wait=off' %
-   self._console_address)
+assert self._cons_sock_pair is not None
+fd = self._cons_sock_pair[0].fileno()
+chardev = f"socket,id=console,fd={fd}"
 args.extend(['-chardev', chardev])
 if self._console_device_type is None:
 args.extend(['-serial', 'chardev:console'])
@@ -351,6 +354,10 @@ def _pre_launch(self) -> None:
 nickname=self._name
 )
 
+if self._console_set:
+self._cons_sock_pair = socket.socketpair()
+os.set_inheritable(self._cons_sock_pair[0].fileno(), True)
+
 # NOTE: Make sure any opened resources are *definitely* freed in
 # _post_shutdown()!
 # pylint: disable=consider-using-with
@@ -368,6 +375,9 @@ def _pre_launch(self) -> None:
 def _post_launch(self) -> None:
 if self._sock_pair:
 self._sock_pair[0].close()
+if self._cons_sock_pair:
+self._cons_sock_pair[0].close()
+
 if self._qmp_connection:
 if self._sock_pair:
 self._qmp.connect()
@@ -518,6 +528,11 @@ def _early_cleanup(self) -> None:
 self._console_socket.close()
 self._console_socket = None
 
+if self._cons_sock_pair:
+self._cons_sock_pair[0].close()
+self._cons_sock_pair[1].close()
+self._cons_sock_pair = None
+
 def _hard_shutdown(self) -> None:
 """
 Perform early cleanup, kill the VM, and wait for it to terminate.
@@ -878,10 +893,19 @@ def console_socket(self) -> socket.socket:
 Returns a socket connected to the console
 """
 if self._console_socket is None:
+if not self._console_set:
+raise QEMUMachineError(
+"Attempt to access console socket with no connection")
+assert self._cons_sock_pair is not None
+# os.dup() is used here for sock_fd because otherwise we'd
+# have two rich python socket objects that would each try to
+# close the same underlying fd when either one gets garbage
+# collected.
 self._console_socket = console_socket.ConsoleSocket(
-self._console_address,
+sock_fd=os.dup(self._cons_sock_pair[1].fileno()),
 file=self._console_log_path,
 drain=self._drain_console)
+self._cons_sock_pair[1].close()
 return self._console_socket
 
 @property
-- 
2.41.0




[PATCH v2 6/6] python/machine: remove unused sock_dir argument

2023-07-25 Thread John Snow
By using a socketpair for all of the sockets managed by the VM class and
its extensions, we don't need the sock_dir argument anymore, so remove
it.

We only added this argument so that we could specify a second, shorter
temporary directory for cases where the temp/log dirs were "too long" as
a socket name on macOS. We don't need it for this class now. In one
case, avocado testing takes over responsibility for creating an
appropriate sockdir.

Signed-off-by: John Snow 
---
 python/qemu/machine/machine.py | 18 --
 python/qemu/machine/qtest.py   |  5 +
 tests/avocado/acpi-bits.py |  5 +
 tests/avocado/avocado_qemu/__init__.py |  2 +-
 tests/avocado/machine_aspeed.py|  5 -
 tests/qemu-iotests/iotests.py  |  2 +-
 tests/qemu-iotests/tests/copy-before-write |  3 +--
 7 files changed, 9 insertions(+), 31 deletions(-)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index 09f214c95c..1dd2de6da8 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -127,7 +127,6 @@ def __init__(self,
  name: Optional[str] = None,
  base_temp_dir: str = "/var/tmp",
  monitor_address: Optional[SocketAddrT] = None,
- sock_dir: Optional[str] = None,
  drain_console: bool = False,
  console_log: Optional[str] = None,
  log_dir: Optional[str] = None,
@@ -141,7 +140,6 @@ def __init__(self,
 @param name: prefix for socket and log file names (default: qemu-PID)
 @param base_temp_dir: default location where temp files are created
 @param monitor_address: address for QMP monitor
-@param sock_dir: where to create socket (defaults to base_temp_dir)
 @param drain_console: (optional) True to drain console socket to buffer
 @param console_log: (optional) path to console log file
 @param log_dir: where to create and keep log files
@@ -163,7 +161,6 @@ def __init__(self,
 Tuple[socket.socket, socket.socket]] = None
 self._temp_dir: Optional[str] = None
 self._base_temp_dir = base_temp_dir
-self._sock_dir = sock_dir
 self._log_dir = log_dir
 
 self._monitor_address = monitor_address
@@ -189,9 +186,6 @@ def __init__(self,
 self._console_index = 0
 self._console_set = False
 self._console_device_type: Optional[str] = None
-self._console_address = os.path.join(
-self.sock_dir, f"{self._name}.con"
-)
 self._console_socket: Optional[socket.socket] = None
 self._remove_files: List[str] = []
 self._user_killed = False
@@ -334,9 +328,6 @@ def args(self) -> List[str]:
 return self._args
 
 def _pre_launch(self) -> None:
-if self._console_set:
-self._remove_files.append(self._console_address)
-
 if self._qmp_set:
 if self._monitor_address is None:
 self._sock_pair = socket.socketpair()
@@ -918,15 +909,6 @@ def temp_dir(self) -> str:
   dir=self._base_temp_dir)
 return self._temp_dir
 
-@property
-def sock_dir(self) -> str:
-"""
-Returns the directory used for sockfiles by this machine.
-"""
-if self._sock_dir:
-return self._sock_dir
-return self.temp_dir
-
 @property
 def log_dir(self) -> str:
 """
diff --git a/python/qemu/machine/qtest.py b/python/qemu/machine/qtest.py
index 8180d3ab01..4f5ede85b2 100644
--- a/python/qemu/machine/qtest.py
+++ b/python/qemu/machine/qtest.py
@@ -135,17 +135,14 @@ def __init__(self,
  wrapper: Sequence[str] = (),
  name: Optional[str] = None,
  base_temp_dir: str = "/var/tmp",
- sock_dir: Optional[str] = None,
  qmp_timer: Optional[float] = None):
 # pylint: disable=too-many-arguments
 
 if name is None:
 name = "qemu-%d" % os.getpid()
-if sock_dir is None:
-sock_dir = base_temp_dir
 super().__init__(binary, args, wrapper=wrapper, name=name,
  base_temp_dir=base_temp_dir,
- sock_dir=sock_dir, qmp_timer=qmp_timer)
+ qmp_timer=qmp_timer)
 self._qtest: Optional[QEMUQtestProtocol] = None
 self._qtest_sock_pair: Optional[
 Tuple[socket.socket, socket.socket]] = None
diff --git a/tests/avocado/acpi-bits.py b/tests/avocado/acpi-bits.py
index 3ed286dcbd..bc2b29671e 100644
--- a/tests/avocado/acpi-bits.py
+++ b/tests/avocado/acpi-bits.py
@@ -92,17 +92,14 @@ def __init__(self,
  base_temp_dir: str = "/var/tmp",
  debugcon_log: str = "debugcon-log.txt",
  debugcon_addr: str = "0x403",
- sock_dir: Optional[str] 

[PATCH v2 2/6] python/machine: close sock_pair in cleanup path

2023-07-25 Thread John Snow
If everything has gone smoothly, we'll already have closed the socket we
gave to the child during post_launch. The other half of the pair that we
gave to the QMP connection should, likewise, be definitively closed by
now.

However, in the cleanup path, it's possible we've created the socketpair
but flubbed the launch and need to clean up resources. These resources
*would* be handled by the garbage collector, but that can happen at
unpredictable times. Nicer to just clean them up synchronously on the
exit path, here.

Signed-off-by: John Snow 
---
 python/qemu/machine/machine.py | 5 +
 1 file changed, 5 insertions(+)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index 8be0f684fe..26f0fb8a81 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -395,6 +395,11 @@ def _post_shutdown(self) -> None:
 finally:
 assert self._qmp_connection is None
 
+if self._sock_pair:
+self._sock_pair[0].close()
+self._sock_pair[1].close()
+self._sock_pair = None
+
 self._close_qemu_log_file()
 
 self._load_io_log()
-- 
2.41.0




[PATCH v2 1/6] python/machine: move socket setup out of _base_args property

2023-07-25 Thread John Snow
This property isn't meant to do much else besides return a list of
strings, so move this setup back out into _pre_launch().

Signed-off-by: John Snow 
---
 python/qemu/machine/machine.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index c16a0b6fed..8be0f684fe 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -300,9 +300,7 @@ def _base_args(self) -> List[str]:
 
 if self._qmp_set:
 if self._sock_pair:
-fd = self._sock_pair[0].fileno()
-os.set_inheritable(fd, True)
-moncdev = f"socket,id=mon,fd={fd}"
+moncdev = f"socket,id=mon,fd={self._sock_pair[0].fileno()}"
 elif isinstance(self._monitor_address, tuple):
 moncdev = "socket,id=mon,host={},port={}".format(
 *self._monitor_address
@@ -339,6 +337,7 @@ def _pre_launch(self) -> None:
 if self._qmp_set:
 if self._monitor_address is None:
 self._sock_pair = socket.socketpair()
+os.set_inheritable(self._sock_pair[0].fileno(), True)
 sock = self._sock_pair[1]
 if isinstance(self._monitor_address, str):
 self._remove_files.append(self._monitor_address)
-- 
2.41.0




[PATCH v2 5/6] python/machine: use socketpair() for qtest connection

2023-07-25 Thread John Snow
Like the QMP and console sockets, begin using socketpairs for the qtest
connection, too. After this patch, we'll be able to remove the vestigial
sock_dir argument, but that cleanup is best done in its own patch.

Signed-off-by: John Snow 
---
 python/qemu/machine/qtest.py | 49 +---
 1 file changed, 40 insertions(+), 9 deletions(-)

diff --git a/python/qemu/machine/qtest.py b/python/qemu/machine/qtest.py
index 1c46138bd0..8180d3ab01 100644
--- a/python/qemu/machine/qtest.py
+++ b/python/qemu/machine/qtest.py
@@ -24,6 +24,7 @@
 Optional,
 Sequence,
 TextIO,
+Tuple,
 )
 
 from qemu.qmp import SocketAddrT
@@ -38,23 +39,41 @@ class QEMUQtestProtocol:
 :param address: QEMU address, can be either a unix socket path (string)
 or a tuple in the form ( address, port ) for a TCP
 connection
-:param server: server mode, listens on the socket (bool)
+:param sock: An existing socket can be provided as an alternative to
+ an address. One of address or sock must be provided.
+:param server: server mode, listens on the socket. Only meaningful
+   in conjunction with an address and not an existing
+   socket.
+
 :raise socket.error: on socket connection errors
 
 .. note::
No connection is established by __init__(), this is done
by the connect() or accept() methods.
 """
-def __init__(self, address: SocketAddrT,
+def __init__(self,
+ address: Optional[SocketAddrT] = None,
+ sock: Optional[socket.socket] = None,
  server: bool = False):
+if address is None and sock is None:
+raise ValueError("Either 'address' or 'sock' must be specified")
+if address is not None and sock is not None:
+raise ValueError(
+"Either 'address' or 'sock' must be specified, but not both")
+if sock is not None and server:
+raise ValueError("server=True is meaningless when passing socket")
+
 self._address = address
-self._sock = self._get_sock()
+self._sock = sock or self._get_sock()
 self._sockfile: Optional[TextIO] = None
+
 if server:
+assert self._address is not None
 self._sock.bind(self._address)
 self._sock.listen(1)
 
 def _get_sock(self) -> socket.socket:
+assert self._address is not None
 if isinstance(self._address, tuple):
 family = socket.AF_INET
 else:
@@ -67,7 +86,8 @@ def connect(self) -> None:
 
 @raise socket.error on socket connection errors
 """
-self._sock.connect(self._address)
+if self._address is not None:
+self._sock.connect(self._address)
 self._sockfile = self._sock.makefile(mode='r')
 
 def accept(self) -> None:
@@ -127,29 +147,40 @@ def __init__(self,
  base_temp_dir=base_temp_dir,
  sock_dir=sock_dir, qmp_timer=qmp_timer)
 self._qtest: Optional[QEMUQtestProtocol] = None
-self._qtest_path = os.path.join(sock_dir, name + "-qtest.sock")
+self._qtest_sock_pair: Optional[
+Tuple[socket.socket, socket.socket]] = None
 
 @property
 def _base_args(self) -> List[str]:
 args = super()._base_args
+assert self._qtest_sock_pair is not None
+fd = self._qtest_sock_pair[0].fileno()
 args.extend([
-'-qtest', f"unix:path={self._qtest_path}",
+'-chardev', f"socket,id=qtest,fd={fd}",
+'-qtest', 'chardev:qtest',
 '-accel', 'qtest'
 ])
 return args
 
 def _pre_launch(self) -> None:
+self._qtest_sock_pair = socket.socketpair()
+os.set_inheritable(self._qtest_sock_pair[0].fileno(), True)
 super()._pre_launch()
-self._qtest = QEMUQtestProtocol(self._qtest_path, server=True)
+self._qtest = QEMUQtestProtocol(sock=self._qtest_sock_pair[1])
 
 def _post_launch(self) -> None:
 assert self._qtest is not None
 super()._post_launch()
-self._qtest.accept()
+if self._qtest_sock_pair:
+self._qtest_sock_pair[0].close()
+self._qtest.connect()
 
 def _post_shutdown(self) -> None:
+if self._qtest_sock_pair:
+self._qtest_sock_pair[0].close()
+self._qtest_sock_pair[1].close()
+self._qtest_sock_pair = None
 super()._post_shutdown()
-self._remove_if_exists(self._qtest_path)
 
 def qtest(self, cmd: str) -> str:
 """
-- 
2.41.0




[PATCH v2 3/6] python/console_socket: accept existing FD in initializer

2023-07-25 Thread John Snow
Useful if we want to use ConsoleSocket() for a socket created by
socketpair().

Signed-off-by: John Snow 
---
 python/qemu/machine/console_socket.py | 29 +++
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/python/qemu/machine/console_socket.py 
b/python/qemu/machine/console_socket.py
index 4e28ba9bb2..0a4e09ffc7 100644
--- a/python/qemu/machine/console_socket.py
+++ b/python/qemu/machine/console_socket.py
@@ -24,19 +24,32 @@ class ConsoleSocket(socket.socket):
 """
 ConsoleSocket represents a socket attached to a char device.
 
-Optionally (if drain==True), drains the socket and places the bytes
-into an in memory buffer for later processing.
-
-Optionally a file path can be passed in and we will also
-dump the characters to this file for debugging purposes.
+:param address: An AF_UNIX path or address.
+:param sock_fd: Optionally, an existing socket file descriptor.
+One of address or sock_fd must be specified.
+:param file: Optionally, a filename to log to.
+:param drain: Optionally, drains the socket and places the bytes
+  into an in memory buffer for later processing.
 """
-def __init__(self, address: str, file: Optional[str] = None,
+def __init__(self,
+ address: Optional[str] = None,
+ sock_fd: Optional[int] = None,
+ file: Optional[str] = None,
  drain: bool = False):
+if address is None and sock_fd is None:
+raise ValueError("one of 'address' or 'sock_fd' must be specified")
+if address is not None and sock_fd is not None:
+raise ValueError("can't specify both 'address' and 'sock_fd'")
+
 self._recv_timeout_sec = 300.0
 self._sleep_time = 0.5
 self._buffer: Deque[int] = deque()
-socket.socket.__init__(self, socket.AF_UNIX, socket.SOCK_STREAM)
-self.connect(address)
+if address is not None:
+socket.socket.__init__(self, socket.AF_UNIX, socket.SOCK_STREAM)
+self.connect(address)
+else:
+assert sock_fd is not None
+socket.socket.__init__(self, fileno=sock_fd)
 self._logfile = None
 if file:
 # pylint: disable=consider-using-with
-- 
2.41.0




[PATCH] softmmu/vl: improve select_machine() function

2023-07-25 Thread Vladimir Sementsov-Ogievskiy
 - put machine name into error message (helps debugging CI)
 - fix style (over-80 lines)
 - use g_autoptr
 - drop extra error propagation

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 softmmu/vl.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/softmmu/vl.c b/softmmu/vl.c
index b0b96f67fa..77fe9e52ea 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1596,27 +1596,25 @@ static const QEMUOption *lookup_opt(int argc, char 
**argv,
 static MachineClass *select_machine(QDict *qdict, Error **errp)
 {
 const char *optarg = qdict_get_try_str(qdict, "type");
-GSList *machines = object_class_get_list(TYPE_MACHINE, false);
+g_autoptr(GSList) machines = object_class_get_list(TYPE_MACHINE, false);
 MachineClass *machine_class;
-Error *local_err = NULL;
 
 if (optarg) {
 machine_class = find_machine(optarg, machines);
-qdict_del(qdict, "type");
 if (!machine_class) {
-error_setg(&local_err, "unsupported machine type");
+error_setg(errp, "unsupported machine type: \"%s\"", optarg);
 }
+qdict_del(qdict, "type");
 } else {
 machine_class = find_default_machine(machines);
 if (!machine_class) {
-error_setg(&local_err, "No machine specified, and there is no default");
+error_setg(errp, "No machine specified, and there is no default");
 }
 }
 
-g_slist_free(machines);
-if (local_err) {
-error_append_hint(&local_err, "Use -machine help to list supported machines\n");
-error_propagate(errp, local_err);
+if (!machine_class) {
+error_append_hint(errp,
+  "Use -machine help to list supported machines\n");
 }
 return machine_class;
 }
-- 
2.34.1




[PATCH v4] block-jobs: flush target at the end of .run()

2023-07-25 Thread Vladimir Sementsov-Ogievskiy
From: Vladimir Sementsov-Ogievskiy 

Actually, a block job is not complete without this final flush. It's
rather unexpected to end up with a broken target when the job was
successfully completed long ago but we then fail to flush, or the
process just crashed or was killed.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/backup.c   |  7 +--
 block/commit.c   |  2 +-
 block/mirror.c   |  4 
 block/stream.c   |  7 ++-
 blockjob.c   | 18 ++
 include/block/blockjob_int.h | 11 +++
 6 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index db3791f4d1..b9ff63359a 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -295,10 +295,13 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
 job_yield(job);
 }
 } else {
-return backup_loop(s);
+ret = backup_loop(s);
+if (ret < 0) {
+return ret;
+}
 }
 
-return 0;
+return block_job_final_target_flush(&s->common, s->target_bs);
 }
 
 static void coroutine_fn backup_pause(Job *job)
diff --git a/block/commit.c b/block/commit.c
index aa45beb0f0..15df96b4f3 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -187,7 +187,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 }
 }
 
-return 0;
+return block_job_final_target_flush(&s->common, blk_bs(s->base));
 }
 
 static const BlockJobDriver commit_job_driver = {
diff --git a/block/mirror.c b/block/mirror.c
index d3cacd1708..cd19b49f7f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1143,6 +1143,10 @@ immediate_exit:
 g_free(s->in_flight_bitmap);
 bdrv_dirty_iter_free(s->dbi);
 
+if (ret >= 0) {
+ret = block_job_final_target_flush(&s->common, blk_bs(s->target));
+}
+
 if (need_drain) {
 s->in_drain = true;
 bdrv_drained_begin(bs);
diff --git a/block/stream.c b/block/stream.c
index e522bbdec5..f7e8b35e94 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -131,6 +131,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
+int ret;
 int error = 0;
 int64_t n = 0; /* bytes */
 
@@ -149,7 +150,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 
 for ( ; offset < len; offset += n) {
 bool copy;
-int ret;
 
 /* Note that even when no rate limit is applied we need to yield
  * with no pending I/O here so that bdrv_drain_all() returns.
@@ -207,6 +207,11 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 }
 }
 
+ret = block_job_final_target_flush(&s->common, s->target_bs);
+if (error == 0) {
+error = ret;
+}
+
 /* Do not remove the backing file if an error was there but ignored. */
 return error;
 }
diff --git a/blockjob.c b/blockjob.c
index 25fe8e625d..313e586b0d 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -611,3 +611,21 @@ AioContext *block_job_get_aio_context(BlockJob *job)
 GLOBAL_STATE_CODE();
 return job->job.aio_context;
 }
+
+int coroutine_fn
+block_job_final_target_flush(BlockJob *job, BlockDriverState *target_bs)
+{
+int ret;
+
+WITH_GRAPH_RDLOCK_GUARD() {
+ret = bdrv_co_flush(target_bs);
+}
+
+if (ret < 0 && !block_job_is_internal(job)) {
+qapi_event_send_block_job_error(job->job.id,
+IO_OPERATION_TYPE_WRITE,
+BLOCK_ERROR_ACTION_REPORT);
+}
+
+return ret;
+}
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 104824040c..617e40b916 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -152,4 +152,15 @@ void block_job_ratelimit_sleep(BlockJob *job);
 BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
 int is_read, int error);
 
+/**
+ * block_job_final_target_flush:
+ * @job: The job to signal an error for if flush failed.
+ * @target_bs: The bs to flush.
+ *
+ * The function is intended to be called at the end of .run() for any data
+ * copying job.
+ */
+int coroutine_fn
+block_job_final_target_flush(BlockJob *job, BlockDriverState *target_bs);
+
 #endif
-- 
2.34.1




avocado test failing INTERRUPTED for "Missing asset"

2023-07-25 Thread Peter Maydell
Currently this CI job is failing:

https://gitlab.com/qemu-project/qemu/-/jobs/4737819946

because:

(05/59) 
tests/avocado/boot_linux_console.py:BootLinuxConsole.test_arm_exynos4210_initrd:
INTERRUPTED: Missing asset
https://snapshot.debian.org/archive/debian/20190928T224601Z/pool/main/l/linux/linux-image-4.19.0-6-armmp_4.19.67-2+deb10u1_armhf.deb\nRunner
error occurred: Timeout reached\nOriginal status: CANCEL\n{'name':
'05-tests/avocado/boot_linux_console... (90.67 s)

Why is a "Missing asset" causing a timeout after 90 seconds,
rather than being accounted as a "SKIP" ("missing requirements
in the test environment" sounds like what we have here) ?

I don't understand the debug.log, because it says all of
 * that it retrieved the URL
 * that it wanted to cancel the test
 * that the test timed out

Here it is:

16:03:16 DEBUG| PARAMS (key=arch, path=*, default=arm) => 'arm'
16:03:16 DEBUG| PARAMS (key=cpu, path=*, default=None) => None
16:03:16 DEBUG| PARAMS (key=qemu_bin, path=*,
default=./qemu-system-arm) => './qemu-system-arm'
16:03:16 DEBUG| PARAMS (key=machine, path=*, default=smdkc210) => 'smdkc210'
16:03:16 INFO | Asset not in cache, fetching it.
16:03:16 INFO | Fetching
https://snapshot.debian.org/archive/debian/20190928T224601Z/pool/main/l/linux/linux-image-4.19.0-6-armmp_4.19.67-2+deb10u1_armhf.deb
-> 
/builds/qemu-project/qemu/avocado-cache/by_location/5f20376efeb69c8898caaff3edf7de45b4540163/linux-image-4.19.0-6-armmp_4.19.67-2+deb10u1_armhf.deb.ooffovd_
16:04:05 DEBUG| Retrieved URL
"https://snapshot.debian.org/archive/debian/20190928T224601Z/pool/main/l/linux/linux-image-4.19.0-6-armmp_4.19.67-2+deb10u1_armhf.deb":
content-length 33882084, date: "Tue, 25 Jul 2023 16:03:16 GMT",
last-modified: "Tue, 24 Sep 2019 22:31:23 GMT"
16:04:46 ERROR| RuntimeError: Test interrupted by SIGTERM
16:04:46 ERROR|
16:04:46 ERROR| Reproduced traceback from:
/builds/qemu-project/qemu/build/tests/venv/lib/python3.9/site-packages/avocado/core/test.py:767
16:04:46 ERROR| Traceback (most recent call last):
16:04:46 ERROR|   File
"/builds/qemu-project/qemu/build/tests/venv/lib/python3.9/site-packages/avocado/core/test.py",
line 1043, in fetch_asset
16:04:46 ERROR| return asset_obj.fetch()
16:04:46 ERROR|   File
"/builds/qemu-project/qemu/build/tests/venv/lib/python3.9/site-packages/avocado/utils/asset.py",
line 381, in fetch
16:04:46 ERROR| raise OSError("Failed to fetch %s (%s)." %
(self.asset_name, error))
16:04:46 ERROR| OSError: Failed to fetch
linux-image-4.19.0-6-armmp_4.19.67-2+deb10u1_armhf.deb (Test
interrupted by SIGTERM).
16:04:46 ERROR|
16:04:46 ERROR| During handling of the above exception, another
exception occurred:
16:04:46 ERROR|
16:04:46 ERROR| Traceback (most recent call last):
16:04:46 ERROR|   File
"/builds/qemu-project/qemu/build/tests/avocado/boot_linux_console.py",
line 514, in test_arm_exynos4210_initrd
16:04:46 ERROR| deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
16:04:46 ERROR|   File
"/builds/qemu-project/qemu/build/tests/avocado/avocado_qemu/__init__.py",
line 260, in fetch_asset
16:04:46 ERROR| return super().fetch_asset(name,
16:04:46 ERROR|   File
"/builds/qemu-project/qemu/build/tests/venv/lib/python3.9/site-packages/avocado/core/test.py",
line 1049, in fetch_asset
16:04:46 ERROR| self.cancel("Missing asset {}".format(name))
16:04:46 ERROR|   File
"/builds/qemu-project/qemu/build/tests/venv/lib/python3.9/site-packages/avocado/core/test.py",
line 988, in cancel
16:04:46 ERROR| raise exceptions.TestCancel(message)
16:04:46 ERROR| avocado.core.exceptions.TestCancel: Missing asset
https://snapshot.debian.org/archive/debian/20190928T224601Z/pool/main/l/linux/linux-image-4.19.0-6-armmp_4.19.67-2+deb10u1_armhf.deb
16:04:46 ERROR|
16:04:46 ERROR| CANCEL
05-tests/avocado/boot_linux_console.py:BootLinuxConsole.test_arm_exynos4210_initrd
-> TestCancel: Missing asset
https://snapshot.debian.org/archive/debian/20190928T224601Z/pool/main/l/linux/linux-image-4.19.0-6-armmp_4.19.67-2+deb10u1_armhf.deb
16:04:46 INFO |

Runner error occurred: Timeout reached
Original status: CANCEL
{'name': 
'05-tests/avocado/boot_linux_console.py:BootLinuxConsole.test_arm_exynos4210_initrd',
'logdir': 
'/builds/qemu-project/qemu/build/tests/results/job-2023-07-25T16.00-c6ec778/test-results/05-tests_avocado_boot_linux_console.py_BootLinuxConsole.test_arm_exynos4210_initrd',
'logfile': 
'/builds/qemu-project/qemu/build/tests/results/job-2023-07-25T16.00-c6ec778/test-results/05-tests_avocado_boot_linux_console.py_BootLinuxConsole.test_arm_exynos4210_initrd/debug.log',
'status': 'CANCEL', 'running': False, 'paused': False, 'time_start':
1690300996.270224, 'time_elapsed': 90.66501116752625, 'time_end':
1690301086.9352353, 'fail_reason': 'Missing asset
https://snapshot.debian.org/archive/debian/20190928T224601Z/pool/main/l/linux/linux-image-4.19.0-6-armmp_4.19.67-2+deb10u1_armhf.deb',
'fail_class': 'TestCancel', 'traceback': 'Traceback (most recent call
last):\n  File 

Re: [PATCH] migration: Allow user to specify migration available bandwidth

2023-07-25 Thread Daniel P . Berrangé
On Tue, Jul 25, 2023 at 12:38:23PM -0400, Peter Xu wrote:
> I see you used "convergance" explicitly even after PeterM's reply, is that
> what you prefer over "convergence"?  I do see more occurances of
> "convergence" as a word in migration context, though.

Ignore my speling erors :-)

>   Any better name you
> can come up with, before I just go with "max-convergence-bandwidth" (I
> really cannot come up with anything better than this or available-bandwidth
> for now)?

Another idea could be 'max-switchover-bandwidth'?
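
(For illustration, if that name were picked it would be set like any other
migration parameter, e.g. "migrate_set_parameter max-switchover-bandwidth 100M"
in HMP; the name itself is still under discussion, nothing with that name
exists yet.)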


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH] migrate/multifd: fix coredump when the multifd thread cleanup

2023-07-25 Thread Peter Xu
On Tue, Jul 25, 2023 at 04:43:28PM +0800, chenyuhui (A) wrote:
> @Peter Xu @Fabiano Rosas
> Kindly ping on this.

Ah I see what's missing - please copy maintainer (Juan) for any migration
patches, especially multifd ones..  I'm doing that for this one, but I'd
suggest you repost with a whole patch and information put into commit msg.

Thanks.

> 
> On 2023/6/27 9:11, chenyuhui (A) wrote:
> > 
> > On 2023/6/26 21:16, chenyuhui (A) wrote:
> >>
> >> On 2023/6/21 22:22, Fabiano Rosas wrote:
> >>> Jianguo Zhang via  writes:
> >>>
>  From: Yuhui Chen 
> 
>  There is a coredump while trying to destroy a mutex when
>  p->running is false but p->mutex is not unlocked.
>  Make sure all mutexes have been released before destroying them.
> 
>  Signed-off-by: Yuhui Chen 
>  ---
>   migration/multifd.c | 6 ++
>   1 file changed, 2 insertions(+), 4 deletions(-)
> 
>  diff --git a/migration/multifd.c b/migration/multifd.c
>  index b7ad7002e0..7dcdb2d3a0 100644
>  --- a/migration/multifd.c
>  +++ b/migration/multifd.c
>  @@ -523,9 +523,7 @@ void multifd_save_cleanup(void)
>   for (i = 0; i < migrate_multifd_channels(); i++) {
>   MultiFDSendParams *p = &multifd_send_state->params[i];
>   
>  -if (p->running) {
> >>>
> >>> The need for this flag is dubious IMO. Commit 10351fbad1
> >>> ("migration/multifd: Join all multifd threads in order to avoid leaks")
> >>> already moved the other join outside of it. If we figure out another way
> >>> to deal with the sem_sync lockup we could probably remove this
> >>> altogether.
> >>
> >>
> >> I've seen this commit 10351fbad1, and it seems to have the same
> >> problem in function multifd_save_cleanup.
> >>
> >> So maybe my patch only needs to modify multifd_save_cleanup.
> >>
> >> __
> >>
> >>
> >> On 2023/6/21 21:24, Peter Xu wrote:
> >>> On Wed, Jun 21, 2023 at 04:18:26PM +0800, Jianguo Zhang via wrote:
>  From: Yuhui Chen
> 
>  There is a coredump while trying to destroy a mutex when
>  p->running is false but p->mutex is not unlocked.
>  Make sure all mutexes have been released before destroying them.
> >>>
> >>> It'll be nice to add a backtrace of the coredump here, and also copy
> >>> maintainer (Juan Quintela, copied now).
> >>>
> >>
> >> The following is coredump, and my code is base on
> >> https://github.com/qemu/qemu.git tag v6.2.0.
> >>
> > (gdb) bt
> > #0  0xabe3b2b8 in  () at /usr/lib64/libc.so.6
> > #1  0xabdf6d7c in raise () at /usr/lib64/libc.so.6
> > #2  0xabde4d2c in abort () at /usr/lib64/libc.so.6
> > #3  0xc67fcc10 in error_exit (err=, 
> > msg=msg@entry=0xc6dc52b8 <__func__.33> "qemu_mutex_destroy") at 
> > ../util/qemu-thread-posix.c:38
> > #4  0xc67fce38 in qemu_mutex_destroy 
> > (mutex=mutex@entry=0xfa1a4250) at ../util/qemu-thread-posix.c:71
> > #5  0xc6055688 in multifd_save_cleanup () at 
> > ../migration/multifd.c:555
> > #6  0xc6050198 in migrate_fd_cleanup (s=s@entry=0xf7518800) at 
> > ../migration/migration.c:1808
> > #7  0xc6050384 in migrate_fd_cleanup_bh (opaque=0xf7518800) at 
> > ../migration/migration.c:1850
> > #8  0xc680d790 in aio_bh_call (bh=0xa0004c40) at 
> > ../util/async.c:141
> > #9  aio_bh_poll (ctx=ctx@entry=0xf73285a0) at ../util/async.c:169
> > #10 0xc67f9e18 in aio_dispatch (ctx=0xf73285a0) at 
> > ../util/aio-posix.c:381
> > #11 0xc680d414 in aio_ctx_dispatch (source=, 
> > callback=, user_data=) at ../util/async.c:311
> > #12 0xac44cf88 in g_main_context_dispatch () at 
> > /usr/lib64/libglib-2.0.so.0
> > #13 0xc6819214 in glib_pollfds_poll () at ../util/main-loop.c:232
> > #14 os_host_main_loop_wait (timeout=73500) at ../util/main-loop.c:255
> > #15 main_loop_wait (nonblocking=nonblocking@entry=0) at 
> > ../util/main-loop.c:531
> > #16 0xc65005cc in qemu_main_loop () at ../softmmu/runstate.c:726
> > #17 0xc5fe2030 in main (argc=, argv=, 
> > envp=) at ../softmmu/main.c:50
> > (gdb) q
> > 
> >> How reproducible:
> >> 1、Add sleep time to produce the case where p->running is false but p->mutex is
> >>  not unlocked. (apply the following patch)
> >> 2、Do migration with --parallel-connections.
>  From: Yuhui Chen 
> >> Date: Mon, 26 Jun 2023 14:24:35 +0800
> >> Subject: [DEBUG][PATCH] Add sleep time to produce p->running is false but 
> >> p->mutex is
> >>  not unlocked.
> >>
> >> ---
> >>  migration/multifd.c | 2 ++
> >>  1 file changed, 2 insertions(+)
> >>
> >> diff --git a/migration/multifd.c b/migration/multifd.c
> >> index 7c9deb1921..09a7b0748a 100644
> >> --- a/migration/multifd.c
> >> +++ b/migration/multifd.c
> >> @@ -538,6 +538,7 @@ void multifd_save_cleanup(void)
> >>  for (i = 0; i < migrate_multifd_channels(); i++) {
> >>  MultiFDSendParams *p = &multifd_send_state->params[i];
> >>
> >> +sleep(2);
> >> 
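To make the abort path in the backtrace above concrete: a minimal stand-alone
sketch, assuming (as the backtrace suggests) that qemu_mutex_destroy() reaches
pthread_mutex_destroy(), which fails with EBUSY on glibc for a still-locked
mutex, and that QEMU then aborts via error_exit(). Everything outside the
pthread API itself is illustrative.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    pthread_mutex_t m;

    pthread_mutex_init(&m, NULL);
    pthread_mutex_lock(&m);

    /* Destroying a still-locked mutex: glibc reports EBUSY. */
    int err = pthread_mutex_destroy(&m);
    if (err) {
        /* Roughly what error_exit() in util/qemu-thread-posix.c does. */
        fprintf(stderr, "qemu_mutex_destroy: %s\n", strerror(err));
        abort();
    }
    return 0;
}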

Re: [PATCH 01/10] hw/arm/virt-acpi-build.c: Move fw_cfg and virtio to common location

2023-07-25 Thread Sunil V L
On Mon, Jul 24, 2023 at 05:18:59PM +0200, Igor Mammedov wrote:
> On Wed, 12 Jul 2023 22:09:34 +0530
> Sunil V L  wrote:
> 
> > The functions which add fw_cfg and virtio to DSDT are the same for ARM
> > and RISC-V. So, instead of duplicating in RISC-V, move them from
> > hw/arm/virt-acpi-build.c to common aml-build.c.
> > 
> > Signed-off-by: Sunil V L 
> > ---
> >  hw/acpi/aml-build.c | 41 
> >  hw/arm/virt-acpi-build.c| 42 -
> >  hw/riscv/virt-acpi-build.c  | 16 --
> >  include/hw/acpi/aml-build.h |  6 ++
> >  4 files changed, 47 insertions(+), 58 deletions(-)
> > 
> > diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> 
> patch looks fine modulo,
> I'd put these into respective device files instead of generic
> aml-build.c which was intended for basic AML primitives
> (it's got polluted over time with device-specific functions
> but that's not a reason to continue doing that)
> 
> Also, having those functions along with the device models
> goes along with self-enumerating ACPI devices (currently
> it works for x86 PCI/ISA devices but there is no reason
> that it can't work with other types as well when
> I get there)
> 
Thanks, Igor! Let me add them to device-specific files as per your
recommendation.

Thanks!
Sunil



Re: [Qemu RFC 0/7] Early enabling of DCD emulation in Qemu

2023-07-25 Thread Fan Ni
On Tue, Jul 25, 2023 at 08:18:08AM -0700, Ira Weiny wrote:

> Fan Ni wrote:
> > On Thu, May 11, 2023 at 05:56:40PM +, Fan Ni wrote:
> > 
> > FYI.
> > 
> > I have updated the patch series and sent out again.
> > 
> > I suggest that anyone who is interested in DCD and using this patch series
> > use the new series. Quite a few things have been fixed.
> > 
> > https://lore.kernel.org/linux-cxl/20230724162313.34196-1-fan...@samsung.com/T/#t
> > 
> > Also, if you want to use the code repo directly, you can try
> > 
> > https://github.com/moking/qemu-dcd-preview-latest/tree/dcd-dev
> 
> Thanks for the branch!
> 
> I took a quick look and I don't see a resolution to the problem I
> mentioned with non DCD devices being supported.[1]
> 
> [1] 
> https://lore.kernel.org/all/6483946e8152f_f1132294a2@iweiny-mobl.notmuch/
> 
> Did you fix this in a different way?  If I don't add DC to my mem devices they
> don't get probed properly.  I'm still looking into this with your new branch,
> but I don't think DC commands should be in the CEL if the device does not
> support it.
> 
> Also I get a build warning on this branch I had to fix[3] as my build is
> treating warnings as errors.[2]
> 
> I don't think this fix is technically necessary as 'list' should never be NULL
> that I can see.  But might be nice to check or just use my fix.
> 
> I'll try and get to a review once I get the DCD stuff out on the list again.
> 
> Ira
> 

Oh, I missed your previous comments; let me look into it, fix it accordingly,
and send out a new version.

Btw, when I did the DCD test with the latest DCD kernel code, I found
some issues there.

When I add a DCD extent for the first time, it is recognized as
system RAM automatically and shows up with the lsmem command.

However, when I release it and try to re-add the same extent again,
the adding seems normal and the device shows up under /dev/ as
dax0.X. But it does not show up with the lsmem command, and I have to use
the daxctl reconfigure command to turn it into system ram before it can
show up with lsmem. I would expect the behavior of the
first add and the second add to be the same.
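For reference, the daxctl step I mean is something like the following (the
device name is illustrative):

  $ daxctl reconfigure-device --mode=system-ram dax0.0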

Fan.


> 
> [2]
> ../hw/mem/cxl_type3.c: In function 
> ‘qmp_cxl_process_dynamic_capacity_event.constprop’:
> ../hw/mem/cxl_type3.c:2063:28: error: ‘rid’ may be used uninitialized 
> [-Werror=maybe-uninitialized]
>  2063 | dCap.updated_region_id = rid;
>   | ~~~^
> ../hw/mem/cxl_type3.c:1987:13: note: ‘rid’ was declared here
>  1987 | uint8_t rid;
>   | ^~~
> cc1: all warnings being treated as errors
> 
> [3]
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index e67328780407..d25e6064f6c9 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1984,7 +1984,7 @@ static void 
> qmp_cxl_process_dynamic_capacity_event(const char *path,
>  CXLDCExtentRecordList *list = records;
>  CXLDCExtent_raw *extents;
>  uint64_t dpa, len;
> -uint8_t rid;
> +uint8_t rid = 0;
>  int i;
>  
>  if (!obj) {

s390 intermittent test failure in qemu:block / io-qcow2-copy-before-write

2023-07-25 Thread Peter Maydell
There seems to be an intermittent failure on the s390 host in
the qemu:block / io-qcow2-copy-before-write test:
https://gitlab.com/qemu-project/qemu/-/jobs/4737819873

The log says the test was expecting to do some reading
and writing but got an unexpected 'permission denied'
error on the read. Any idea why this might happen ?

768/835 qemu:block / io-qcow2-copy-before-write ERROR 12.05s exit status 1
>>> PYTHON=/home/gitlab-runner/builds/-LCfcJ2T/0/qemu-project/qemu/build/pyvenv/bin/python3
>>>  MALLOC_PERTURB_=101 
>>> /home/gitlab-runner/builds/-LCfcJ2T/0/qemu-project/qemu/build/pyvenv/bin/python3
>>>  
>>> /home/gitlab-runner/builds/-LCfcJ2T/0/qemu-project/qemu/build/../tests/qemu-iotests/check
>>>  -tap -qcow2 copy-before-write --source-dir 
>>> /home/gitlab-runner/builds/-LCfcJ2T/0/qemu-project/qemu/tests/qemu-iotests 
>>> --build-dir 
>>> /home/gitlab-runner/builds/-LCfcJ2T/0/qemu-project/qemu/build/tests/qemu-iotests
― ✀ ―
stderr:
--- 
/home/gitlab-runner/builds/-LCfcJ2T/0/qemu-project/qemu/tests/qemu-iotests/tests/copy-before-write.out
+++ 
/home/gitlab-runner/builds/-LCfcJ2T/0/qemu-project/qemu/build/scratch/qcow2-file-copy-before-write/copy-before-write.out.bad
@@ -1,5 +1,21 @@
-
+...F
+==
+FAIL: test_timeout_break_snapshot (__main__.TestCbwError)
+--
+Traceback (most recent call last):
+ File 
"/home/gitlab-runner/builds/-LCfcJ2T/0/qemu-project/qemu/tests/qemu-iotests/tests/copy-before-write",
line 210, in test_timeout_break_snapshot
+ self.assertEqual(log, """\
+AssertionError: 'wrot[195 chars]read 1048576/1048576 bytes at offset
0\n1 MiB,[46 chars]c)\n' != 'wrot[195 chars]read failed: Permission
denied\n'
+ wrote 524288/524288 bytes at offset 0
+ 512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+ wrote 524288/524288 bytes at offset 524288
+ 512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++ read failed: Permission denied
+- read 1048576/1048576 bytes at offset 0
+- 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+
--
Ran 4 tests
-OK
+FAILED (failures=1)
(test program exited with status code 1)
――

Same failure, previous job:

https://gitlab.com/qemu-project/qemu/-/jobs/4736463062

This one's a "Failed to get write lock" in io-qcow2-161:

https://gitlab.com/qemu-project/qemu/-/jobs/4734846533

(I'm not sure whether there's something up with the s390
at the moment -- it seems to be producing these odd
failures, not always in the iotests. I'm wondering if
it's just running too slowly and we're hitting race
conditions in test cases.)

thanks
-- PMM



Re: [PATCH] migration: Allow user to specify migration available bandwidth

2023-07-25 Thread Peter Xu
Hi, Markus,

On Tue, Jul 25, 2023 at 01:10:01PM +0200, Markus Armbruster wrote:
> Peter Xu  writes:
> 
> > Migration bandwidth is a very important value to live migration.  It's
> > because it's one of the major factors in the decision on when to
> > switch over to the destination in a precopy process.
> >
> > This value is currently estimated by QEMU during the whole live migration
> > process by monitoring how fast we were sending the data.  This can be the
> > most accurate bandwidth if in the ideal world, where we're always feeding
> > unlimited data to the migration channel, and then it'll be limited to the
> > bandwidth that is available.
> >
> > However in reality it may be very different, e.g., over a 10Gbps network we
> > can see query-migrate showing migration bandwidth of only a few tens of
> > MB/s just because there are plenty of other things the migration thread
> > might be doing.  For example, the migration thread can be busy scanning
> > zero pages, or it can be fetching dirty bitmap from other external dirty
> > sources (like vhost or KVM).  It means we may not be pushing data as much
> > as possible to migration channel, so the bandwidth estimated from "how many
> > data we sent in the channel" can be dramatically inaccurate sometimes,
> > e.g., that a few tens of MB/s even if 10Gbps available, and then the
> > decision to switchover will be further affected by this.
> >
> > The migration may not even converge at all with the downtime specified,
> > with that wrong estimation of bandwidth.
> >
> > The issue is QEMU itself may not be able to avoid those uncertainties on
> > measuring the real "available migration bandwidth".  At least not something
> > I can think of so far.
> >
> > One way to fix this is when the user is fully aware of the available
> > bandwidth, then we can allow the user to help providing an accurate value.
> >
> > For example, if the user has a dedicated channel of 10Gbps for migration
> > for this specific VM, the user can specify this bandwidth so QEMU can
> > always do the calculation based on this fact, trusting the user as long as
> > specified.
> >
> > When the user wants to have migration only use 5Gbps out of that 10Gbps,
> > one can set max-bandwidth to 5Gbps, along with available-bandwidth to 5Gbps
> > so it'll never use over 5Gbps too (so the user can have the rest 5Gbps for
> > other things).  So it can be useful even if the network is not dedicated,
> > but as long as the user can know a solid value.
> >
> > A new parameter "available-bandwidth" is introduced just for this. So when
> > the user specified this parameter, instead of trusting the estimated value
> > from QEMU itself (based on the QEMUFile send speed), let's trust the user
> > more.
> >
> > This can resolve issues like "unconvergence migration" which is caused by
> > hilarious low "migration bandwidth" detected for whatever reason.
> >
> > Reported-by: Zhiyi Guo 
> > Signed-off-by: Peter Xu 
> > ---
> >  qapi/migration.json| 20 +++-
> >  migration/migration.h  |  2 +-
> >  migration/options.h|  1 +
> >  migration/migration-hmp-cmds.c | 14 ++
> >  migration/migration.c  | 19 +++
> >  migration/options.c| 28 
> >  migration/trace-events |  2 +-
> >  7 files changed, 79 insertions(+), 7 deletions(-)
> >
> > diff --git a/qapi/migration.json b/qapi/migration.json
> > index 47dfef0278..fdc269e0a1 100644
> > --- a/qapi/migration.json
> > +++ b/qapi/migration.json
> > @@ -730,6 +730,16 @@
> >  # @max-bandwidth: to set maximum speed for migration.  maximum speed
> >  # in bytes per second.  (Since 2.8)
> >  #
> > +# @available-bandwidth: to set available bandwidth for migration.  By
> > +# default, this value is zero, means the user is not aware of the
> > +# available bandwidth that can be used by QEMU migration, so QEMU will
> > +# estimate the bandwidth automatically.  This can be set when the
> > +# estimated value is not accurate, while the user is able to guarantee
> > +# such bandwidth is available for migration purpose during the
> > +# migration procedure.  When specified correctly, this can make the
> > +# switchover decision much more accurate, which will also be based on
> > +# the max downtime specified.  (Since 8.2)
> 
> Humor me: break lines slightly earlier, like
> 
># @available-bandwidth: to set available bandwidth for migration.  By
># default, this value is zero, means the user is not aware of the
># available bandwidth that can be used by QEMU migration, so QEMU
># will estimate the bandwidth automatically.  This can be set when
># the estimated value is not accurate, while the user is able to
># guarantee such bandwidth is available for migration purpose
># during the migration procedure.  When specified correctly, this
># can make the switchover 
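For completeness, a sketch of how such a knob would be set if it lands under
this name, using the existing migrate-set-parameters QMP command; the
available-bandwidth name and bytes-per-second unit are taken from the patch
text above, and the values (a dedicated 10Gbps link) are illustrative:

  { "execute": "migrate-set-parameters",
    "arguments": { "max-bandwidth": 1250000000,
                   "available-bandwidth": 1250000000 } }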

Re: [PATCH v1] block/stream:add flush l2_table_cache,ensure data integrity

2023-07-25 Thread Vladimir Sementsov-Ogievskiy

On 25.07.23 18:13, Denis V. Lunev wrote:

On 7/25/23 16:25, Vladimir Sementsov-Ogievskiy wrote:

On 24.07.23 10:30, Evanzhang wrote:

block_stream does not actively flush the l2_table_cache; when the qemu
process exits abnormally, this can cause disk data loss

Signed-off-by: Evanzhang 
---
  block/stream.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/block/stream.c b/block/stream.c
index e522bbd..a5e08da 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -207,6 +207,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
  }
  }
  +    /*
+ * Complete stream_populate, force flush of the l2_table_cache, to
+ * avoid l2_table loss on unexpected termination of the process
+ */
+    qcow2_cache_flush(bs, ((BDRVQcow2State *)bs->opaque)->l2_table_cache);
+
  /* Do not remove the backing file if an error was there but ignored. */
  return error;
  }


Hi!

I think it's more correct to just call bdrv_co_flush(bs), which should do the
whole job. Also, stream_run() should fail if the flush fails.
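A rough sketch of that suggestion, assuming it would sit near the end of
stream_run() and reuse its existing 'error' return value; the placement and
error handling here are assumptions, not the final patch:

    /* Flush the whole node instead of only the qcow2 L2 table cache,
     * and propagate a flush failure as the job's result. */
    ret = bdrv_co_flush(bs);
    if (ret < 0 && error == 0) {
        error = ret;
    }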

Also, I remember I've done this for all (or at least several) blockjobs
generically, so that any blockjob must successfully flush its target to report
success. But now I can find neither my patches nor the code :( Den, Kevin,
Hanna, don't you remember this topic?


This was part of the compressed write cache series, which was postponed.

https://lore.kernel.org/all/20210305173507.393137-1-vsement...@virtuozzo.com/T/#m87315593ed5ab16e5d0e4e7a5ae6d776fbbaec77

We have it ported to 7.0 QEMU.

Not a problem to port to master and resend.
Will this make sense?



Oh, thanks! Patch 01 applies to master with a small conflict, so I'll just
resend it myself.

--
Best regards,
Vladimir




Re: [PATCH] migration: Allow user to specify migration available bandwidth

2023-07-25 Thread Peter Xu
On Tue, Jul 25, 2023 at 05:09:57PM +0100, Daniel P. Berrangé wrote:
> On Tue, Jul 25, 2023 at 11:54:52AM -0400, Peter Xu wrote:
> > We can make the semantics specific, no strong opinion here.  I wished it
> > can be as generic / easy as possible but maybe I went too far.
> > 
> > Though, is there anything else we can choose from besides
> > "max-convergence-bandwidth"? Or am I the only one that thinks it's hard to
> understand when putting "max" and "convergence" together?
> > 
> > When I take one step back to look at the whole "bandwidth" parameters, I am
> > not sure why we'd even need both "convergence" and "postcopy" bandwidth
> > being separate.  With my current understanding of migration, we may
> > actually need:
> > 
> >   - One bandwidth that we may want to run the background migration, aka,
> > precopy migration, where we don't rush on pushing data.
> > 
> >   - One bandwidth that is whatever we can have maximum; for dedicated NIC
> > that's the line speed.  We should always use this full speed for
> > important things.  I'd say postcopy falls into this, and this
> > "convergence" calculation should also rely on this.
> 
> I don't think postcopy should be assumed to run at line speed.
> 
> At the point where you flip to post-copy mode, there could
> conceivably still be GB's worth of data still dirty and
> pending transfer.
> 
> The migration convergance step is reasonable to put at line
> speed, because the max downtime parameter caps how long this
> burst will be, genrally to some fraction of a second.
> 
> Once in post-copy mode, while the remaining data to transfer
> is finite, the wall clock time to complete that transfer may
> still be huge. It is unreasonable to assume users want to
> run at max linespeed for many minutes to finish post-copy
> at least in terms of the background transfer. You could make
> a  case for the page fault handling to run at a higher bandwidth
> cap than the background transfer, but I think it is still probably
> not reasonable to run page fault fetches at line speed by default.
> 
> IOW, I don't think we can put the same bandwidth limit on the
> short convergance operation, as on the longer post-copy operation.

Postcopy still heavily affects the performance of the VM for the whole
duration, and afaiu that's so far the major issue (after we fix postcopy
interruptions with recovery capability) that postcopy may not be wanted in
many cases.

If I am the admin I'd want it to run at full speed even if the pages were
not directly requested just to shrink the duration of postcopy; I'd just
want to make sure requested pages are queued sooner.

But that's okay if any of us still thinks that three values would be
helpful here, because we can simply have the latter two having the same
value when we want.  Three is the superset of two anyway.

I see you used "convergance" explicitly even after PeterM's reply; is that
what you prefer over "convergence"?  I do see more occurrences of
"convergence" as a word in migration context, though.  Any better name you
can come up with, before I just go with "max-convergence-bandwidth" (I
really cannot come up with anything better than this or available-bandwidth
for now)?

Thanks,

-- 
Peter Xu




Re: [PATCH v2] kvm: Remove KVM_CREATE_IRQCHIP support assumption

2023-07-25 Thread Daniel Henrique Barboza




On 7/25/23 09:26, Andrew Jones wrote:

Since Linux commit 00f918f61c56 ("RISC-V: KVM: Skeletal in-kernel AIA
irqchip support") checking KVM_CAP_IRQCHIP returns non-zero when the
RISC-V platform has AIA. The cap indicates KVM supports at least one
of the following ioctls:

   KVM_CREATE_IRQCHIP
   KVM_IRQ_LINE
   KVM_GET_IRQCHIP
   KVM_SET_IRQCHIP
   KVM_GET_LAPIC
   KVM_SET_LAPIC

but the cap doesn't imply that KVM must support any of those ioctls
in particular. However, QEMU was assuming the KVM_CREATE_IRQCHIP
ioctl was supported. Stop making that assumption by introducing a
KVM parameter that each architecture which supports KVM_CREATE_IRQCHIP
sets. Adding parameters isn't awesome, but given how the
KVM_CAP_IRQCHIP isn't very helpful on its own, we don't have a lot of
options.

Signed-off-by: Andrew Jones 
---


Reviewed-by: Daniel Henrique Barboza 



While this fixes booting guests on riscv KVM with AIA it's unlikely
to get merged before the QEMU support for KVM AIA[1] lands, which
would also fix the issue. I think this patch is still worth considering
though since QEMU's assumption is wrong.

[1] https://lore.kernel.org/all/20230714084429.22349-1-yongxuan.w...@sifive.com/

v2:
   - Move the s390x code to an s390x file. [Thomas]
   - Drop the KVM_CAP_IRQCHIP check from the top of kvm_irqchip_create(),
 as it's no longer necessary.

  accel/kvm/kvm-all.c| 16 
  include/sysemu/kvm.h   |  1 +
  target/arm/kvm.c   |  3 +++
  target/i386/kvm/kvm.c  |  2 ++
  target/s390x/kvm/kvm.c | 11 +++
  5 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 373d876c0580..cddcb6eca641 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -86,6 +86,7 @@ struct KVMParkedVcpu {
  };
  
  KVMState *kvm_state;

+bool kvm_has_create_irqchip;
  bool kvm_kernel_irqchip;
  bool kvm_split_irqchip;
  bool kvm_async_interrupts_allowed;
@@ -2358,17 +2359,6 @@ static void kvm_irqchip_create(KVMState *s)
  int ret;
  
  assert(s->kernel_irqchip_split != ON_OFF_AUTO_AUTO);

-if (kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
-;
-} else if (kvm_check_extension(s, KVM_CAP_S390_IRQCHIP)) {
-ret = kvm_vm_enable_cap(s, KVM_CAP_S390_IRQCHIP, 0);
-if (ret < 0) {
-fprintf(stderr, "Enable kernel irqchip failed: %s\n", 
strerror(-ret));
-exit(1);
-}
-} else {
-return;
-}
  
  /* First probe and see if there's a arch-specific hook to create the

   * in-kernel irqchip for us */
@@ -2377,8 +2367,10 @@ static void kvm_irqchip_create(KVMState *s)
  if (s->kernel_irqchip_split == ON_OFF_AUTO_ON) {
  error_report("Split IRQ chip mode not supported.");
  exit(1);
-} else {
+} else if (kvm_has_create_irqchip) {
  ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+} else {
+return;
  }
  }
  if (ret < 0) {
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 115f0cca79d1..84b1bb3dc91e 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -32,6 +32,7 @@
  #ifdef CONFIG_KVM_IS_POSSIBLE
  
  extern bool kvm_allowed;

+extern bool kvm_has_create_irqchip;
  extern bool kvm_kernel_irqchip;
  extern bool kvm_split_irqchip;
  extern bool kvm_async_interrupts_allowed;
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index b4c7654f4980..2fa87b495d68 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -250,6 +250,9 @@ int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool 
*fixed_ipa)
  int kvm_arch_init(MachineState *ms, KVMState *s)
  {
  int ret = 0;
+
+kvm_has_create_irqchip = kvm_check_extension(s, KVM_CAP_IRQCHIP);
+
  /* For ARM interrupt delivery is always asynchronous,
   * whether we are using an in-kernel VGIC or not.
   */
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index ebfaf3d24c79..6363e67f092d 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2771,6 +2771,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
  }
  }
  
+kvm_has_create_irqchip = kvm_check_extension(s, KVM_CAP_IRQCHIP);

+
  return 0;
  }
  
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c

index a9e5880349d9..bcc735227f7d 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -391,6 +391,17 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
  }
  
  kvm_set_max_memslot_size(KVM_SLOT_MAX_BYTES);

+
+kvm_has_create_irqchip = kvm_check_extension(s, KVM_CAP_S390_IRQCHIP);
+if (kvm_has_create_irqchip) {
+int ret = kvm_vm_enable_cap(s, KVM_CAP_S390_IRQCHIP, 0);
+
+if (ret < 0) {
+fprintf(stderr, "Enable kernel irqchip failed: %s\n", 
strerror(-ret));
+exit(1);
+}
+}
+
  return 0;
  }
  




[PULL 1/2] crypto: Always initialize splitkeylen

2023-07-25 Thread Daniel P . Berrangé
From: Akihiko Odaki 

When _FORTIFY_SOURCE=2, glibc version is 2.35, and GCC version is
12.1.0, the compiler complains as follows:

In file included from /usr/include/string.h:535,
 from /home/alarm/q/var/qemu/include/qemu/osdep.h:99,
 from ../crypto/block-luks.c:21:
In function 'memset',
inlined from 'qcrypto_block_luks_store_key' at ../crypto/block-luks.c:843:9:
/usr/include/bits/string_fortified.h:59:10: error: 'splitkeylen' may be used 
uninitialized [-Werror=maybe-uninitialized]
   59 |   return __builtin___memset_chk (__dest, __ch, __len,
  |  ^~~~
   60 |  __glibc_objsize0 (__dest));
  |  ~~
../crypto/block-luks.c: In function 'qcrypto_block_luks_store_key':
../crypto/block-luks.c:699:12: note: 'splitkeylen' was declared here
  699 | size_t splitkeylen;
  |^~~

It seems the compiler cannot see that splitkeylen will not be used
when splitkey is NULL. Suppress the warning by initializing splitkeylen
even when splitkey stays NULL.

Signed-off-by: Akihiko Odaki 
Signed-off-by: Daniel P. Berrangé 
---
 crypto/block-luks.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crypto/block-luks.c b/crypto/block-luks.c
index 5688783ab1..2f59c3a625 100644
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
@@ -706,14 +706,14 @@ qcrypto_block_luks_store_key(QCryptoBlock *block,
 
 assert(slot_idx < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS);
 slot = &luks->header.key_slots[slot_idx];
+splitkeylen = luks->header.master_key_len * slot->stripes;
+
 if (qcrypto_random_bytes(slot->salt,
  QCRYPTO_BLOCK_LUKS_SALT_LEN,
  errp) < 0) {
 goto cleanup;
 }
 
-splitkeylen = luks->header.master_key_len * slot->stripes;
-
 /*
  * Determine how many iterations are required to
  * hash the user password while consuming 1 second of compute
-- 
2.41.0




[PULL 0/2] Misc next patches

2023-07-25 Thread Daniel P . Berrangé
The following changes since commit a279ca4ea07383314b2d2b2f1d550be9482f148e:

  Merge tag 'pull-target-arm-20230725' of 
https://git.linaro.org/people/pmaydell/qemu-arm into staging (2023-07-25 
12:44:39 +0100)

are available in the Git repository at:

  https://gitlab.com/berrange/qemu tags/misc-next-pull-request

for you to fetch changes up to 0e6b20b9656174e815751cf8b21f5e326148bb99:

  hw/usb/canokey: change license to GPLv2+ (2023-07-25 17:24:12 +0100)


Miscellaneous fixes

 * Switch canokey license from Apache to GPLv2+
 * Fix uninitialized variable in LUKS driver



Akihiko Odaki (1):
  crypto: Always initialize splitkeylen

Hongren (Zenithal) Zheng (1):
  hw/usb/canokey: change license to GPLv2+

 crypto/block-luks.c | 4 ++--
 hw/usb/canokey.c| 2 +-
 hw/usb/canokey.h| 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

-- 
2.41.0




[PULL 0/2] Misc next patches

2023-07-25 Thread Daniel P . Berrangé
The following changes since commit a279ca4ea07383314b2d2b2f1d550be9482f148e:

  Merge tag 'pull-target-arm-20230725' of 
https://git.linaro.org/people/pmaydell/qemu-arm into staging (2023-07-25 
12:44:39 +0100)

are available in the Git repository at:

  https://gitlab.com/berrange/qemu tags/misc-next-pull-request

for you to fetch changes up to 095be0910b89b5d156e20641bd65ac6cab3f8305:

  hw/usb/canokey: change license to GPLv2+ (2023-07-25 17:15:59 +0100)


Miscellaneous fixes

 * Switch canokey device license from Apache to GPLv2+
 * Fix uninitialized variable warning in LUKS code



Akihiko Odaki (1):
  crypto: Always initialize splitkeylen

Hongren (Zenithal) Zheng (1):
  hw/usb/canokey: change license to GPLv2+

 crypto/block-luks.c | 4 ++--
 hw/usb/canokey.c| 2 +-
 hw/usb/canokey.h| 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

-- 
2.41.0




Re:Re: [PATCH] Open file as read only on private mapping in qemu_ram_alloc_from_file

2023-07-25 Thread ThinerLogoer

At 2023-07-25 19:42:30, "David Hildenbrand"  wrote:
>Hi,
>
>patch subject should start with "softmmu/physmem: Open ..."

Sorry, I am a newbie to the patch submission process. I will resubmit a version
of the patch if the finally acceptable patch after discussion is mostly the
same. (For example, if this patch finally involves adding another parameter and
adding various hooks, then I may find it hard to finish the patch myself, both
due to my lack of knowledge of the qemu source tree and due to the lack of the
various environments needed to test every case.)

Anyway, thanks for all your suggestions.

>
>On 25.07.23 12:52, Thiner Logoer wrote:
>> A read-only file can be mapped read-write as long as the
>> mapping is private, which is a very common case. Make
>
>At least in the environments I know, using private file mappings is a corner 
>case ;)
>
>What is you use case? VM templating?

Mostly, if I understand the terminology correctly. I was experimenting with vm
snapshotting that uses MAP_PRIVATE when recovering memory, similar to what
firecracker describes in this documentation.

https://github.com/firecracker-microvm/firecracker/blob/main/docs/snapshotting/snapshot-support.md

And in my experiment qemu supports recovering from a memory file + a guest
state file out of the box.
In fact, `-mem-path filename4pc.ram` works out of the box (since the default
parameters are map_private+readwrite), except that the vanilla setup requires
the memory file to be writable even though the file never gets written. (The
actual memory file & guest state file require separate hacking.)

And at least the patch provided here have been the solution to this last 
problem for me
for a while.

By the way, the commit "Commit 134253a4, machine: do not crash if default RAM
backend name has been stolen" prevents me from using a memory-backed file
directly as pc.ram and makes an `-object memory-backend-file,*` based setup
more complex (after this commit I cannot easily have the memory unbacked by any
file before snapshotting and backed by a file after recovery from a snapshot).
This is the reason why I prefer `-mem-path`, despite the doc saying that this
usage is close to deprecated and that `-mem-path` has fewer configurable
parameters.
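For context, a sketch of the two invocations being contrasted here; the option
names come from the QEMU documentation, while sizes and paths are illustrative:

  # simple form
  qemu-system-x86_64 -m 4G -mem-path /path/guest.ram ...

  # explicit backend form
  qemu-system-x86_64 -m 4G \
      -object memory-backend-file,id=pc.ram,size=4G,mem-path=/path/guest.ram,share=off \
      -machine memory-backend=pc.ram ...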

>
>> qemu_ram_alloc_from_file open file as read only when the
>> mapping is private, otherwise open will fail when file
>> does not allow write.
>>
>> If this file does not exist or is a directory, the flag is not used,
>> so it should be OK.
>>
>> from https://gitlab.com/qemu-project/qemu/-/issues/1689
>>
>> Signed-off-by: Thiner Logoer 
>> ---
>>   softmmu/physmem.c | 9 -
>>   1 file changed, 8 insertions(+), 1 deletion(-)
>>
>> diff --git a/softmmu/physmem.c b/softmmu/physmem.c
>> index 3df73542e1..e8036ee335 100644
>> --- a/softmmu/physmem.c
>> +++ b/softmmu/physmem.c
>> @@ -1945,8 +1945,15 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, 
>> MemoryRegion*mr,
>>   int fd;
>>   bool created;
>>   RAMBlock *block;
>> +
>
>^
>
>.git/rebase-apply/patch:13: trailing whitespace.

I remember I had deleted this whitespace before. Obviously I have messed up
with different versions of the patch files, sorry about that...

>
>> +/*
>> + * If map is private, the fd does not need to be writable.
>> + * This only get effective when the file is existent.
>
>"This will get ignored if the file does not yet exist."
>
>> + */
>> +bool open_as_readonly = readonly || !(ram_flags & RAM_SHARED);
>>
>> -fd = file_ram_open(mem_path, memory_region_name(mr), readonly, &created,
>> +fd = file_ram_open(mem_path, memory_region_name(mr),
>> +   open_as_readonly, &created,
>>  errp);
>>   if (fd < 0) {
>>   return NULL;
>
>
>Opening a file R/O will also make operations like fallocate/ftruncate/ ... 
>fail.

I saw fallocate somewhere in softmmu/physmem.c, though I was not sure how it
is actually used. Your response fills in this part.
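A small sketch of why a read-only fd breaks that path, as discussed below for
FALLOC_FL_PUNCH_HOLE; the EBADF behaviour is taken from the fallocate(2) man
page and everything else is illustrative:

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* Try to punch a hole at the start of the file, as a discard path would. */
static int try_punch_hole(const char *path)
{
    int fd = open(path, O_RDONLY);   /* read-only, as with a private mapping */
    int ret;

    if (fd < 0) {
        return -1;
    }
    /* Fails with EBADF: the fd is not open for writing (see fallocate(2)). */
    ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 4096);
    close(fd);
    return ret;
}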

>
>For example, this will make fallocate(FALLOC_FL_PUNCH_HOLE) stop working and in
>turn make ram_block_discard_range() bail out.
>
>
>There was a recent discussion/patch on that:
>
>commit 1d44ff586f8a8e113379430750b5a0a2a3f64cf9
>Author: David Hildenbrand 
>Date:   Thu Jul 6 09:56:06 2023 +0200
>
> softmmu/physmem: Warn with ram_block_discard_range() on MAP_PRIVATE file 
> mapping
>
> ram_block_discard_range() cannot possibly do the right thing in
> MAP_PRIVATE file mappings in the general case.
>
> To achieve the documented semantics, we also have to punch a hole into
> the file, possibly messing with other MAP_PRIVATE/MAP_SHARED mappings
> of such a file.
>
> For example, using VM templating -- see commit b17fbbe55cba ("migration:
> allow private destination ram with x-ignore-shared") -- in combination 
> with
> any mechanism that relies on discarding of RAM is problematic. This
> includes:
> * Postcopy live migration
> * virtio-balloon inflation/deflation or free-page-reporting
> * 

Re: [PATCH v4 06/12] virtio-sound: handle VIRTIO_SND_R_PCM_INFO request

2023-07-25 Thread Michael S. Tsirkin
On Tue, Jul 25, 2023 at 06:54:56PM +0400, Marc-André Lureau wrote:
> 
> Marc-André can you please stop with trying to use gmail web client?
> 
> 
> Trying? I think I have almost exclusively used it over the past 15y or so :)
>  

Then find a way to make it not wrap lines.  The result of corrupted text is
people giving up in disgust and asking everyone to switch to gitlab
or slack or whatnot.

-- 
MST




Re: [PATCH] migration: Allow user to specify migration available bandwidth

2023-07-25 Thread Daniel P . Berrangé
On Tue, Jul 25, 2023 at 11:54:52AM -0400, Peter Xu wrote:
> We can make the semantics specific, no strong opinion here.  I wished it
> can be as generic / easy as possible but maybe I went too far.
> 
> Though, is there anything else we can choose from besides
> "max-convergence-bandwidth"? Or am I the only one that thinks it's hard to
> understand when put "max" and "convergence" together?
> 
> When I take one step back to look at the whole "bandwidth" parameters, I am
> not sure why we'd even need both "convergence" and "postcopy" bandwidth
> being separate.  With my current understanding of migration, we may
> actually need:
> 
>   - One bandwidth that we may want to run the background migration, aka,
> precopy migration, where we don't rush on pushing data.
> 
>   - One bandwidth that is whatever we can have maximum; for dedicated NIC
> that's the line speed.  We should always use this full speed for
> important things.  I'd say postcopy falls into this, and this
> "convergence" calculation should also rely on this.

I don't think postcopy should be assumed to run at line speed.

At the point where you flip to post-copy mode, there could
conceivably still be GB's worth of data still dirty and
pending transfer.

The migration convergance step is reasonable to put at line
speed, because the max downtime parameter caps how long this
burst will be, genrally to some fraction of a second.

Once in post-copy mode, while the remaining data to transfer
is finite, the wall clock time to complete that transfer may
still be huge. It is unreasonable to assume users want to
run at max linespeed for many minutes to finish post-copy
at least in terms of the background transfer. You could make
a  case for the page fault handling to run at a higher bandwidth
cap than the background transfer, but I think it is still probably
not reasonable to run page fault fetches at line speed by default.

IOW, I don't think we can put the same bandwidth limit on the
short convergance operation, as on the longer post-copy operation.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH] migration: Allow user to specify migration available bandwidth

2023-07-25 Thread Peter Xu
On Tue, Jul 25, 2023 at 10:16:52AM +0100, Daniel P. Berrangé wrote:
> On Mon, Jul 24, 2023 at 03:47:50PM -0400, Peter Xu wrote:
> > On Mon, Jul 24, 2023 at 07:04:29PM +0100, Daniel P. Berrangé wrote:
> > > On Mon, Jul 24, 2023 at 01:07:55PM -0400, Peter Xu wrote:
> > > > Migration bandwidth is a very important value to live migration.  It's
> > > > because it's one of the major factors in the decision on when to
> > > > switch over to the destination in a precopy process.
> > > 
> > > To elaborate on this for those reading along...
> > > 
> > > QEMU takes the maximum downtime limit and multiplies it by its estimate
> > > of bandwidth. This gives a figure for the amount of data QEMU thinks
> > > it can transfer within the downtime period.
> > > 
> > > QEMU compares this figure to the amount of data that is still pending
> > > at the end of an iteration.
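A compact sketch of that decision, with illustrative names and units rather
than the exact ones in migration.c:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: data we believe can be sent within the allowed
 * downtime, compared against what is still pending at the end of an
 * iteration. */
static bool should_switchover(uint64_t bandwidth_bytes_per_sec,
                              uint64_t max_downtime_ms,
                              uint64_t pending_bytes)
{
    uint64_t threshold = bandwidth_bytes_per_sec * max_downtime_ms / 1000;

    return pending_bytes <= threshold;
}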
> > > 
> > > > This value is currently estimated by QEMU during the whole live 
> > > > migration
> > > > process by monitoring how fast we were sending the data.  This can be 
> > > > the
> > > > most accurate bandwidth if in the ideal world, where we're always 
> > > > feeding
> > > > unlimited data to the migration channel, and then it'll be limited to 
> > > > the
> > > > bandwidth that is available.
> > > 
> > > The QEMU estimate for available bandwidth will definitely be wrong,
> > > potentially by orders of magnitude, if QEMU has a max bandwidth limit
> > > set, as in that case it is never trying to push the peak rates available
> > > from the NICs/network fabric.
> > > 
> > > > The issue is QEMU itself may not be able to avoid those uncertainties on
> > > > measuring the real "available migration bandwidth".  At least not 
> > > > something
> > > > I can think of so far.
> > > 
> > > IIUC, you can query the NIC properties to find the hardware transfer
> > > rate of the NICs. That doesn't imply apps can actually reach that
> > > rate in practice - it has a decent chance of being an over-estimate
> > > of bandwidth, possibly very very much over.
> > > 
> > > Is such an over estimate better or worse than QEMU's current
> > > under-estimate ? It depends on the POV.
> > > 
> > > From the POV of QEMU, over-estimating means it'll not
> > > be throttling as much as it should. That's not a downside of
> > > migration - it makes it more likely for migration to complete :-)
> > 
> > Heh. :)
> > 
> > > 
> > > From the POV of non-QEMU apps though, if QEMU over-estimates,
> > > it'll mean other apps get starved of network bandwidth.
> > > 
> > > Overall I agree, there's no obvious way QEMU can ever come up
> > > with a reliable estimate for bandwidth available.
> > > 
> > > > One way to fix this is when the user is fully aware of the available
> > > > bandwidth, then we can allow the user to help providing an accurate 
> > > > value.
> > > >
> > > > For example, if the user has a dedicated channel of 10Gbps for migration
> > > > for this specific VM, the user can specify this bandwidth so QEMU can
> > > > always do the calculation based on this fact, trusting the user as long 
> > > > as
> > > > specified.
> > > 
> > > I can see that in theory, but when considering a non-trivial
> > > deployments of QEMU, I wonder if the user can really have any
> > > such certainty of what is truly available. It would need
> > > global awareness of the whole network of hosts & workloads.
> > 
> > Indeed it may or may not be easy always.
> > 
> > The good thing about this parameter is we always use the old estimation if
> > the user can't specify anything valid, so this is always optional not
> > required.
> > 
> > It solves the cases where the user can still specify accurately on the bw -
> > our QE team has already verified that it worked for us on GPU tests, where
> > it used to not be able to migrate at all with any sane downtime specified.
> > I should have attached a Tested-By from Zhiyi but since this is not exactly
> > the patch he was using I didn't.
> > 
> > > 
> > > > When the user wants to have migration only use 5Gbps out of that 10Gbps,
> > > > one can set max-bandwidth to 5Gbps, along with available-bandwidth to 
> > > > 5Gbps
> > > > so it'll never use over 5Gbps too (so the user can have the rest 5Gbps 
> > > > for
> > > > other things).  So it can be useful even if the network is not 
> > > > dedicated,
> > > > but as long as the user can know a solid value.
> > > > 
> > > > A new parameter "available-bandwidth" is introduced just for this. So 
> > > > when
> > > > the user specified this parameter, instead of trusting the estimated 
> > > > value
> > > > from QEMU itself (based on the QEMUFile send speed), let's trust the 
> > > > user
> > > > more.
> > > 
> > > I feel like rather than "available-bandwidth", we should call
> > > it "max-convergance-bandwidth".
> > > 
> > > To me that name would better reflect the fact that this isn't
> > > really required to be a measure of how much NIC bandwidth is
> > > available. It is merely an expression of 
