[PULL 12/16] virtio-iommu: Remove set_config callback

2021-12-14 Thread Thomas Huth
From: Eric Auger 

The spec says "the driver must not write to device configuration
fields". So remove the set_config() callback, which did not do
anything anyway.

Signed-off-by: Eric Auger 
Reviewed-by: Jean-Philippe Brucker 
Message-Id: <20211127072910.1261824-2-eric.au...@redhat.com>
Signed-off-by: Thomas Huth 
---
 hw/virtio/trace-events   |  1 -
 hw/virtio/virtio-iommu.c | 14 --
 2 files changed, 15 deletions(-)

diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 650e521e35..54bd7da00c 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -92,7 +92,6 @@ virtio_iommu_device_reset(void) "reset!"
 virtio_iommu_get_features(uint64_t features) "device supports 
features=0x%"PRIx64
 virtio_iommu_device_status(uint8_t status) "driver status = %d"
 virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, 
uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" 
start=0x%"PRIx64" end=0x%"PRIx64" domain_range=%d probe_size=0x%x"
-virtio_iommu_set_config(uint64_t page_size_mask, uint64_t start, uint64_t end, 
uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" 
start=0x%"PRIx64" end=0x%"PRIx64" domain_bits=%d probe_size=0x%x"
 virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
 virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
 virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, 
uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" 
virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d"
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 1b23e8e18c..645c0aa399 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -832,19 +832,6 @@ static void virtio_iommu_get_config(VirtIODevice *vdev, 
uint8_t *config_data)
 memcpy(config_data, >config, sizeof(struct virtio_iommu_config));
 }
 
-static void virtio_iommu_set_config(VirtIODevice *vdev,
-  const uint8_t *config_data)
-{
-struct virtio_iommu_config config;
-
-memcpy(, config_data, sizeof(struct virtio_iommu_config));
-trace_virtio_iommu_set_config(config.page_size_mask,
-  config.input_range.start,
-  config.input_range.end,
-  config.domain_range.end,
-  config.probe_size);
-}
-
 static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
   Error **errp)
 {
@@ -1185,7 +1172,6 @@ static void virtio_iommu_class_init(ObjectClass *klass, 
void *data)
 vdc->unrealize = virtio_iommu_device_unrealize;
 vdc->reset = virtio_iommu_device_reset;
 vdc->get_config = virtio_iommu_get_config;
-vdc->set_config = virtio_iommu_set_config;
 vdc->get_features = virtio_iommu_get_features;
 vdc->set_status = virtio_iommu_set_status;
 vdc->vmsd = _virtio_iommu_device;
-- 
2.27.0




[PULL 07/16] tests/qtest: Add a function that gets a list with available machine types

2021-12-14 Thread Thomas Huth
For the upcoming patches, we will need a way to get a list of all
available machine types. Refactor the qtest_cb_for_every_machine()
to split the related code out into a separate new function, and
gather the aliases of the various machine types, too.

Message-Id: <20211201104347.51922-4-th...@redhat.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/libqtest.c | 64 ++
 1 file changed, 53 insertions(+), 11 deletions(-)

diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index 25aeea385b..7ae2dc4e1d 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -1321,16 +1321,29 @@ static bool qtest_is_old_versioned_machine(const char 
*mname)
 return res;
 }
 
-void qtest_cb_for_every_machine(void (*cb)(const char *machine),
-bool skip_old_versioned)
+struct MachInfo {
+char *name;
+char *alias;
+};
+
+/*
+ * Returns an array with pointers to the available machine names.
+ * The terminating entry has the name set to NULL.
+ */
+static struct MachInfo *qtest_get_machines(void)
 {
+static struct MachInfo *machines;
 QDict *response, *minfo;
 QList *list;
 const QListEntry *p;
 QObject *qobj;
 QString *qstr;
-const char *mname;
 QTestState *qts;
+int idx;
+
+if (machines) {
+return machines;
+}
 
 qts = qtest_init("-machine none");
 response = qtest_qmp(qts, "{ 'execute': 'query-machines' }");
@@ -1338,25 +1351,54 @@ void qtest_cb_for_every_machine(void (*cb)(const char 
*machine),
 list = qdict_get_qlist(response, "return");
 g_assert(list);
 
-for (p = qlist_first(list); p; p = qlist_next(p)) {
+machines = g_new(struct MachInfo, qlist_size(list) + 1);
+
+for (p = qlist_first(list), idx = 0; p; p = qlist_next(p), idx++) {
 minfo = qobject_to(QDict, qlist_entry_obj(p));
 g_assert(minfo);
+
 qobj = qdict_get(minfo, "name");
 g_assert(qobj);
 qstr = qobject_to(QString, qobj);
 g_assert(qstr);
-mname = qstring_get_str(qstr);
-/* Ignore machines that cannot be used for qtests */
-if (!strncmp("xenfv", mname, 5) || g_str_equal("xenpv", mname)) {
-continue;
-}
-if (!skip_old_versioned || !qtest_is_old_versioned_machine(mname)) {
-cb(mname);
+machines[idx].name = g_strdup(qstring_get_str(qstr));
+
+qobj = qdict_get(minfo, "alias");
+if (qobj) {   /* The alias is optional */
+qstr = qobject_to(QString, qobj);
+g_assert(qstr);
+machines[idx].alias = g_strdup(qstring_get_str(qstr));
+} else {
+machines[idx].alias = NULL;
 }
 }
 
 qtest_quit(qts);
 qobject_unref(response);
+
+memset([idx], 0, sizeof(struct MachInfo)); /* Terminating entry */
+return machines;
+}
+
+void qtest_cb_for_every_machine(void (*cb)(const char *machine),
+bool skip_old_versioned)
+{
+struct MachInfo *machines;
+int i;
+
+machines = qtest_get_machines();
+
+for (i = 0; machines[i].name != NULL; i++) {
+/* Ignore machines that cannot be used for qtests */
+if (!strncmp("xenfv", machines[i].name, 5) ||
+g_str_equal("xenpv", machines[i].name)) {
+continue;
+}
+if (!skip_old_versioned ||
+!qtest_is_old_versioned_machine(machines[i].name)) {
+cb(machines[i].name);
+}
+}
 }
 
 /*
-- 
2.27.0




[PULL 02/16] tests/qtest: add some tests for virtio-net failover

2021-12-14 Thread Thomas Huth
From: Laurent Vivier 

Add test cases to test several error cases that must be
generated by invalid failover configuration.

Add a combination of coldplug and hotplug test cases to be
sure the primary is correctly managed according to the
presence or absence of the STANDBY feature.

Signed-off-by: Laurent Vivier 
Message-Id: <20211208130350.10178-3-lviv...@redhat.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/meson.build   |   4 +
 tests/qtest/virtio-net-failover.c | 788 ++
 2 files changed, 792 insertions(+)
 create mode 100644 tests/qtest/virtio-net-failover.c

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index c9d8458062..975a0f2f5f 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -68,6 +68,10 @@ qtests_i386 = \
   (config_all_devices.has_key('CONFIG_RTL8139_PCI') ? ['rtl8139-test'] : []) + 
 \
   (config_all_devices.has_key('CONFIG_E1000E_PCI_EXPRESS') ? 
['fuzz-e1000e-test'] : []) +   \
   (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) +
 \
+  (config_all_devices.has_key('CONFIG_VIRTIO_NET') and 
 \
+   config_all_devices.has_key('CONFIG_Q35') and
 \
+   config_all_devices.has_key('CONFIG_VIRTIO_PCI') and 
 \
+   slirp.found() ? ['virtio-net-failover'] : []) + 
 \
   (unpack_edk2_blobs ? ['bios-tables-test'] : []) +
 \
   qtests_pci + 
 \
   ['fdc-test',
diff --git a/tests/qtest/virtio-net-failover.c 
b/tests/qtest/virtio-net-failover.c
new file mode 100644
index 00..fd7821deaf
--- /dev/null
+++ b/tests/qtest/virtio-net-failover.c
@@ -0,0 +1,788 @@
+/*
+ * QTest testcase for virtio-net failover
+ *
+ * See docs/system/virtio-net-failover.rst
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "libqos/libqtest.h"
+#include "libqos/pci.h"
+#include "libqos/pci-pc.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qjson.h"
+#include "libqos/malloc-pc.h"
+#include "libqos/virtio-pci.h"
+#include "hw/pci/pci.h"
+
+#define ACPI_PCIHP_ADDR_ICH90x0cc0
+#define PCI_EJ_BASE 0x0008
+
+#define BASE_MACHINE "-M q35 -nodefaults " \
+"-device pcie-root-port,id=root0,addr=0x1,bus=pcie.0,chassis=1 " \
+"-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 "
+
+#define MAC_PRIMARY0 "52:54:00:11:11:11"
+#define MAC_STANDBY0 "52:54:00:22:22:22"
+
+static QGuestAllocator guest_malloc;
+static QPCIBus *pcibus;
+
+static QTestState *machine_start(const char *args, int numbus)
+{
+QTestState *qts;
+QPCIDevice *dev;
+int bus;
+
+qts = qtest_init(args);
+
+pc_alloc_init(_malloc, qts, 0);
+pcibus = qpci_new_pc(qts, _malloc);
+g_assert(qpci_secondary_buses_init(pcibus) == numbus);
+
+for (bus = 1; bus <= numbus; bus++) {
+dev = qpci_device_find(pcibus, QPCI_DEVFN(bus, 0));
+g_assert_nonnull(dev);
+
+qpci_device_enable(dev);
+qpci_iomap(dev, 4, NULL);
+
+g_free(dev);
+}
+
+return qts;
+}
+
+static void machine_stop(QTestState *qts)
+{
+qpci_free_pc(pcibus);
+alloc_destroy(_malloc);
+qtest_quit(qts);
+}
+
+static void test_error_id(void)
+{
+QTestState *qts;
+QDict *resp;
+QDict *err;
+
+qts = machine_start(BASE_MACHINE
+"-device virtio-net,bus=root0,id=standby0,failover=on",
+2);
+
+resp = qtest_qmp(qts, "{'execute': 'device_add',"
+  "'arguments': {"
+  "'driver': 'virtio-net',"
+  "'bus': 'root1',"
+  "'failover_pair_id': 'standby0'"
+  "} }");
+g_assert(qdict_haskey(resp, "error"));
+
+err = qdict_get_qdict(resp, "error");
+g_assert(qdict_haskey(err, "desc"));
+
+g_assert_cmpstr(qdict_get_str(err, "desc"), ==,
+"Device with failover_pair_id needs to have id");
+
+qobject_unref(resp);
+
+machine_stop(qts);
+}
+
+static void test_error_pcie(void)
+{
+QTestState *qts;
+QDict *resp;
+QDict *err;
+
+qts = machine_start(BASE_MACHINE
+"-device virtio-net,bus=root0,id=standby0,failover=on",
+2);
+
+resp = qtest_qmp(qts, "{'execute': 'device_add',"
+  "'arguments': {"
+  "'driver': 'virtio-net',"
+  "'id': 'primary0',"
+  "'bus': 'pcie.0',"
+  "'failover_pair_id': 'standby0'"
+  "} }");
+g_assert(qdict_haskey(resp, "error"));
+
+err = qdict_get_qdict(resp, "error");

[PULL 15/16] tests: qtest: Add virtio-iommu test

2021-12-14 Thread Thomas Huth
From: Eric Auger 

Add the framework to test the virtio-iommu-pci device
and tests exercising the attach/detach, map/unmap API.

Signed-off-by: Eric Auger 
Tested-by: Jean-Philippe Brucker 
Reviewed-by: Jean-Philippe Brucker 
Acked-by: Thomas Huth 
Message-Id: <20211127072910.1261824-5-eric.au...@redhat.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/libqos/meson.build|   1 +
 tests/qtest/libqos/virtio-iommu.c | 126 
 tests/qtest/libqos/virtio-iommu.h |  40 
 tests/qtest/meson.build   |   1 +
 tests/qtest/virtio-iommu-test.c   | 326 ++
 5 files changed, 494 insertions(+)
 create mode 100644 tests/qtest/libqos/virtio-iommu.c
 create mode 100644 tests/qtest/libqos/virtio-iommu.h
 create mode 100644 tests/qtest/virtio-iommu-test.c

diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build
index 4af1f04787..e988d15791 100644
--- a/tests/qtest/libqos/meson.build
+++ b/tests/qtest/libqos/meson.build
@@ -41,6 +41,7 @@ libqos_srcs = files('../libqtest.c',
 'virtio-rng.c',
 'virtio-scsi.c',
 'virtio-serial.c',
+'virtio-iommu.c',
 
 # qgraph machines:
 'aarch64-xlnx-zcu102-machine.c',
diff --git a/tests/qtest/libqos/virtio-iommu.c 
b/tests/qtest/libqos/virtio-iommu.c
new file mode 100644
index 00..18cba4ca36
--- /dev/null
+++ b/tests/qtest/libqos/virtio-iommu.c
@@ -0,0 +1,126 @@
+/*
+ * libqos driver virtio-iommu-pci framework
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * Authors:
+ *  Eric Auger 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "qemu/module.h"
+#include "qgraph.h"
+#include "virtio-iommu.h"
+#include "hw/virtio/virtio-iommu.h"
+
+static QGuestAllocator *alloc;
+
+/* virtio-iommu-device */
+static void *qvirtio_iommu_get_driver(QVirtioIOMMU *v_iommu,
+  const char *interface)
+{
+if (!g_strcmp0(interface, "virtio-iommu")) {
+return v_iommu;
+}
+if (!g_strcmp0(interface, "virtio")) {
+return v_iommu->vdev;
+}
+
+fprintf(stderr, "%s not present in virtio-iommu-device\n", interface);
+g_assert_not_reached();
+}
+
+static void virtio_iommu_cleanup(QVirtioIOMMU *interface)
+{
+qvirtqueue_cleanup(interface->vdev->bus, interface->vq, alloc);
+}
+
+static void virtio_iommu_setup(QVirtioIOMMU *interface)
+{
+QVirtioDevice *vdev = interface->vdev;
+uint64_t features;
+
+features = qvirtio_get_features(vdev);
+features &= ~(QVIRTIO_F_BAD_FEATURE |
+  (1ull << VIRTIO_RING_F_INDIRECT_DESC) |
+  (1ull << VIRTIO_RING_F_EVENT_IDX) |
+  (1ull << VIRTIO_IOMMU_F_BYPASS));
+qvirtio_set_features(vdev, features);
+interface->vq = qvirtqueue_setup(interface->vdev, alloc, 0);
+qvirtio_set_driver_ok(interface->vdev);
+}
+
+/* virtio-iommu-pci */
+static void *qvirtio_iommu_pci_get_driver(void *object, const char *interface)
+{
+QVirtioIOMMUPCI *v_iommu = object;
+if (!g_strcmp0(interface, "pci-device")) {
+return v_iommu->pci_vdev.pdev;
+}
+return qvirtio_iommu_get_driver(_iommu->iommu, interface);
+}
+
+static void qvirtio_iommu_pci_destructor(QOSGraphObject *obj)
+{
+QVirtioIOMMUPCI *iommu_pci = (QVirtioIOMMUPCI *) obj;
+QVirtioIOMMU *interface = _pci->iommu;
+QOSGraphObject *pci_vobj =  _pci->pci_vdev.obj;
+
+virtio_iommu_cleanup(interface);
+qvirtio_pci_destructor(pci_vobj);
+}
+
+static void qvirtio_iommu_pci_start_hw(QOSGraphObject *obj)
+{
+QVirtioIOMMUPCI *iommu_pci = (QVirtioIOMMUPCI *) obj;
+QVirtioIOMMU *interface = _pci->iommu;
+QOSGraphObject *pci_vobj =  _pci->pci_vdev.obj;
+
+qvirtio_pci_start_hw(pci_vobj);
+virtio_iommu_setup(interface);
+}
+
+
+static void *virtio_iommu_pci_create(void *pci_bus, QGuestAllocator *t_alloc,
+   void *addr)
+{
+QVirtioIOMMUPCI *virtio_rpci = g_new0(QVirtioIOMMUPCI, 1);
+QVirtioIOMMU *interface = _rpci->iommu;
+QOSGraphObject *obj = _rpci->pci_vdev.obj;
+
+virtio_pci_init(_rpci->pci_vdev, pci_bus, addr);
+interface->vdev = _rpci->pci_vdev.vdev;
+alloc = t_alloc;
+
+obj->get_driver = qvirtio_iommu_pci_get_driver;
+obj->start_hw = qvirtio_iommu_pci_start_hw;
+obj->destructor = qvirtio_iommu_pci_destructor;
+
+return obj;
+}
+
+static void virtio_iommu_register_nodes(void)
+{
+QPCIAddress addr = {
+.devfn = QPCI_DEVFN(4, 0),
+};
+
+QOSGraphEdgeOptions opts = {
+.extra_device_opts = "addr=04.0",
+};
+
+/* virtio-iommu-pci */
+add_qpci_address(, );
+qos_node_create_driver("virtio-iommu-pci", virtio_iommu_pci_create);
+qos_node_consumes("virtio-iommu-pci", "pci-bus", );
+qos_node_produces("virtio-iommu-pci", "pci-device");
+

[PULL 06/16] tests/qtest: Fence the tests that need xlnx-zcu102 with CONFIG_XLNX_ZYNQMP_ARM

2021-12-14 Thread Thomas Huth
The 'xlnx-can-test' and the 'fuzz-xlnx-dp-test' need the "xlnx-zcu102"
machine and thus should only be built and run if CONFIG_XLNX_ZYNQMP_ARM
is enabled.

Message-Id: <20211201104347.51922-3-th...@redhat.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/meson.build | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 36ca175660..9ff3eaf3d0 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -188,11 +188,10 @@ qtests_aarch64 = \
   (cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) +   
 \
   (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? 
['tpm-tis-device-test'] : []) +\
   (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? 
['tpm-tis-device-swtpm-test'] : []) +  \
+  (config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 
'fuzz-xlnx-dp-test'] : []) + \
   ['arm-cpu-features',
'numa-test',
'boot-serial-test',
-   'xlnx-can-test',
-   'fuzz-xlnx-dp-test',
'migration-test']
 
 qtests_s390x = \
-- 
2.27.0




[PULL 08/16] tests/qtest: Add a function to check whether a machine is available

2021-12-14 Thread Thomas Huth
It is nowadays possible to build QEMU with a reduced set of machines
in each binary. However, the qtests still hard-code the expected
machines and fail if the binary does not feature the required machine.
Let's get a little bit more flexible here: Add a function that can be
used to query whether a certain machine is available or not, and use
it in some tests as an example (more work has to be done in other
tests which will follow later).

Message-Id: <20211201104347.51922-5-th...@redhat.com>
Acked-by: John Snow 
Signed-off-by: Thomas Huth 
---
 tests/qtest/boot-serial-test.c |  3 ++-
 tests/qtest/cdrom-test.c   |  8 +---
 tests/qtest/libqos/libqtest.h  |  8 
 tests/qtest/libqtest.c | 17 +
 tests/qtest/prom-env-test.c|  8 +---
 5 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c
index 83828ba270..4d8e1343bd 100644
--- a/tests/qtest/boot-serial-test.c
+++ b/tests/qtest/boot-serial-test.c
@@ -285,7 +285,8 @@ int main(int argc, char *argv[])
 g_test_init(, , NULL);
 
 for (i = 0; tests[i].arch != NULL; i++) {
-if (strcmp(arch, tests[i].arch) == 0) {
+if (g_str_equal(arch, tests[i].arch) &&
+qtest_has_machine(tests[i].machine)) {
 char *name = g_strdup_printf("boot-serial/%s", tests[i].machine);
 qtest_add_data_func(name, [i], test_machine);
 g_free(name);
diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c
index 5af944a5fb..c1fcac5c45 100644
--- a/tests/qtest/cdrom-test.c
+++ b/tests/qtest/cdrom-test.c
@@ -109,9 +109,11 @@ static void test_cdrom_param(gconstpointer data)
 static void add_cdrom_param_tests(const char **machines)
 {
 while (*machines) {
-char *testname = g_strdup_printf("cdrom/param/%s", *machines);
-qtest_add_data_func(testname, *machines, test_cdrom_param);
-g_free(testname);
+if (qtest_has_machine(*machines)) {
+char *testname = g_strdup_printf("cdrom/param/%s", *machines);
+qtest_add_data_func(testname, *machines, test_cdrom_param);
+g_free(testname);
+}
 machines++;
 }
 }
diff --git a/tests/qtest/libqos/libqtest.h b/tests/qtest/libqos/libqtest.h
index 59e9271195..dff6b31cf0 100644
--- a/tests/qtest/libqos/libqtest.h
+++ b/tests/qtest/libqos/libqtest.h
@@ -710,6 +710,14 @@ QDict *qmp_fd(int fd, const char *fmt, ...) 
GCC_FMT_ATTR(2, 3);
 void qtest_cb_for_every_machine(void (*cb)(const char *machine),
 bool skip_old_versioned);
 
+/**
+ * qtest_has_machine:
+ * @machine: The machine to look for
+ *
+ * Returns: true if the machine is available in the target binary.
+ */
+bool qtest_has_machine(const char *machine);
+
 /**
  * qtest_qmp_device_add_qdict:
  * @qts: QTestState instance to operate on
diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index 7ae2dc4e1d..65ed949685 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -1401,6 +1401,23 @@ void qtest_cb_for_every_machine(void (*cb)(const char 
*machine),
 }
 }
 
+bool qtest_has_machine(const char *machine)
+{
+struct MachInfo *machines;
+int i;
+
+machines = qtest_get_machines();
+
+for (i = 0; machines[i].name != NULL; i++) {
+if (g_str_equal(machine, machines[i].name) ||
+(machines[i].alias && g_str_equal(machine, machines[i].alias))) {
+return true;
+}
+}
+
+return false;
+}
+
 /*
  * Generic hot-plugging test via the device_add QMP commands.
  */
diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c
index f41d80154a..bdbb01d8e5 100644
--- a/tests/qtest/prom-env-test.c
+++ b/tests/qtest/prom-env-test.c
@@ -71,9 +71,11 @@ static void add_tests(const char *machines[])
 char *name;
 
 for (i = 0; machines[i] != NULL; i++) {
-name = g_strdup_printf("prom-env/%s", machines[i]);
-qtest_add_data_func(name, machines[i], test_machine);
-g_free(name);
+if (qtest_has_machine(machines[i])) {
+name = g_strdup_printf("prom-env/%s", machines[i]);
+qtest_add_data_func(name, machines[i], test_machine);
+g_free(name);
+}
 }
 }
 
-- 
2.27.0




[PULL 01/16] qtest/libqos: add a function to initialize secondary PCI buses

2021-12-14 Thread Thomas Huth
From: Laurent Vivier 

Scan the PCI devices to find bridges and set PCI_SECONDARY_BUS and
PCI_SUBORDINATE_BUS (algorithm from seabios)

Signed-off-by: Laurent Vivier 
Acked-by: Thomas Huth 
Message-Id: <20211208130350.10178-2-lviv...@redhat.com>
Signed-off-by: Thomas Huth 
---
 include/hw/pci/pci_bridge.h |   8 +++
 tests/qtest/libqos/pci.c| 119 
 tests/qtest/libqos/pci.h|   1 +
 3 files changed, 128 insertions(+)

diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index a94d350034..30691a6e57 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -138,6 +138,7 @@ typedef struct PCIBridgeQemuCap {
 uint64_t mem_pref_64; /* Prefetchable memory to reserve (64-bit MMIO) */
 } PCIBridgeQemuCap;
 
+#define REDHAT_PCI_CAP_TYPE_OFFSET  3
 #define REDHAT_PCI_CAP_RESOURCE_RESERVE 1
 
 /*
@@ -152,6 +153,13 @@ typedef struct PCIResReserve {
 uint64_t mem_pref_64;
 } PCIResReserve;
 
+#define REDHAT_PCI_CAP_RES_RESERVE_BUS_RES 4
+#define REDHAT_PCI_CAP_RES_RESERVE_IO  8
+#define REDHAT_PCI_CAP_RES_RESERVE_MEM 16
+#define REDHAT_PCI_CAP_RES_RESERVE_PREF_MEM_32 20
+#define REDHAT_PCI_CAP_RES_RESERVE_PREF_MEM_64 24
+#define REDHAT_PCI_CAP_RES_RESERVE_CAP_SIZE32
+
 int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset,
PCIResReserve res_reserve, Error **errp);
 
diff --git a/tests/qtest/libqos/pci.c b/tests/qtest/libqos/pci.c
index e1e96189c8..3a9076ae58 100644
--- a/tests/qtest/libqos/pci.c
+++ b/tests/qtest/libqos/pci.c
@@ -13,6 +13,8 @@
 #include "qemu/osdep.h"
 #include "pci.h"
 
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_regs.h"
 #include "qemu/host-utils.h"
 #include "qgraph.h"
@@ -99,6 +101,123 @@ void qpci_device_init(QPCIDevice *dev, QPCIBus *bus, 
QPCIAddress *addr)
 g_assert(!addr->device_id || device_id == addr->device_id);
 }
 
+static uint8_t qpci_find_resource_reserve_capability(QPCIDevice *dev)
+{
+uint16_t device_id;
+uint8_t cap = 0;
+
+if (qpci_config_readw(dev, PCI_VENDOR_ID) != PCI_VENDOR_ID_REDHAT) {
+return 0;
+}
+
+device_id = qpci_config_readw(dev, PCI_DEVICE_ID);
+
+if (device_id != PCI_DEVICE_ID_REDHAT_PCIE_RP &&
+device_id != PCI_DEVICE_ID_REDHAT_BRIDGE) {
+return 0;
+}
+
+do {
+cap = qpci_find_capability(dev, PCI_CAP_ID_VNDR, cap);
+} while (cap &&
+ qpci_config_readb(dev, cap + REDHAT_PCI_CAP_TYPE_OFFSET) !=
+ REDHAT_PCI_CAP_RESOURCE_RESERVE);
+if (cap) {
+uint8_t cap_len = qpci_config_readb(dev, cap + PCI_CAP_FLAGS);
+if (cap_len < REDHAT_PCI_CAP_RES_RESERVE_CAP_SIZE) {
+return 0;
+}
+}
+return cap;
+}
+
+static void qpci_secondary_buses_rec(QPCIBus *qbus, int bus, int *pci_bus)
+{
+QPCIDevice *dev;
+uint16_t class;
+uint8_t pribus, secbus, subbus;
+int index;
+
+for (index = 0; index < 32; index++) {
+dev = qpci_device_find(qbus, QPCI_DEVFN(bus + index, 0));
+if (dev == NULL) {
+continue;
+}
+class = qpci_config_readw(dev, PCI_CLASS_DEVICE);
+if (class == PCI_CLASS_BRIDGE_PCI) {
+qpci_config_writeb(dev, PCI_SECONDARY_BUS, 255);
+qpci_config_writeb(dev, PCI_SUBORDINATE_BUS, 0);
+}
+g_free(dev);
+}
+
+for (index = 0; index < 32; index++) {
+dev = qpci_device_find(qbus, QPCI_DEVFN(bus + index, 0));
+if (dev == NULL) {
+continue;
+}
+class = qpci_config_readw(dev, PCI_CLASS_DEVICE);
+if (class != PCI_CLASS_BRIDGE_PCI) {
+g_free(dev);
+continue;
+}
+
+pribus = qpci_config_readb(dev, PCI_PRIMARY_BUS);
+if (pribus != bus) {
+qpci_config_writeb(dev, PCI_PRIMARY_BUS, bus);
+}
+
+secbus = qpci_config_readb(dev, PCI_SECONDARY_BUS);
+(*pci_bus)++;
+if (*pci_bus != secbus) {
+secbus = *pci_bus;
+qpci_config_writeb(dev, PCI_SECONDARY_BUS, secbus);
+}
+
+subbus = qpci_config_readb(dev, PCI_SUBORDINATE_BUS);
+qpci_config_writeb(dev, PCI_SUBORDINATE_BUS, 255);
+
+qpci_secondary_buses_rec(qbus, secbus << 5, pci_bus);
+
+if (subbus != *pci_bus) {
+uint8_t res_bus = *pci_bus;
+uint8_t cap = qpci_find_resource_reserve_capability(dev);
+
+if (cap) {
+uint32_t tmp_res_bus;
+
+tmp_res_bus = qpci_config_readl(dev, cap +
+
REDHAT_PCI_CAP_RES_RESERVE_BUS_RES);
+if (tmp_res_bus != (uint32_t)-1) {
+res_bus = tmp_res_bus & 0xFF;
+if ((uint8_t)(res_bus + secbus) < secbus ||
+(uint8_t)(res_bus + secbus) < res_bus) {
+res_bus = 0;
+

[PULL 11/16] gitlab-ci: Add cirrus-ci based tests for NetBSD and OpenBSD

2021-12-14 Thread Thomas Huth
Cirrus-CI provides KVM in their Linux containers, so we can also run
our VM-based NetBSD and OpenBSD build jobs there.
Since the VM installation might take a while, we only run the "help"
target on the first invocation to avoid timeouts, and then only check
the build during the next run, once the base image has been cached.
For the build tests, we also only use a very limited set of target
CPUs since compiling in these VMs is not very fast (especially the
build on OpenBSD seems to be incredibly slow).

The jobs are marked as "manual" only, since this double-indirect setup
(with the cirrus-run script and VMs in the Cirrus-CI containers) might
fail more often than the other jobs, and since we can only trigger a limited
number of Cirrus-CI jobs at a time anyway (due to the restrictions in
the free tier of Cirrus). Thus these jobs are rather added as convenience
for contributors who would like to run the NetBSD/OpenBSD tests without
the need of downloading and installing the corresponding VM images on
their local machines.

Message-Id: <20211209103124.121942-1-th...@redhat.com>
Signed-off-by: Thomas Huth 
---
 .gitlab-ci.d/cirrus.yml   | 35 +++
 .gitlab-ci.d/cirrus/kvm-build.yml | 31 +++
 2 files changed, 66 insertions(+)
 create mode 100644 .gitlab-ci.d/cirrus/kvm-build.yml

diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml
index d273a9e713..19e6c21401 100644
--- a/.gitlab-ci.d/cirrus.yml
+++ b/.gitlab-ci.d/cirrus.yml
@@ -89,3 +89,38 @@ x64-macos-11-base-build:
 PATH_EXTRA: /usr/local/opt/ccache/libexec:/usr/local/opt/gettext/bin
 PKG_CONFIG_PATH: 
/usr/local/opt/curl/lib/pkgconfig:/usr/local/opt/ncurses/lib/pkgconfig:/usr/local/opt/readline/lib/pkgconfig
 TEST_TARGETS: check-unit check-block check-qapi-schema check-softfloat 
check-qtest-x86_64
+
+
+# The following jobs run VM-based tests via KVM on a Linux-based Cirrus-CI job
+.cirrus_kvm_job:
+  stage: build
+  image: registry.gitlab.com/libvirt/libvirt-ci/cirrus-run:master
+  needs: []
+  timeout: 80m
+  allow_failure: true
+  script:
+- sed -e "s|[@]CI_REPOSITORY_URL@|$CI_REPOSITORY_URL|g"
+  -e "s|[@]CI_COMMIT_REF_NAME@|$CI_COMMIT_REF_NAME|g"
+  -e "s|[@]CI_COMMIT_SHA@|$CI_COMMIT_SHA|g"
+  -e "s|[@]NAME@|$NAME|g"
+  -e "s|[@]CONFIGURE_ARGS@|$CONFIGURE_ARGS|g"
+  -e "s|[@]TEST_TARGETS@|$TEST_TARGETS|g"
+  <.gitlab-ci.d/cirrus/kvm-build.yml >.gitlab-ci.d/cirrus/$NAME.yml
+- cat .gitlab-ci.d/cirrus/$NAME.yml
+- cirrus-run -v --show-build-log always .gitlab-ci.d/cirrus/$NAME.yml
+  rules:
+- when: manual
+
+x86-netbsd:
+  extends: .cirrus_kvm_job
+  variables:
+NAME: netbsd
+CONFIGURE_ARGS: --target-list=x86_64-softmmu,ppc64-softmmu,aarch64-softmmu
+TEST_TARGETS: check
+
+x86-openbsd:
+  extends: .cirrus_kvm_job
+  variables:
+NAME: openbsd
+CONFIGURE_ARGS: --target-list=i386-softmmu,riscv64-softmmu,mips64-softmmu
+TEST_TARGETS: check
diff --git a/.gitlab-ci.d/cirrus/kvm-build.yml 
b/.gitlab-ci.d/cirrus/kvm-build.yml
new file mode 100644
index 00..4334fabf39
--- /dev/null
+++ b/.gitlab-ci.d/cirrus/kvm-build.yml
@@ -0,0 +1,31 @@
+container:
+  image: fedora:35
+  cpu: 4
+  memory: 8Gb
+  kvm: true
+
+env:
+  CIRRUS_CLONE_DEPTH: 1
+  CI_REPOSITORY_URL: "@CI_REPOSITORY_URL@"
+  CI_COMMIT_REF_NAME: "@CI_COMMIT_REF_NAME@"
+  CI_COMMIT_SHA: "@CI_COMMIT_SHA@"
+
+@NAME@_task:
+  @NAME@_vm_cache:
+folder: $HOME/.cache/qemu-vm
+  install_script:
+- dnf update -y
+- dnf install -y git make openssh-clients qemu-img qemu-system-x86 wget
+  clone_script:
+- git clone --depth 100 "$CI_REPOSITORY_URL" .
+- git fetch origin "$CI_COMMIT_REF_NAME"
+- git reset --hard "$CI_COMMIT_SHA"
+  build_script:
+- if [ -f $HOME/.cache/qemu-vm/images/@NAME@.img ]; then
+make vm-build-@NAME@ J=$(getconf _NPROCESSORS_ONLN)
+  EXTRA_CONFIGURE_OPTS="@CONFIGURE_ARGS@"
+  BUILD_TARGET="@TEST_TARGETS@" ;
+  else
+make vm-build-@NAME@ J=$(getconf _NPROCESSORS_ONLN) BUILD_TARGET=help
+  EXTRA_CONFIGURE_OPTS="--disable-system --disable-user 
--disable-tools" ;
+  fi
-- 
2.27.0




[PULL 10/16] gitlab-ci.d/buildtest: Add jobs that run the device-crash-test

2021-12-14 Thread Thomas Huth
The device-crash-test script has been quite neglected in the past,
and as a result it has bit-rotted quite often. Let's add CI jobs that run this
script for at least some targets, so that this script does not
regress that easily anymore.

Message-Id: <20211126162724.1162049-1-th...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Thomas Huth 
---
 .gitlab-ci.d/buildtest.yml | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 71d0f407ad..7e1cb0b3c2 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -100,6 +100,17 @@ avocado-system-debian:
 IMAGE: debian-amd64
 MAKE_CHECK_ARGS: check-avocado
 
+crash-test-debian:
+  extends: .native_test_job_template
+  needs:
+- job: build-system-debian
+  artifacts: true
+  variables:
+IMAGE: debian-amd64
+  script:
+- cd build
+- scripts/device-crash-test -q ./qemu-system-i386
+
 build-system-fedora:
   extends: .native_build_job_template
   needs:
@@ -134,6 +145,18 @@ avocado-system-fedora:
 IMAGE: fedora
 MAKE_CHECK_ARGS: check-avocado
 
+crash-test-fedora:
+  extends: .native_test_job_template
+  needs:
+- job: build-system-fedora
+  artifacts: true
+  variables:
+IMAGE: fedora
+  script:
+- cd build
+- scripts/device-crash-test -q ./qemu-system-ppc
+- scripts/device-crash-test -q ./qemu-system-riscv32
+
 build-system-centos:
   extends: .native_build_job_template
   needs:
-- 
2.27.0




[PULL 04/16] tests/libqtest: add a migration test with two couples of failover devices

2021-12-14 Thread Thomas Huth
From: Laurent Vivier 

Signed-off-by: Laurent Vivier 
Acked-by: Thomas Huth 
Message-Id: <20211208130350.10178-5-lviv...@redhat.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/virtio-net-failover.c | 282 ++
 1 file changed, 282 insertions(+)

diff --git a/tests/qtest/virtio-net-failover.c 
b/tests/qtest/virtio-net-failover.c
index e998a546b0..4b2ba8a106 100644
--- a/tests/qtest/virtio-net-failover.c
+++ b/tests/qtest/virtio-net-failover.c
@@ -20,6 +20,7 @@
 
 #define ACPI_PCIHP_ADDR_ICH90x0cc0
 #define PCI_EJ_BASE 0x0008
+#define PCI_SEL_BASE0x0010
 
 #define BASE_MACHINE "-M q35 -nodefaults " \
 "-device pcie-root-port,id=root0,addr=0x1,bus=pcie.0,chassis=1 " \
@@ -27,6 +28,8 @@
 
 #define MAC_PRIMARY0 "52:54:00:11:11:11"
 #define MAC_STANDBY0 "52:54:00:22:22:22"
+#define MAC_PRIMARY1 "52:54:00:33:33:33"
+#define MAC_STANDBY1 "52:54:00:44:44:44"
 
 static QGuestAllocator guest_malloc;
 static QPCIBus *pcibus;
@@ -1026,6 +1029,281 @@ static void test_migrate_abort_timeout(gconstpointer 
opaque)
 machine_stop(qts);
 }
 
+static void test_multi_out(gconstpointer opaque)
+{
+QTestState *qts;
+QDict *resp, *args, *ret;
+g_autofree gchar *uri = g_strdup_printf("exec: cat > %s", (gchar *)opaque);
+const gchar *status, *expected;
+QVirtioPCIDevice *vdev0, *vdev1;
+
+qts = machine_start(BASE_MACHINE
+"-device pcie-root-port,id=root2,addr=0x3,bus=pcie.0,chassis=3 
"
+"-device pcie-root-port,id=root3,addr=0x4,bus=pcie.0,chassis=4 
"
+"-netdev user,id=hs0 "
+"-netdev user,id=hs1 "
+"-netdev user,id=hs2 "
+"-netdev user,id=hs3 ",
+4);
+
+check_one_card(qts, false, "standby0", MAC_STANDBY0);
+check_one_card(qts, false, "primary0", MAC_PRIMARY0);
+check_one_card(qts, false, "standby1", MAC_STANDBY1);
+check_one_card(qts, false, "primary1", MAC_PRIMARY1);
+
+qtest_qmp_device_add(qts, "virtio-net", "standby0",
+ "{'bus': 'root0',"
+ "'failover': 'on',"
+ "'netdev': 'hs0',"
+ "'mac': '"MAC_STANDBY0"'}");
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, false, "primary0", MAC_PRIMARY0);
+check_one_card(qts, false, "standby1", MAC_STANDBY1);
+check_one_card(qts, false, "primary1", MAC_PRIMARY1);
+
+qtest_qmp_device_add(qts, "virtio-net", "primary0",
+ "{'bus': 'root1',"
+ "'failover_pair_id': 'standby0',"
+ "'netdev': 'hs1',"
+ "'rombar': 0,"
+ "'romfile': '',"
+ "'mac': '"MAC_PRIMARY0"'}");
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, false, "primary0", MAC_PRIMARY0);
+check_one_card(qts, false, "standby1", MAC_STANDBY1);
+check_one_card(qts, false, "primary1", MAC_PRIMARY1);
+
+vdev0 = start_virtio_net(qts, 1, 0, "standby0");
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, true, "primary0", MAC_PRIMARY0);
+check_one_card(qts, false, "standby1", MAC_STANDBY1);
+check_one_card(qts, false, "primary1", MAC_PRIMARY1);
+
+qtest_qmp_device_add(qts, "virtio-net", "standby1",
+ "{'bus': 'root2',"
+ "'failover': 'on',"
+ "'netdev': 'hs2',"
+ "'mac': '"MAC_STANDBY1"'}");
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, true, "primary0", MAC_PRIMARY0);
+check_one_card(qts, true, "standby1", MAC_STANDBY1);
+check_one_card(qts, false, "primary1", MAC_PRIMARY1);
+
+qtest_qmp_device_add(qts, "virtio-net", "primary1",
+ "{'bus': 'root3',"
+ "'failover_pair_id': 'standby1',"
+ "'netdev': 'hs3',"
+ "'rombar': 0,"
+ "'romfile': '',"
+ "'mac': '"MAC_PRIMARY1"'}");
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, true, "primary0", MAC_PRIMARY0);
+check_one_card(qts, true, "standby1", MAC_STANDBY1);
+check_one_card(qts, false, "primary1", MAC_PRIMARY1);
+
+vdev1 = start_virtio_net(qts, 3, 0, "standby1");
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, true, "primary0", MAC_PRIMARY0);
+check_one_card(qts, true, "standby1", MAC_STANDBY1);
+check_one_card(qts, true, "primary1", MAC_PRIMARY1);
+
+args = qdict_from_jsonf_nofail("{}");
+g_assert_nonnull(args);
+qdict_put_str(args, "uri", uri);
+
+resp = qtest_qmp(qts, "{ 'execute': 'migrate', 'arguments': %p}", args);
+g_assert(qdict_haskey(resp, "return"));
+qobject_unref(resp);
+
+/* the event is 

[PULL 03/16] tests/libqtest: add some virtio-net failover migration cancelling tests

2021-12-14 Thread Thomas Huth
From: Laurent Vivier 

Add some tests to check the state of the machine if the migration
is cancelled while we are using virtio-net failover.

Signed-off-by: Laurent Vivier 
Acked-by: Thomas Huth 
Message-Id: <20211208130350.10178-4-lviv...@redhat.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/virtio-net-failover.c | 282 ++
 1 file changed, 282 insertions(+)

diff --git a/tests/qtest/virtio-net-failover.c 
b/tests/qtest/virtio-net-failover.c
index fd7821deaf..e998a546b0 100644
--- a/tests/qtest/virtio-net-failover.c
+++ b/tests/qtest/virtio-net-failover.c
@@ -752,6 +752,280 @@ static void test_migrate_in(gconstpointer opaque)
 machine_stop(qts);
 }
 
+static void test_migrate_abort_wait_unplug(gconstpointer opaque)
+{
+QTestState *qts;
+QDict *resp, *args, *ret;
+g_autofree gchar *uri = g_strdup_printf("exec: cat > %s", (gchar *)opaque);
+const gchar *status;
+QVirtioPCIDevice *vdev;
+
+qts = machine_start(BASE_MACHINE
+ "-netdev user,id=hs0 "
+ "-netdev user,id=hs1 ",
+ 2);
+
+check_one_card(qts, false, "standby0", MAC_STANDBY0);
+check_one_card(qts, false, "primary0", MAC_PRIMARY0);
+
+qtest_qmp_device_add(qts, "virtio-net", "standby0",
+ "{'bus': 'root0',"
+ "'failover': 'on',"
+ "'netdev': 'hs0',"
+ "'mac': '"MAC_STANDBY0"'}");
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, false, "primary0", MAC_PRIMARY0);
+
+vdev = start_virtio_net(qts, 1, 0, "standby0");
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, false, "primary0", MAC_PRIMARY0);
+
+qtest_qmp_device_add(qts, "virtio-net", "primary0",
+ "{'bus': 'root1',"
+ "'failover_pair_id': 'standby0',"
+ "'netdev': 'hs1',"
+ "'rombar': 0,"
+ "'romfile': '',"
+ "'mac': '"MAC_PRIMARY0"'}");
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, true, "primary0", MAC_PRIMARY0);
+
+args = qdict_from_jsonf_nofail("{}");
+g_assert_nonnull(args);
+qdict_put_str(args, "uri", uri);
+
+resp = qtest_qmp(qts, "{ 'execute': 'migrate', 'arguments': %p}", args);
+g_assert(qdict_haskey(resp, "return"));
+qobject_unref(resp);
+
+/* the event is sent when QEMU asks the OS to unplug the card */
+resp = get_unplug_primary_event(qts);
+g_assert_cmpstr(qdict_get_str(resp, "device-id"), ==, "primary0");
+qobject_unref(resp);
+
+resp = qtest_qmp(qts, "{ 'execute': 'migrate_cancel' }");
+g_assert(qdict_haskey(resp, "return"));
+qobject_unref(resp);
+
+/* migration has been cancelled while the unplug was in progress */
+
+/* while the card is not ejected, we must be in "cancelling" state */
+ret = migrate_status(qts);
+
+status = qdict_get_str(ret, "status");
+g_assert_cmpstr(status, ==, "cancelling");
+qobject_unref(ret);
+
+/* OS unplugs the cards, QEMU can move from wait-unplug state */
+qtest_outl(qts, ACPI_PCIHP_ADDR_ICH9 + PCI_EJ_BASE, 1);
+
+while (true) {
+ret = migrate_status(qts);
+
+status = qdict_get_str(ret, "status");
+if (strcmp(status, "cancelled") == 0) {
+qobject_unref(ret);
+break;
+}
+g_assert_cmpstr(status, !=, "failed");
+g_assert_cmpstr(status, !=, "active");
+qobject_unref(ret);
+}
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, true, "primary0", MAC_PRIMARY0);
+
+qos_object_destroy((QOSGraphObject *)vdev);
+machine_stop(qts);
+}
+
+static void test_migrate_abort_active(gconstpointer opaque)
+{
+QTestState *qts;
+QDict *resp, *args, *ret;
+g_autofree gchar *uri = g_strdup_printf("exec: cat > %s", (gchar *)opaque);
+const gchar *status;
+QVirtioPCIDevice *vdev;
+
+qts = machine_start(BASE_MACHINE
+ "-netdev user,id=hs0 "
+ "-netdev user,id=hs1 ",
+ 2);
+
+check_one_card(qts, false, "standby0", MAC_STANDBY0);
+check_one_card(qts, false, "primary0", MAC_PRIMARY0);
+
+qtest_qmp_device_add(qts, "virtio-net", "standby0",
+ "{'bus': 'root0',"
+ "'failover': 'on',"
+ "'netdev': 'hs0',"
+ "'mac': '"MAC_STANDBY0"'}");
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, false, "primary0", MAC_PRIMARY0);
+
+vdev = start_virtio_net(qts, 1, 0, "standby0");
+
+check_one_card(qts, true, "standby0", MAC_STANDBY0);
+check_one_card(qts, false, "primary0", MAC_PRIMARY0);
+
+qtest_qmp_device_add(qts, "virtio-net", "primary0",
+ 

[PULL 09/16] Move the libssh setup from configure to meson.build

2021-12-14 Thread Thomas Huth
It's easier to do this in meson.build now.

Message-Id: <20211209144801.148388-1-th...@redhat.com>
Acked-by: Richard W.M. Jones 
Signed-off-by: Thomas Huth 
---
 configure | 27 ---
 meson.build   | 13 +
 meson_options.txt |  2 ++
 scripts/meson-buildoptions.sh |  3 +++
 4 files changed, 14 insertions(+), 31 deletions(-)

diff --git a/configure b/configure
index 48c21775f3..bb99a40ed0 100755
--- a/configure
+++ b/configure
@@ -344,7 +344,6 @@ debug_stack_usage="no"
 crypto_afalg="no"
 tls_priority="NORMAL"
 tpm="$default_feature"
-libssh="$default_feature"
 live_block_migration=${default_feature:-yes}
 numa="$default_feature"
 replication=${default_feature:-yes}
@@ -1078,10 +1077,6 @@ for opt do
   ;;
   --enable-tpm) tpm="yes"
   ;;
-  --disable-libssh) libssh="no"
-  ;;
-  --enable-libssh) libssh="yes"
-  ;;
   --disable-live-block-migration) live_block_migration="no"
   ;;
   --enable-live-block-migration) live_block_migration="yes"
@@ -1448,7 +1443,6 @@ cat << EOF
   live-block-migration   Block migration in the main migration stream
   coroutine-pool  coroutine freelist (better performance)
   tpm TPM support
-  libssh  ssh block device support
   numalibnuma support
   avx2AVX2 optimization support
   avx512f AVX512F optimization support
@@ -2561,21 +2555,6 @@ if test "$modules" = yes; then
 fi
 fi
 
-##
-# libssh probe
-if test "$libssh" != "no" ; then
-  if $pkg_config --exists "libssh >= 0.8.7"; then
-libssh_cflags=$($pkg_config libssh --cflags)
-libssh_libs=$($pkg_config libssh --libs)
-libssh=yes
-  else
-if test "$libssh" = "yes" ; then
-  error_exit "libssh required for --enable-libssh"
-fi
-libssh=no
-  fi
-fi
-
 ##
 # TPM emulation is only on POSIX
 
@@ -3636,12 +3615,6 @@ if test "$cmpxchg128" = "yes" ; then
   echo "CONFIG_CMPXCHG128=y" >> $config_host_mak
 fi
 
-if test "$libssh" = "yes" ; then
-  echo "CONFIG_LIBSSH=y" >> $config_host_mak
-  echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak
-  echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak
-fi
-
 if test "$live_block_migration" = "yes" ; then
   echo "CONFIG_LIVE_BLOCK_MIGRATION=y" >> $config_host_mak
 fi
diff --git a/meson.build b/meson.build
index 96de1a6ef9..ae67ca28ab 100644
--- a/meson.build
+++ b/meson.build
@@ -874,11 +874,15 @@ if not get_option('glusterfs').auto() or have_block
 ''', dependencies: glusterfs)
   endif
 endif
+
 libssh = not_found
-if 'CONFIG_LIBSSH' in config_host
-  libssh = declare_dependency(compile_args: 
config_host['LIBSSH_CFLAGS'].split(),
-  link_args: config_host['LIBSSH_LIBS'].split())
+if not get_option('libssh').auto() or have_block
+  libssh = dependency('libssh', version: '>=0.8.7',
+method: 'pkg-config',
+required: get_option('libssh'),
+kwargs: static_kwargs)
 endif
+
 libbzip2 = not_found
 if not get_option('bzip2').auto() or have_block
   libbzip2 = cc.find_library('bz2', has_headers: ['bzlib.h'],
@@ -1451,6 +1455,7 @@ config_host_data.set('CONFIG_EBPF', libbpf.found())
 config_host_data.set('CONFIG_LIBDAXCTL', libdaxctl.found())
 config_host_data.set('CONFIG_LIBISCSI', libiscsi.found())
 config_host_data.set('CONFIG_LIBNFS', libnfs.found())
+config_host_data.set('CONFIG_LIBSSH', libssh.found())
 config_host_data.set('CONFIG_LINUX_AIO', libaio.found())
 config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
 config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
@@ -3430,7 +3435,7 @@ endif
 summary_info += {'seccomp support':   seccomp}
 summary_info += {'GlusterFS support': glusterfs}
 summary_info += {'TPM support':   config_host.has_key('CONFIG_TPM')}
-summary_info += {'libssh support':config_host.has_key('CONFIG_LIBSSH')}
+summary_info += {'libssh support':libssh}
 summary_info += {'lzo support':   lzo}
 summary_info += {'snappy support':snappy}
 summary_info += {'bzip2 support': libbzip2}
diff --git a/meson_options.txt b/meson_options.txt
index e392323732..4114bfcaa4 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -105,6 +105,8 @@ option('libdaxctl', type : 'feature', value : 'auto',
description: 'libdaxctl support')
 option('libpmem', type : 'feature', value : 'auto',
description: 'libpmem support')
+option('libssh', type : 'feature', value : 'auto',
+   description: 'ssh block device support')
 option('libudev', type : 'feature', value : 'auto',
description: 'Use libudev to enumerate host devices')
 option('libusb', type : 'feature', value : 'auto',
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 7a17ff4218..ae8f18edc2 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -53,6 +53,7 @@ meson_options_help() 

[PULL 05/16] tests/qtest: Run the PPC 32-bit tests with the 64-bit target binary, too

2021-12-14 Thread Thomas Huth
The ppc64 target is a superset of the 32-bit target, so we should
include the tests here, too. This used to be done in the past already,
but it got lost during the conversion to meson.

Fixes: a2ce7dbd91 ("meson: convert tests/qtest to meson")
Message-Id: <20211201104347.51922-2-th...@redhat.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/meson.build | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 975a0f2f5f..36ca175660 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -138,6 +138,7 @@ qtests_ppc = \
   ['boot-order-test', 'prom-env-test', 'boot-serial-test'] \
 
 qtests_ppc64 = \
+  qtests_ppc + \
   (config_all_devices.has_key('CONFIG_PSERIES') ? ['device-plug-test'] : []) + 
  \
   (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-xscom-test'] : []) +   
  \
   (config_all_devices.has_key('CONFIG_PSERIES') ? ['rtas-test'] : []) +
  \
-- 
2.27.0




[PULL 00/16] qtest and gitlab-CI improvements

2021-12-14 Thread Thomas Huth
 Hi!

The following changes since commit 76b56fdfc9fa43ec6e5986aee33f108c6c6a511e:

  Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into 
staging (2021-12-14 12:46:18 -0800)

are available in the Git repository at:

  https://gitlab.com/thuth/qemu.git tags/pull-request-2021-12-15

for you to fetch changes up to 7876cba8fc0cab9a4c803a30f427d4b20b95a868:

  gitlab-ci: Test compilation on Windows with MSYS2 (2021-12-15 08:08:59 +0100)


* Add virtio-net failover test
* Make qtests a little bit more flexible with regards to reduced configs
* Move libssh setup from configure to meson.build
* Run device-crash-test in CI
* Add jobs for NetBSD and OpenBSD to the CI
* Test compilation with MSYS2 in the gitlab-ci, too
* Add new virtio-iommu test


Eric Auger (4):
  virtio-iommu: Remove set_config callback
  virtio-iommu: Fix endianness in get_config
  virtio-iommu: Fix the domain_range end
  tests: qtest: Add virtio-iommu test

Laurent Vivier (4):
  qtest/libqos: add a function to initialize secondary PCI buses
  tests/qtest: add some tests for virtio-net failover
  tests/libqtest: add some virtio-net failover migration cancelling tests
  tests/libqtest: add a migration test with two couples of failover devices

Thomas Huth (8):
  tests/qtest: Run the PPC 32-bit tests with the 64-bit target binary, too
  tests/qtest: Fence the tests that need xlnx-zcu102 with 
CONFIG_XLNX_ZYNQMP_ARM
  tests/qtest: Add a function that gets a list with available machine types
  tests/qtest: Add a function to check whether a machine is available
  Move the libssh setup from configure to meson.build
  gitlab-ci.d/buildtest: Add jobs that run the device-crash-test
  gitlab-ci: Add cirrus-ci based tests for NetBSD and OpenBSD
  gitlab-ci: Test compilation on Windows with MSYS2

 .gitlab-ci.d/buildtest.yml|   23 +
 .gitlab-ci.d/cirrus.yml   |   35 +
 .gitlab-ci.d/cirrus/kvm-build.yml |   31 +
 .gitlab-ci.d/qemu-project.yml |1 +
 .gitlab-ci.d/windows.yml  |   98 +++
 configure |   27 -
 hw/virtio/trace-events|3 +-
 hw/virtio/virtio-iommu.c  |   42 +-
 include/hw/pci/pci_bridge.h   |8 +
 meson.build   |   13 +-
 meson_options.txt |2 +
 scripts/meson-buildoptions.sh |3 +
 tests/qtest/boot-serial-test.c|3 +-
 tests/qtest/cdrom-test.c  |8 +-
 tests/qtest/libqos/libqtest.h |8 +
 tests/qtest/libqos/meson.build|1 +
 tests/qtest/libqos/pci.c  |  119 
 tests/qtest/libqos/pci.h  |1 +
 tests/qtest/libqos/virtio-iommu.c |  126 
 tests/qtest/libqos/virtio-iommu.h |   40 ++
 tests/qtest/libqtest.c|   79 ++-
 tests/qtest/meson.build   |9 +-
 tests/qtest/prom-env-test.c   |8 +-
 tests/qtest/virtio-iommu-test.c   |  326 +
 tests/qtest/virtio-net-failover.c | 1352 +
 25 files changed, 2290 insertions(+), 76 deletions(-)
 create mode 100644 .gitlab-ci.d/cirrus/kvm-build.yml
 create mode 100644 .gitlab-ci.d/windows.yml
 create mode 100644 tests/qtest/libqos/virtio-iommu.c
 create mode 100644 tests/qtest/libqos/virtio-iommu.h
 create mode 100644 tests/qtest/virtio-iommu-test.c
 create mode 100644 tests/qtest/virtio-net-failover.c




Re: modify NetdevUserOptions through QMP in QEMU 6 - how?

2021-12-14 Thread Michael S. Tsirkin
On Wed, Dec 15, 2021 at 08:03:50AM +0100, Thomas Huth wrote:
> So if changing netdev parameters on the fly is something that we want, we
> should implement this properly instead indeed, and not via such an
> accidental bug.

How to do it is a separate thing, users don't really care at all.

-- 
MST




Re: modify NetdevUserOptions through QMP in QEMU 6 - how?

2021-12-14 Thread Michael S. Tsirkin
On Wed, Dec 15, 2021 at 07:48:06AM +0100, Markus Armbruster wrote:
> Jason Wang  writes:
> 
> > On Tue, Dec 14, 2021 at 10:53 PM Michael S. Tsirkin  wrote:
> >>
> >> On Mon, Dec 13, 2021 at 09:02:15AM +0100, Thomas Huth wrote:
> >> >  Hi!
> >> >
> >> > On 10/12/2021 18.02, Alexander Sosedkin wrote:
> >> > > With QEMU 5 I could totally issue a QMP netdev_add
> >> > > with the same ID to adjust the NetdevUserOptions I want,
> >> > > such as restrict or hostfwd. No deleting needed,
> >> > > just a netdev_add with what I want changed as a param.
> >> >
> >> > I'm a little bit surprised that this worked, since AFAIK there is no 
> >> > code in
> >> > QEMU to *change* the parameters of a running netdev... likely the code 
> >> > added
> >> > a new netdev with the same ID, replacing the old one?
> >> >
> >> > > With QEMU 6 it started failing, claiming the ID is already used.
> >> > > And if I do netdev_del + netdev_add, I just lose connectivity.
> >> > > What's even stranger, I still see old netdev attached in info network:
> >> > >
> >> > > > netdev_del {'id': 'net0'}
> >> > > {}
> >> > > > human-monitor-command {'command-line': 'info network'}
> >> > > virtio-net-pci.0:
> >> > > index=0,type=nic,model=virtio-net-pci,macaddr=52:54:00:12:34:56
> >> > >   \ net0: index=0,type=user,net=10.0.2.0,restrict=off
> >> >
> >> > I think that's "normal" - there used to be problems in the past that the
> >> > devices (virtio-net-pci in this case) did not like the netdevs to be 
> >> > removed
> >> > on the fly. So the netdevs are kept around until you remove the device, 
> >> > too
> >> > (i.e. issue a device_del for the virtio-net-pci device).
> >> >
> >> > > > netdev_add {'type': 'user', 'id': 'net0', 'restrict': False, 
> >> > > > 'hostfwd': [{'str': 'tcp:127.0.0.1:58239-:22'}]}
> >> > > {}
> >> > > > human-monitor-command {'command-line': 'info network'}
> >> > > unseal: virtio-net-pci.0:
> >> > > index=0,type=nic,model=virtio-net-pci,macaddr=52:54:00:12:34:56
> >> > >   \ net0: index=0,type=user,net=10.0.2.0,restrict=off
> >> > > net0: index=0,type=user,net=10.0.2.0,restrict=off
> >> > >
> >> > > What's the correct QMP command sequence to modify NetdevUserOptions?
> >> >
> >> > AFAIK there is no way to modify running netdevs - you'd have to delete 
> >> > the
> >> > netdev and the device, and then add both again. But I might have missed
> >> > something here, so I CC:-ed some people who might be more familiar with 
> >> > the
> >> > details here.
> >> >
> >> >  Thomas
> >> >
> >> >
> >> > > Please CC me on replies.
> >>
> >>
> >> Wow this really goes to show how wide our feature matrix is.
> >>
> >> Yes it's probably an unintended side effect but yes it
> >> did work it seems, so we really should not just break it
> >> without warning.
> 
> Depends.  See below.
> 
> >> Probably this one:
> >>
> >> commit 831734cce6494032e9233caff4d8442b3a1e7fef
> >> Author: Markus Armbruster 
> >> Date:   Wed Nov 25 11:02:20 2020 +0100
> >>
> >> net: Fix handling of id in netdev_add and netdev_del
> 
>CLI -netdev accumulates in option group "netdev".
> 
>Before commit 08712fcb85 "net: Track netdevs in NetClientState rather
>than QemuOpt", netdev_add added to the option group, and netdev_del
>removed from it, both HMP and QMP.  Thus, every netdev had a
>corresponding QemuOpts in this option group.
> 
>Commit 08712fcb85 dropped this for QMP netdev_add and both netdev_del.
>Now a netdev has a corresponding QemuOpts only when it was created
>with CLI or HMP.  Two issues:
> 
>* QMP and HMP netdev_del can leave QemuOpts behind, breaking HMP
>  netdev_add.  Reproducer:
> 
>$ qemu-system-x86_64 -S -display none -nodefaults -monitor stdio
>QEMU 5.1.92 monitor - type 'help' for more information
>(qemu) netdev_add user,id=net0
>(qemu) info network
>net0: index=0,type=user,net=10.0.2.0,restrict=off
>(qemu) netdev_del net0
>(qemu) info network
>(qemu) netdev_add user,id=net0
>upstream-qemu: Duplicate ID 'net0' for netdev
>Try "help netdev_add" for more information
> 
>  Fix by restoring the QemuOpts deletion in qmp_netdev_del(), but with
>  a guard, because the QemuOpts need not exist.
> 
>* QMP netdev_add loses its "no duplicate ID" check.  Reproducer:
> 
>$ qemu-system-x86_64 -S -display none -qmp stdio
>{"QMP": {"version": {"qemu": {"micro": 92, "minor": 1, "major": 
> 5}, "package": "v5.2.0-rc2-1-g02c1f0142c"}, "capabilities": ["oob"]}}
>{"execute": "qmp_capabilities"}
>{"return": {}}
>{"execute": "netdev_add", "arguments": {"type": "user", 
> "id":"net0"}}
>{"return": {}}
>{"execute": "netdev_add", "arguments": {"type": "user", 
> "id":"net0"}}
>{"return": {}}
> 
>  Fix by adding a duplicate ID check to 

Re: [PATCH v10 2/3] cpu-throttle: implement virtual CPU throttle

2021-12-14 Thread Markus Armbruster
huang...@chinatelecom.cn writes:

> From: Hyman Huang(黄勇) 
>
> Set up a negative feedback system when the vCPU thread
> handles a KVM_EXIT_DIRTY_RING_FULL exit by introducing
> throttle_us_per_full field in struct CPUState. Sleep
> throttle_us_per_full microseconds to throttle vCPU
> if dirtylimit is enabled.
>
> Start a thread to track current dirty page rates and
> tune the throttle_us_per_full dynamically until the current
> dirty page rate reaches the quota.
>
> Introduce the util function in the header for dirtylimit
> implementation.
>
> Signed-off-by: Hyman Huang(黄勇) 

[...]

> diff --git a/qapi/migration.json b/qapi/migration.json
> index bbfd48c..ac5fa56 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -1850,6 +1850,25 @@
>  { 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' }
>  
>  ##
> +# @DirtyLimitInfo:
> +#
> +# Dirty page rate limit information of virtual CPU.
> +#
> +# @cpu-index: index of virtual CPU.
> +#
> +# @limit-rate: upper limit of dirty page rate for virtual CPU.

If I understand your code correctly, zero means unlimited.  This is
undocumented.

In review of v9, I asked to "make @dirty-rate optional, present means
enable, absent means disable."  Any particular reason for not doing it
that way?

> +#
> +# @current-rate: current dirty page rate for virtual CPU.
> +#
> +# Since: 7.0
> +#
> +##
> +{ 'struct': 'DirtyLimitInfo',
> +  'data': { 'cpu-index': 'int',
> +'limit-rate': 'int64',
> +'current-rate': 'int64' } }
> +
> +##
>  # @snapshot-save:
>  #
>  # Save a VM snapshot

[...]




Re: modify NetdevUserOptions through QMP in QEMU 6 - how?

2021-12-14 Thread Michael S. Tsirkin
On Wed, Dec 15, 2021 at 08:03:50AM +0100, Thomas Huth wrote:
> On 15/12/2021 04.31, Jason Wang wrote:
> > On Tue, Dec 14, 2021 at 10:53 PM Michael S. Tsirkin  wrote:
> > > 
> > > On Mon, Dec 13, 2021 at 09:02:15AM +0100, Thomas Huth wrote:
> > > >   Hi!
> > > > 
> > > > On 10/12/2021 18.02, Alexander Sosedkin wrote:
> > > > > With QEMU 5 I could totally issue a QMP netdev_add
> > > > > with the same ID to adjust the NetdevUserOptions I want,
> > > > > such as restrict or hostfwd. No deleting needed,
> > > > > just a netdev_add with what I want changed as a param.
> > > > 
> > > > I'm a little bit surprised that this worked, since AFAIK there is no 
> > > > code in
> > > > QEMU to *change* the parameters of a running netdev... likely the code 
> > > > added
> > > > a new netdev with the same ID, replacing the old one?
> > > > 
> > > > > With QEMU 6 it started failing, claiming the ID is already used.
> > > > > And if I do netdev_del + netdev_add, I just lose connectivity.
> > > > > What's even stranger, I still see old netdev attached in info network:
> > > > > 
> > > > > > netdev_del {'id': 'net0'}
> > > > > {}
> > > > > > human-monitor-command {'command-line': 'info network'}
> > > > > virtio-net-pci.0:
> > > > > index=0,type=nic,model=virtio-net-pci,macaddr=52:54:00:12:34:56
> > > > >\ net0: index=0,type=user,net=10.0.2.0,restrict=off
> > > > 
> > > > I think that's "normal" - there used to be problems in the past that the
> > > > devices (virtio-net-pci in this case) did not like the netdevs to be 
> > > > removed
> > > > on the fly. So the netdevs are kept around until you remove the device, 
> > > > too
> > > > (i.e. issue a device_del for the virtio-net-pci device).
> > > > 
> > > > > > netdev_add {'type': 'user', 'id': 'net0', 'restrict': False, 
> > > > > > 'hostfwd': [{'str': 'tcp:127.0.0.1:58239-:22'}]}
> > > > > {}
> > > > > > human-monitor-command {'command-line': 'info network'}
> > > > > unseal: virtio-net-pci.0:
> > > > > index=0,type=nic,model=virtio-net-pci,macaddr=52:54:00:12:34:56
> > > > >\ net0: index=0,type=user,net=10.0.2.0,restrict=off
> > > > > net0: index=0,type=user,net=10.0.2.0,restrict=off
> > > > > 
> > > > > What's the correct QMP command sequence to modify NetdevUserOptions?
> > > > 
> > > > AFAIK there is no way to modify running netdevs - you'd have to delete 
> > > > the
> > > > netdev and the device, and then add both again. But I might have missed
> > > > something here, so I CC:-ed some people who might be more familiar with 
> > > > the
> > > > details here.
> > > > 
> > > >   Thomas
> > > > 
> > > > 
> > > > > Please CC me on replies.
> > > 
> > > 
> > > Wow this really goes to show how wide our feature matrix is.
> > > 
> > > Yes it's probably an unintended side effect but yes it
> > > did work it seems, so we really should not just break it
> > > without warning.
> > > 
> > > 
> > > Probably this one:
> > > 
> > > commit 831734cce6494032e9233caff4d8442b3a1e7fef
> > > Author: Markus Armbruster 
> > > Date:   Wed Nov 25 11:02:20 2020 +0100
> > > 
> > >  net: Fix handling of id in netdev_add and netdev_del
> > > 
> > > 
> > > 
> > > Jason, what is your take here?
> > 
> > I might be wrong, but I agree with Thomas. Adding a netdev with the
> > same ID looks wrong, if it works, it looks like a bug.
> 
> It certainly calls for trouble as soon as you try to delete the netdev again
> - does it delete the first (inactive) instance? Does it delete the second
> active one? Does it delete both? (Otherwise it will leave a dangling
> instance behind) ...
> So if changing netdev parameters on the fly is something that we want, we
> should implement this properly instead indeed, and not via such an
> accidental bug.
> 
>  Thomas


Alexander, could you supply a reproducer so we can check in which
QEMU versions it worked?
If it worked for a long time, then even if it was a result of a bug
it's an accidental ABI and we should not just break it.

-- 
MST




Re: modify NetdevUserOptions through QMP in QEMU 6 - how?

2021-12-14 Thread Thomas Huth

On 15/12/2021 04.31, Jason Wang wrote:

On Tue, Dec 14, 2021 at 10:53 PM Michael S. Tsirkin  wrote:


On Mon, Dec 13, 2021 at 09:02:15AM +0100, Thomas Huth wrote:

  Hi!

On 10/12/2021 18.02, Alexander Sosedkin wrote:

With QEMU 5 I could totally issue a QMP netdev_add
with the same ID to adjust the NetdevUserOptions I want,
such as restrict or hostfwd. No deleting needed,
just a netdev_add with what I want changed as a param.


I'm a little bit surprised that this worked, since AFAIK there is no code in
QEMU to *change* the parameters of a running netdev... likely the code added
a new netdev with the same ID, replacing the old one?


With QEMU 6 it started failing, claiming the ID is already used.
And if I do netdev_del + netdev_add, I just lose connectivity.
What's even stranger, I still see old netdev attached in info network:


netdev_del {'id': 'net0'}

{}

human-monitor-command {'command-line': 'info network'}

virtio-net-pci.0:
index=0,type=nic,model=virtio-net-pci,macaddr=52:54:00:12:34:56
   \ net0: index=0,type=user,net=10.0.2.0,restrict=off


I think that's "normal" - there used to be problems in the past that the
devices (virtio-net-pci in this case) did not like the netdevs to be removed
on the fly. So the netdevs are kept around until you remove the device, too
(i.e. issue a device_del for the virtio-net-pci device).


netdev_add {'type': 'user', 'id': 'net0', 'restrict': False, 'hostfwd': 
[{'str': 'tcp:127.0.0.1:58239-:22'}]}

{}

human-monitor-command {'command-line': 'info network'}

unseal: virtio-net-pci.0:
index=0,type=nic,model=virtio-net-pci,macaddr=52:54:00:12:34:56
   \ net0: index=0,type=user,net=10.0.2.0,restrict=off
net0: index=0,type=user,net=10.0.2.0,restrict=off

What's the correct QMP command sequence to modify NetdevUserOptions?


AFAIK there is no way to modify running netdevs - you'd have to delete the
netdev and the device, and then add both again. But I might have missed
something here, so I CC:-ed some people who might be more familiar with the
details here.

  Thomas



Please CC me on replies.



Wow this really goes to show how wide our feature matrix is.

Yes it's probably an unintended side effect but yes it
did work it seems, so we really should not just break it
without warning.


Probably this one:

commit 831734cce6494032e9233caff4d8442b3a1e7fef
Author: Markus Armbruster 
Date:   Wed Nov 25 11:02:20 2020 +0100

 net: Fix handling of id in netdev_add and netdev_del



Jason, what is your take here?


I might be wrong, but I agree with Thomas. Adding a netdev with the
same ID looks wrong, if it works, it looks like a bug. 


It certainly calls for trouble as soon as you try to delete the netdev again 
- does it delete the first (inactive) instance? Does it delete the second 
active one? Does it delete both? (Otherwise it will leave a dangling 
instance behind) ...
So if changing netdev parameters on the fly is something that we want, we 
should implement this properly instead indeed, and not via such an 
accidental bug.


 Thomas




Re: modify NetdevUserOptions through QMP in QEMU 6 - how?

2021-12-14 Thread Markus Armbruster
Jason Wang  writes:

> On Tue, Dec 14, 2021 at 10:53 PM Michael S. Tsirkin  wrote:
>>
>> On Mon, Dec 13, 2021 at 09:02:15AM +0100, Thomas Huth wrote:
>> >  Hi!
>> >
>> > On 10/12/2021 18.02, Alexander Sosedkin wrote:
>> > > With QEMU 5 I could totally issue a QMP netdev_add
>> > > with the same ID to adjust the NetdevUserOptions I want,
>> > > such as restrict or hostfwd. No deleting needed,
>> > > just a netdev_add with what I want changed as a param.
>> >
>> > I'm a little bit surprised that this worked, since AFAIK there is no code 
>> > in
>> > QEMU to *change* the parameters of a running netdev... likely the code 
>> > added
>> > a new netdev with the same ID, replacing the old one?
>> >
>> > > With QEMU 6 it started failing, claiming the ID is already used.
>> > > And if I do netdev_del + netdev_add, I just lose connectivity.
>> > > What's even stranger, I still see old netdev attached in info network:
>> > >
>> > > > netdev_del {'id': 'net0'}
>> > > {}
>> > > > human-monitor-command {'command-line': 'info network'}
>> > > virtio-net-pci.0:
>> > > index=0,type=nic,model=virtio-net-pci,macaddr=52:54:00:12:34:56
>> > >   \ net0: index=0,type=user,net=10.0.2.0,restrict=off
>> >
>> > I think that's "normal" - there used to be problems in the past that the
>> > devices (virtio-net-pci in this case) did not like the netdevs to be 
>> > removed
>> > on the fly. So the netdevs are kept around until you remove the device, too
>> > (i.e. issue a device_del for the virtio-net-pci device).
>> >
>> > > > netdev_add {'type': 'user', 'id': 'net0', 'restrict': False, 
>> > > > 'hostfwd': [{'str': 'tcp:127.0.0.1:58239-:22'}]}
>> > > {}
>> > > > human-monitor-command {'command-line': 'info network'}
>> > > unseal: virtio-net-pci.0:
>> > > index=0,type=nic,model=virtio-net-pci,macaddr=52:54:00:12:34:56
>> > >   \ net0: index=0,type=user,net=10.0.2.0,restrict=off
>> > > net0: index=0,type=user,net=10.0.2.0,restrict=off
>> > >
>> > > What's the correct QMP command sequence to modify NetdevUserOptions?
>> >
>> > AFAIK there is no way to modify running netdevs - you'd have to delete the
>> > netdev and the device, and then add both again. But I might have missed
>> > something here, so I CC:-ed some people who might be more familiar with the
>> > details here.
>> >
>> >  Thomas
>> >
>> >
>> > > Please CC me on replies.
>>
>>
>> Wow this really goes to show how wide our feature matrix is.
>>
>> Yes it's probably an unintended side effect but yes it
>> did work it seems, so we really should not just break it
>> without warning.

Depends.  See below.

>> Probably this one:
>>
>> commit 831734cce6494032e9233caff4d8442b3a1e7fef
>> Author: Markus Armbruster 
>> Date:   Wed Nov 25 11:02:20 2020 +0100
>>
>> net: Fix handling of id in netdev_add and netdev_del

   CLI -netdev accumulates in option group "netdev".

   Before commit 08712fcb85 "net: Track netdevs in NetClientState rather
   than QemuOpt", netdev_add added to the option group, and netdev_del
   removed from it, both HMP and QMP.  Thus, every netdev had a
   corresponding QemuOpts in this option group.

   Commit 08712fcb85 dropped this for QMP netdev_add and both netdev_del.
   Now a netdev has a corresponding QemuOpts only when it was created
   with CLI or HMP.  Two issues:

   * QMP and HMP netdev_del can leave QemuOpts behind, breaking HMP
 netdev_add.  Reproducer:

   $ qemu-system-x86_64 -S -display none -nodefaults -monitor stdio
   QEMU 5.1.92 monitor - type 'help' for more information
   (qemu) netdev_add user,id=net0
   (qemu) info network
   net0: index=0,type=user,net=10.0.2.0,restrict=off
   (qemu) netdev_del net0
   (qemu) info network
   (qemu) netdev_add user,id=net0
   upstream-qemu: Duplicate ID 'net0' for netdev
   Try "help netdev_add" for more information

 Fix by restoring the QemuOpts deletion in qmp_netdev_del(), but with
 a guard, because the QemuOpts need not exist.

   * QMP netdev_add loses its "no duplicate ID" check.  Reproducer:

   $ qemu-system-x86_64 -S -display none -qmp stdio
   {"QMP": {"version": {"qemu": {"micro": 92, "minor": 1, "major": 5}, 
"package": "v5.2.0-rc2-1-g02c1f0142c"}, "capabilities": ["oob"]}}
   {"execute": "qmp_capabilities"}
   {"return": {}}
   {"execute": "netdev_add", "arguments": {"type": "user", "id":"net0"}}
   {"return": {}}
   {"execute": "netdev_add", "arguments": {"type": "user", "id":"net0"}}
   {"return": {}}

 Fix by adding a duplicate ID check to net_client_init1() to replace
 the lost one.  The check is redundant for callers where QemuOpts
 still checks, i.e. for CLI and HMP.

   Reported-by: Andrew Melnichenko 
   Fixes: 08712fcb851034228b61f75bd922863a984a4f60
   Cc: qemu-sta...@nongnu.org
   Signed-off-by: Markus Armbruster 
   Reviewed-by: 

Re: [PATCH 08/12] s390x/pci: don't fence interpreted devices without MSI-X

2021-12-14 Thread Pierre Morel




On 12/7/21 22:04, Matthew Rosato wrote:

Lack of MSI-X support is not an issue for interpreted passthrough
devices, so let's let these in.  This will allow, for example, ISM
devices to be passed through -- but only when interpretation is
available and being used.

Signed-off-by: Matthew Rosato 
---
  hw/s390x/s390-pci-bus.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 451bd32d92..503326210a 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -1096,7 +1096,7 @@ static void s390_pcihost_plug(HotplugHandler 
*hotplug_dev, DeviceState *dev,
  pbdev->interp = false;
  }
  
-if (s390_pci_msix_init(pbdev)) {

+if (s390_pci_msix_init(pbdev) && !pbdev->interp) {
  error_setg(errp, "MSI-X support is mandatory "
 "in the S390 architecture");
  return;



Reviewed-by: Pierre Morel 

--
Pierre Morel
IBM Lab Boeblingen



Re: [PATCH 1/1] pcie: Do not set power state for some hot-plugged devices

2021-12-14 Thread Gerd Hoffmann
On Tue, Dec 14, 2021 at 09:53:12PM +, Annie Li wrote:
> After the PCIe device is hot-plugged, the device's power state is
> initialized as ON. However, the device isn't powered on yet, i.e.
> the PCI_EXP_SLTCTL_PCC bit isn't set to PCI_EXP_SLTCTL_PWR_ON.
> Later on, its power state will be set back to OFF due to the non
> PCI_EXP_SLTCTL_PWR_ON state. The device is invisible until
> PCI_EXP_SLTCTL_PWR_ON is set.
> 
> This may be appropriate for general PCIe hot-plug cases. However,
> if the device is hot-plugged when the VM is in VM_STATE_PRELAUNCH
> state, especially the system disk device, the firmware will fail
> to find the system disk. As a result, the guest fails to boot.

Maybe we should just not set DeviceState->hotplugged = true for devices
added in VM_STATE_PRELAUNCH?  It's not actual hotplug (i.e. device added
while the system is running) after all ...

There are lots of places checking DeviceState->hotplugged, and I suspect
we have similar issues elsewhere.

take care,
  Gerd




Re: modify NetdevUserOptions through QMP in QEMU 6 - how?

2021-12-14 Thread Jason Wang
On Tue, Dec 14, 2021 at 10:53 PM Michael S. Tsirkin  wrote:
>
> On Mon, Dec 13, 2021 at 09:02:15AM +0100, Thomas Huth wrote:
> >  Hi!
> >
> > On 10/12/2021 18.02, Alexander Sosedkin wrote:
> > > With QEMU 5 I could totally issue a QMP netdev_add
> > > with the same ID to adjust the NetdevUserOptions I want,
> > > such as restrict or hostfwd. No deleting needed,
> > > just a netdev_add with what I want changed as a param.
> >
> > I'm a little bit surprised that this worked, since AFAIK there is no code in
> > QEMU to *change* the parameters of a running netdev... likely the code added
> > a new netdev with the same ID, replacing the old one?
> >
> > > With QEMU 6 it started failing, claiming the ID is already used.
> > > And if I do netdev_del + netdev_add, I just lose connectivity.
> > > What's even stranger, I still see old netdev attached in info network:
> > >
> > > > netdev_del {'id': 'net0'}
> > > {}
> > > > human-monitor-command {'command-line': 'info network'}
> > > virtio-net-pci.0:
> > > index=0,type=nic,model=virtio-net-pci,macaddr=52:54:00:12:34:56
> > >   \ net0: index=0,type=user,net=10.0.2.0,restrict=off
> >
> > I think that's "normal" - there used to be problems in the past that the
> > devices (virtio-net-pci in this case) did not like the netdevs to be removed
> > on the fly. So the netdevs are kept around until you remove the device, too
> > (i.e. issue a device_del for the virtio-net-pci device).
> >
> > > > netdev_add {'type': 'user', 'id': 'net0', 'restrict': False, 'hostfwd': 
> > > > [{'str': 'tcp:127.0.0.1:58239-:22'}]}
> > > {}
> > > > human-monitor-command {'command-line': 'info network'}
> > > unseal: virtio-net-pci.0:
> > > index=0,type=nic,model=virtio-net-pci,macaddr=52:54:00:12:34:56
> > >   \ net0: index=0,type=user,net=10.0.2.0,restrict=off
> > > net0: index=0,type=user,net=10.0.2.0,restrict=off
> > >
> > > What's the correct QMP command sequence to modify NetdevUserOptions?
> >
> > AFAIK there is no way to modify running netdevs - you'd have to delete the
> > netdev and the device, and then add both again. But I might have missed
> > something here, so I CC:-ed some people who might be more familiar with the
> > details here.
> >
> >  Thomas
> >
> >
> > > Please CC me on replies.
>
>
> Wow this really goes to show how wide our feature matrix is.
>
> Yes it's probably an unintended side effect but yes it
> did work it seems, so we really should not just break it
> without warning.
>
>
> Probably this one:
>
> commit 831734cce6494032e9233caff4d8442b3a1e7fef
> Author: Markus Armbruster 
> Date:   Wed Nov 25 11:02:20 2020 +0100
>
> net: Fix handling of id in netdev_add and netdev_del
>
>
>
> Jason, what is your take here?

I might be wrong, but I agree with Thomas. Adding a netdev with the
same ID looks wrong, if it works, it looks like a bug. And I don't
think we support changing netdev properties.

Thanks

>
>
> Alexander, what happens if we just drop the duplicate ID check? Do
> things work for you again?
> Warning: completely untested.
>
> Signed-off-by: Michael S. Tsirkin 
>
>
> diff --git a/net/net.c b/net/net.c
> index f0d14dbfc1..01f5a187b6 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -1055,12 +1055,6 @@ static int net_client_init1(const Netdev *netdev, bool 
> is_netdev, Error **errp)
>  }
>  }
>
> -nc = qemu_find_netdev(netdev->id);
> -if (nc) {
> -error_setg(errp, "Duplicate ID '%s'", netdev->id);
> -return -1;
> -}
> -
>  if (net_client_init_fun[netdev->type](netdev, netdev->id, peer, errp) < 
> 0) {
>  /* FIXME drop when all init functions store an Error */
>  if (errp && !*errp) {
> --
> MST
>




Re: [PATCH v10 06/10] ACPI ERST: build the ACPI ERST table

2021-12-14 Thread Ani Sinha
On Tue, Dec 14, 2021 at 11:42 PM Eric DeVolder  wrote:
>
> Ani, one quick question below.
> eric
>
> On 12/13/21 20:58, Ani Sinha wrote:
> > On Tue, Dec 14, 2021 at 2:57 AM Eric DeVolder  
> > wrote:
> >>
> >> Hi Ani,
> >> inline response below.
> >> Eric
> >>
> >> On 12/12/21 07:43, Ani Sinha wrote:
> >>> .
> >>>
> >>> On Thu, Dec 9, 2021 at 11:28 PM Eric DeVolder  
> >>> wrote:
> 
>  This builds the ACPI ERST table to inform OSPM how to communicate
>  with the acpi-erst device.
> >>>
> >>> This patch starts in the middle of pci device code addition, between
> >>> erst_reg_ops and erst_post_load. I do not like this :(
> >>
> >> Below you suggest moving the contents of this patch to the bottom of 
> >> erst.c.
> >> Before I do that, consider moving the contents to the top of the file 
> >> instead, I believe that would
> >> address the concerns cited here, and it would allow for the last line of 
> >> the file to be the
> >> type_init(), like other files.
> >>
> >> I'll move it, just let me know if top or bottom.
> >
> > Moving to the top is fine.
> I've moved this to the top. The question is if you prefer this be integrated 
> into the main erst.c
> patch, or still separated out?

you can keep this separate no problem. Then you can incorporate mine
and michael's suggestions into it.

> thanks!
> eric
>
> >
> >> Thanks!
> >> eric
> >>
> >>
> >>>
> 
>  Signed-off-by: Eric DeVolder 
>  ---
> hw/acpi/erst.c | 241 
>  +
> 1 file changed, 241 insertions(+)
> 
>  diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c
>  index 81f5435..753425a 100644
>  --- a/hw/acpi/erst.c
>  +++ b/hw/acpi/erst.c
>  @@ -711,6 +711,247 @@ static const MemoryRegionOps erst_reg_ops = {
> .endianness = DEVICE_NATIVE_ENDIAN,
> };
> 
>  +
>  +/***/
>  +/***/
>  +
>  +/* ACPI 4.0: Table 17-19 Serialization Instructions */
>  +#define INST_READ_REGISTER 0x00
>  +#define INST_READ_REGISTER_VALUE   0x01
>  +#define INST_WRITE_REGISTER0x02
>  +#define INST_WRITE_REGISTER_VALUE  0x03
>  +#define INST_NOOP  0x04
>  +#define INST_LOAD_VAR1 0x05
>  +#define INST_LOAD_VAR2 0x06
>  +#define INST_STORE_VAR10x07
>  +#define INST_ADD   0x08
>  +#define INST_SUBTRACT  0x09
>  +#define INST_ADD_VALUE 0x0A
>  +#define INST_SUBTRACT_VALUE0x0B
>  +#define INST_STALL 0x0C
>  +#define INST_STALL_WHILE_TRUE  0x0D
>  +#define INST_SKIP_NEXT_INSTRUCTION_IF_TRUE 0x0E
>  +#define INST_GOTO  0x0F
>  +#define INST_SET_SRC_ADDRESS_BASE  0x10
>  +#define INST_SET_DST_ADDRESS_BASE  0x11
>  +#define INST_MOVE_DATA 0x12
> >>>
> >>> I prefer these definitions to come at the top of the file along with
> >>> other definitions.
> >>>
>  +
>  +/* ACPI 4.0: 17.4.1.2 Serialization Instruction Entries */
>  +static void build_serialization_instruction_entry(GArray *table_data,
> >>>
> >>> This function and build_erst() can come at the end of erst.c. They go
> >>> together and are doing a common but different operation from the
> >>> operations of the pci device - building the erst table. Hence, their
> >>> code should be separate from pci device code. A new file would be an
> >>> overkill at this state IMHO but in the future if erst table generation
> >>> code gains more weight, it can be split into two files.
> >>>
>  +uint8_t serialization_action,
>  +uint8_t instruction,
>  +uint8_t flags,
>  +uint8_t register_bit_width,
>  +uint64_t register_address,
>  +uint64_t value,
>  +uint64_t mask)
>  +{
>  +/* ACPI 4.0: Table 17-18 Serialization Instruction Entry */
>  +struct AcpiGenericAddress gas;
>  +
>  +/* Serialization Action */
>  +build_append_int_noprefix(table_data, serialization_action, 1);
>  +/* Instruction */
>  +build_append_int_noprefix(table_data, instruction , 1);
>  +/* Flags */
>  +build_append_int_noprefix(table_data, flags   , 1);
>  +/* Reserved */
>  +build_append_int_noprefix(table_data, 0   , 1);
>  +/* Register Region */
>  +gas.space_id = AML_SYSTEM_MEMORY;
>  +gas.bit_width = register_bit_width;
>  +gas.bit_offset = 0;
>  +switch (register_bit_width) {
>  +case 8:
>  +gas.access_width = 1;
>  +break;
>  +case 16:
>  + 

Re: [RFC] vhost-vdpa-net: add vhost-vdpa-net host device support

2021-12-14 Thread Jason Wang
On Tue, Dec 14, 2021 at 9:11 PM Stefan Hajnoczi  wrote:
>
> On Tue, Dec 14, 2021 at 10:22:53AM +0800, Jason Wang wrote:
> > On Mon, Dec 13, 2021 at 11:14 PM Stefan Hajnoczi  
> > wrote:
> > >
> > > On Mon, Dec 13, 2021 at 10:47:00AM +0800, Jason Wang wrote:
> > > > On Sun, Dec 12, 2021 at 5:30 PM Michael S. Tsirkin  
> > > > wrote:
> > > > >
> > > > > On Sat, Dec 11, 2021 at 03:00:27AM +, Longpeng (Mike, Cloud 
> > > > > Infrastructure Service Product Dept.) wrote:
> > > > > >
> > > > > >
> > > > > > > -Original Message-
> > > > > > > From: Stefan Hajnoczi [mailto:stefa...@redhat.com]
> > > > > > > Sent: Thursday, December 9, 2021 5:17 PM
> > > > > > > To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.)
> > > > > > > 
> > > > > > > Cc: jasow...@redhat.com; m...@redhat.com; pa...@nvidia.com;
> > > > > > > xieyon...@bytedance.com; sgarz...@redhat.com; Yechuan 
> > > > > > > ;
> > > > > > > Gonglei (Arei) ; qemu-devel@nongnu.org
> > > > > > > Subject: Re: [RFC] vhost-vdpa-net: add vhost-vdpa-net host device 
> > > > > > > support
> > > > > > >
> > > > > > > On Wed, Dec 08, 2021 at 01:20:10PM +0800, Longpeng(Mike) wrote:
> > > > > > > > From: Longpeng 
> > > > > > > >
> > > > > > > > Hi guys,
> > > > > > > >
> > > > > > > > This patch introduces vhost-vdpa-net device, which is inspired
> > > > > > > > by vhost-user-blk and the proposal of vhost-vdpa-blk device [1].
> > > > > > > >
> > > > > > > > I've tested this patch on Huawei's offload card:
> > > > > > > > ./x86_64-softmmu/qemu-system-x86_64 \
> > > > > > > > -device vhost-vdpa-net-pci,vdpa-dev=/dev/vhost-vdpa-0
> > > > > > > >
> > > > > > > > For virtio hardware offloading, the most important requirement 
> > > > > > > > for us
> > > > > > > > is to support live migration between offloading cards from 
> > > > > > > > different
> > > > > > > > vendors, the combination of netdev and virtio-net seems too 
> > > > > > > > heavy, we
> > > > > > > > prefer a lightweight way.
> > > > > > > >
> > > > > > > > Maybe we could support both in the future ? Such as:
> > > > > > > >
> > > > > > > > * Lightweight
> > > > > > > >  Net: vhost-vdpa-net
> > > > > > > >  Storage: vhost-vdpa-blk
> > > > > > > >
> > > > > > > > * Heavy but more powerful
> > > > > > > >  Net: netdev + virtio-net + vhost-vdpa
> > > > > > > >  Storage: bdrv + virtio-blk + vhost-vdpa
> > > > > > > >
> > > > > > > > [1] 
> > > > > > > > https://www.mail-archive.com/qemu-devel@nongnu.org/msg797569.html
> > > > > > >
> > > > > > > Stefano presented a plan for vdpa-blk at KVM Forum 2021:
> > > > > > > https://kvmforum2021.sched.com/event/ke3a/vdpa-blk-unified-hardware-and-sof
> > > > > > > tware-offload-for-virtio-blk-stefano-garzarella-red-hat
> > > > > > >
> > > > > > > It's closer to today's virtio-net + vhost-net approach than the
> > > > > > > vhost-vdpa-blk device you have mentioned. The idea is to treat 
> > > > > > > vDPA as
> > > > > > > an offload feature rather than a completely separate code path 
> > > > > > > that
> > > > > > > needs to be maintained and tested. That way QEMU's block layer 
> > > > > > > features
> > > > > > > and live migration work with vDPA devices and re-use the 
> > > > > > > virtio-blk
> > > > > > > code. The key functionality that has not been implemented yet is 
> > > > > > > a "fast
> > > > > > > path" mechanism that allows the QEMU virtio-blk device's 
> > > > > > > virtqueue to be
> > > > > > > offloaded to vDPA.
> > > > > > >
> > > > > > > The unified vdpa-blk architecture should deliver the same 
> > > > > > > performance
> > > > > > > as the vhost-vdpa-blk device you mentioned but with more 
> > > > > > > features, so I
> > > > > > > wonder what aspects of the vhost-vdpa-blk idea are important to 
> > > > > > > you?
> > > > > > >
> > > > > > > QEMU already has vhost-user-blk, which takes a similar approach 
> > > > > > > as the
> > > > > > > vhost-vdpa-blk device you are proposing. I'm not against the
> > > > > > > > vhost-vdpa-blk approach in principle, but would like to understand 
> > > > > > > your
> > > > > > > requirements and see if there is a way to collaborate on one 
> > > > > > > vdpa-blk
> > > > > > > implementation instead of dividing our efforts between two.
> > > > > > >
> > > > > >
> > > > > > We prefer a simple way in the virtio hardware offloading case, it 
> > > > > > could reduce
> > > > > > our maintenance workload, we no longer need to maintain the virtio-net, 
> > > > > > netdev,
> > > > > > virtio-blk, bdrv and ... any more. If we need to support other vdpa 
> > > > > > devices
> > > > > > (such as virtio-crypto, virtio-fs) in the future, then we also need 
> > > > > > to maintain
> > > > > > the corresponding device emulation code?
> > > > > >
> > > > > > For the virtio hardware offloading case, we usually use the 
> > > > > > vfio-pci framework,
> > > > > > it saves a lot of our maintenance work in QEMU, we don't need to 
> > > > > > touch the device
> > > > > > types. Inspired by Jason, what we really prefer is 

Re: [PATCH v3 23/23] migration: Use multifd before we check for the zero page

2021-12-14 Thread Peter Xu
On Mon, Dec 13, 2021 at 10:03:53AM +0100, Juan Quintela wrote:
> Peter Xu  wrote:
> > On Thu, Dec 02, 2021 at 06:38:27PM +0100, Juan Quintela wrote:
> >> This needs to be improved to be compatible with old versions.
> >
> > Any plan to let new binary work with old binary?
> 
> Yes, but I was waiting for 7.0 to get out.  Basically I need to do:
> 
> if (old)
> run the old code
> else
> new code
> 
> this needs to be done only in a couple of places, but I need the
> machine_type 7.0 created to put the property there.

OK.  We can also have the tunable be false by default until the new machine
type arrives; then the series won't need to be blocked by the machine type
patch and it'll be only one last patch to be adjusted there.

> 
> > Maybe boost the version field for multifd packet (along with a
> > multifd_version=2 parameter and only set on new machine types)?
> 
> For now, we only need to add a flag for the ZERO_PAGE functionality.  if
> we are on older qemu, just don't test for zero pages.  On reception, we
> can just accept everything, i.e. if there are no zero pages, everything
> is ok.

Do you mean zero detection for multifd=on only?  As otherwise it could regress
old machine types in some very common scenarios, iiuc, e.g. idle guests.

> 
> > PS: We should really have some handshake mechanism between src/dst, I 
> > dreamt it
> > for a long time..  So that we only need to specify the 
> > capability/parameters on
> > src someday and we'll never see incompatible migration failing randomly 
> > because
> > the handshake should guarantee no stupid mistake..  Sad.
> 
> That has been on my ToDo list for too long, just need the time to do
> it.  It would make everything much, much easier.
> 
> >> But  if we don't care about RDMA, why do we care about
> >> control_save_page()?
> >
> > Could anyone help to explain why we don't care?  I still see bugfixes 
> > coming..
> 
> Sentence was inside a context.  We don't care for RDMA while we are on
> multifd.  If multifd ever supports RDMA, it would be a new
> implementation that don't use such hooks.
> 
> IMVHO, RDMA implementation in qemu is quite bad.  For historic reasons,
> they needed to use qemu_file abstraction for communication, so they are
> dropping directly the ability of doing direct copies of pages.
> So, if one is requiring to mlock all the guest memory on both sides to
> use RDMA, the *right* thing to do from my point of view is just
> "remotely" read the page without any overhead.
> 
> Yes, that requires quite a bit of changes, I was not suggesting that it
> was a trivial task.

I see!

Thanks,

-- 
Peter Xu




Re: [PATCH v5 16/23] hw/riscv: virt: Use AIA INTC compatible string when available

2021-12-14 Thread Kip Walker
On Fri, Dec 10, 2021 at 8:35 PM Anup Patel  wrote:
>
> We should use the AIA INTC compatible string in the CPU INTC
> DT nodes when the CPUs support AIA feature. This will allow
> Linux INTC driver to use AIA local interrupt CSRs.
>
> Signed-off-by: Anup Patel 
> Reviewed-by: Alistair Francis 
> ---
>  hw/riscv/virt.c | 13 +++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 3af074148e..936156554c 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -211,8 +211,17 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, 
> int socket,
>  qemu_fdt_add_subnode(mc->fdt, intc_name);
>  qemu_fdt_setprop_cell(mc->fdt, intc_name, "phandle",
>  intc_phandles[cpu]);
> -qemu_fdt_setprop_string(mc->fdt, intc_name, "compatible",
> -"riscv,cpu-intc");
> +if (riscv_feature(&s->soc[socket].harts[cpu].env,
> +  RISCV_FEATURE_AIA)) {
> +static const char * const compat[2] = {
> +"riscv,cpu-intc-aia", "riscv,cpu-intc"
> +};
> +qemu_fdt_setprop_string_array(mc->fdt, name, "compatible",
> +  (char **)&compat, ARRAY_SIZE(compat));

I think this should be intc_name rather than name.

Kip

> +} else {
> +qemu_fdt_setprop_string(mc->fdt, intc_name, "compatible",
> +"riscv,cpu-intc");
> +}
>  qemu_fdt_setprop(mc->fdt, intc_name, "interrupt-controller", NULL, 
> 0);
>  qemu_fdt_setprop_cell(mc->fdt, intc_name, "#interrupt-cells", 1);
>
> --
> 2.25.1
>
>



[PATCH 08/47] vhost-user: fix duplicated notifier MR init

2021-12-14 Thread Michael Roth
From: Xueming Li 

In case of device resume after suspend, the VQ notifier MR is still valid.
Duplicated registrations explode memory block list and slow down device
resume.

Fixes: 44866521bd6e ("vhost-user: support registering external host notifiers")
Cc: tiwei@intel.com
Cc: qemu-sta...@nongnu.org
Cc: Yuwei Zhang 

Signed-off-by: Xueming Li 
Message-Id: <20211008080215.590292-1-xuemi...@nvidia.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
(cherry picked from commit a1ed9ef1de87c3e86ff68589604298ec90875a14)
Signed-off-by: Michael Roth 
---
 hw/virtio/vhost-user.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index aec6cc1990..7b35c5d71d 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -1469,8 +1469,9 @@ static int 
vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
 
 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
user, queue_idx);
-memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
-  page_size, addr);
+if (!n->mr.ram) /* Don't init again after suspend. */
+memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
+  page_size, addr);
 g_free(name);
 
 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
-- 
2.25.1




[PATCH 05/47] hmp: Unbreak "change vnc"

2021-12-14 Thread Michael Roth
From: Markus Armbruster 

HMP command "change vnc" can take the password as argument, or prompt
for it:

(qemu) change vnc password 123
(qemu) change vnc password
Password: ***
(qemu)

This regressed in commit cfb5387a1d "hmp: remove "change vnc TARGET"
command", v6.0.0.

(qemu) change vnc passwd 123
Password: ***
(qemu) change vnc passwd
(qemu)

The latter passes NULL to qmp_change_vnc_password(), which is a no-no.
Looks like it puts the display into "password required, but none set"
state.

The logic error is easy to miss in review, but testing should've
caught it.

Fix the obvious way.

Fixes: cfb5387a1de2acda23fb5c97d2378b9e7ddf8025
Cc: qemu-sta...@nongnu.org
Signed-off-by: Markus Armbruster 
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Gerd Hoffmann 
Message-Id: <20210909081219.308065-2-arm...@redhat.com>
Signed-off-by: Laurent Vivier 
(cherry picked from commit 6193344f9337f8b76cd44ce94a32c9900d907d35)
Signed-off-by: Michael Roth 
---
 monitor/hmp-cmds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index e00255f7ee..a7e197a90b 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1496,7 +1496,7 @@ void hmp_change(Monitor *mon, const QDict *qdict)
 }
 if (strcmp(target, "passwd") == 0 ||
 strcmp(target, "password") == 0) {
-if (arg) {
+if (!arg) {
 MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
 monitor_read_password(hmp_mon, hmp_change_read_arg, NULL);
 return;
-- 
2.25.1




[PATCH 47/47] e1000: fix tx re-entrancy problem

2021-12-14 Thread Michael Roth
From: Jon Maloy 

The fact that the MMIO handler is not re-entrant causes an infinite
loop under certain conditions:

Guest write to TDT ->  Loopback -> RX (DMA to TDT) -> TX

We now eliminate the effect of this problem locally in e1000, by adding
a boolean in struct E1000State indicating when the TX side is busy. This
will cause any entering new call to return early instead of interfering
with the ongoing work, and eliminates any risk of looping.

This is intended to address CVE-2021-20257.

Signed-off-by: Jon Maloy 
Signed-off-by: Jason Wang 
(cherry picked from commit 25ddb946e6301f42cff3094ea1c25fb78813e7e9)
Signed-off-by: Michael Roth 
---
 hw/net/e1000.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index a30546c5d5..f5bc81296d 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -107,6 +107,7 @@ struct E1000State_st {
 e1000x_txd_props props;
 e1000x_txd_props tso_props;
 uint16_t tso_frames;
+bool busy;
 } tx;
 
 struct {
@@ -763,6 +764,11 @@ start_xmit(E1000State *s)
 return;
 }
 
+if (s->tx.busy) {
+return;
+}
+s->tx.busy = true;
+
 while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 base = tx_desc_base(s) +
sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
@@ -789,6 +795,7 @@ start_xmit(E1000State *s)
 break;
 }
 }
+s->tx.busy = false;
 set_ics(s, 0, cause);
 }
 
-- 
2.25.1




[PATCH 04/47] qemu-nbd: Change default cache mode to writeback

2021-12-14 Thread Michael Roth
From: Nir Soffer 

Both qemu and qemu-img use writeback cache mode by default, which is
already documented in qemu(1). qemu-nbd uses writethrough cache mode by
default, and the default cache mode is not documented.

According to the qemu-nbd(8):

   --cache=CACHE
  The  cache  mode  to be used with the file.  See the
  documentation of the emulator's -drive cache=... option for
  allowed values.

qemu(1) says:

The default mode is cache=writeback.

So users have no reason to assume that qemu-nbd is using writethrough
cache mode. The only hint is the painfully slow writing when using the
defaults.

Looking in git history, it seems that qemu used writethrough in the past
to support broken guests that did not flush data properly, or could not
flush due to limitations in qemu. But qemu-nbd clients can use
NBD_CMD_FLUSH to flush data, so using writethrough does not help anyone.

Change the default cache mode to writeback, and document the default and
available values properly in the online help and manual.

With this change converting image via qemu-nbd is 3.5 times faster.

$ qemu-img create dst.img 50g
$ qemu-nbd -t -f raw -k /tmp/nbd.sock dst.img

Before this change:

$ hyperfine -r3 "./qemu-img convert -p -f raw -O raw -T none -W 
fedora34.img nbd+unix:///?socket=/tmp/nbd.sock"
Benchmark #1: ./qemu-img convert -p -f raw -O raw -T none -W fedora34.img 
nbd+unix:///?socket=/tmp/nbd.sock
  Time (mean ± σ): 83.639 s ±  5.970 s[User: 2.733 s, System: 6.112 
s]
  Range (min … max):   76.749 s … 87.245 s3 runs

After this change:

$ hyperfine -r3 "./qemu-img convert -p -f raw -O raw -T none -W 
fedora34.img nbd+unix:///?socket=/tmp/nbd.sock"
Benchmark #1: ./qemu-img convert -p -f raw -O raw -T none -W fedora34.img 
nbd+unix:///?socket=/tmp/nbd.sock
  Time (mean ± σ): 23.522 s ±  0.433 s[User: 2.083 s, System: 5.475 
s]
  Range (min … max):   23.234 s … 24.019 s3 runs

Users can avoid the issue by using --cache=writeback[1] but the defaults
should give good performance for the common use case.

[1] https://bugzilla.redhat.com/1990656

Signed-off-by: Nir Soffer 
Message-Id: <20210813205519.50518-1-nsof...@redhat.com>
Reviewed-by: Eric Blake 
CC: qemu-sta...@nongnu.org
Signed-off-by: Eric Blake 
(cherry picked from commit 09615257058a0ae87b837bb041f56f7312d9ead8)
Signed-off-by: Michael Roth 
---
 docs/tools/qemu-nbd.rst | 6 --
 qemu-nbd.c  | 6 --
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst
index ee862fa0bc..5643da26e9 100644
--- a/docs/tools/qemu-nbd.rst
+++ b/docs/tools/qemu-nbd.rst
@@ -98,8 +98,10 @@ driver options if ``--image-opts`` is specified.
 
 .. option:: --cache=CACHE
 
-  The cache mode to be used with the file.  See the documentation of
-  the emulator's ``-drive cache=...`` option for allowed values.
+  The cache mode to be used with the file. Valid values are:
+  ``none``, ``writeback`` (the default), ``writethrough``,
+  ``directsync`` and ``unsafe``. See the documentation of
+  the emulator's ``-drive cache=...`` option for more info.
 
 .. option:: -n, --nocache
 
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 26ffbf15af..6c18fcd19a 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -135,7 +135,9 @@ static void usage(const char *name)
 "'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
 "'[ID_OR_NAME]'\n"
 "  -n, --nocache disable host cache\n"
-"  --cache=MODE  set cache mode (none, writeback, ...)\n"
+"  --cache=MODE  set cache mode used to access the disk image, 
the\n"
+"valid options are: 'none', 'writeback' 
(default),\n"
+"'writethrough', 'directsync' and 'unsafe'\n"
 "  --aio=MODEset AIO mode (native, io_uring or threads)\n"
 "  --discard=MODEset discard mode (ignore, unmap)\n"
 "  --detect-zeroes=MODE  set detect-zeroes mode (off, on, unmap)\n"
@@ -552,7 +554,7 @@ int main(int argc, char **argv)
 bool alloc_depth = false;
 const char *tlscredsid = NULL;
 bool imageOpts = false;
-bool writethrough = true;
+bool writethrough = false; /* Client will flush as needed. */
 bool fork_process = false;
 bool list = false;
 int old_stderr = -1;
-- 
2.25.1




[PATCH 06/47] virtio-mem-pci: Fix memory leak when creating MEMORY_DEVICE_SIZE_CHANGE event

2021-12-14 Thread Michael Roth
From: David Hildenbrand 

Apparently, we don't have to duplicate the string.

Fixes: 722a3c783ef4 ("virtio-pci: Send qapi events when the virtio-mem size 
changes")
Cc: qemu-sta...@nongnu.org
Signed-off-by: David Hildenbrand 
Reviewed-by: Markus Armbruster 
Message-Id: <20210929162445.64060-2-da...@redhat.com>
Signed-off-by: Paolo Bonzini 
(cherry picked from commit 75b98cb9f6456ccf194211beffcbf93b0a995fa4)
Signed-off-by: Michael Roth 
---
 hw/virtio/virtio-mem-pci.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c
index fa5395cd88..7e384b7397 100644
--- a/hw/virtio/virtio-mem-pci.c
+++ b/hw/virtio/virtio-mem-pci.c
@@ -88,13 +88,8 @@ static void virtio_mem_pci_size_change_notify(Notifier 
*notifier, void *data)
  size_change_notifier);
 DeviceState *dev = DEVICE(pci_mem);
 const uint64_t * const size_p = data;
-const char *id = NULL;
 
-if (dev->id) {
-id = g_strdup(dev->id);
-}
-
-qapi_event_send_memory_device_size_change(!!id, id, *size_p);
+qapi_event_send_memory_device_size_change(!!dev->id, dev->id, *size_p);
 }
 
 static void virtio_mem_pci_class_init(ObjectClass *klass, void *data)
-- 
2.25.1




[PATCH 37/47] virtio: use virtio accessor to access packed event

2021-12-14 Thread Michael Roth
From: Jason Wang 

We used to access packed descriptor event and off_wrap via
address_space_{write|read}_cached(). When we hit the cache, memcpy()
is used, which is not atomic and may lead to a wrong value being read or
written.

This patch fixes this by switching to use
virtio_{stw|lduw}_phys_cached() to make sure the access is atomic.

Fixes: 683f7665679c1 ("virtio: event suppression support for packed ring")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Jason Wang 
Message-Id: <2021063854.29060-2-jasow...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
(cherry picked from commit d152cdd6f6fad381e804c8185f0ba938030ccac9)
Signed-off-by: Michael Roth 
---
 hw/virtio/virtio.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 2dbccb6b3f..c9567f09ed 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -249,13 +249,10 @@ static void vring_packed_event_read(VirtIODevice *vdev,
 hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
 hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
 
-address_space_read_cached(cache, off_flags, &e->flags,
-  sizeof(e->flags));
+e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
 /* Make sure flags is seen before off_wrap */
 smp_rmb();
-address_space_read_cached(cache, off_off, &e->off_wrap,
-  sizeof(e->off_wrap));
-virtio_tswap16s(vdev, &e->off_wrap);
+e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
 virtio_tswap16s(vdev, &e->flags);
 }
 
@@ -265,8 +262,7 @@ static void vring_packed_off_wrap_write(VirtIODevice *vdev,
 {
 hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
 
-virtio_tswap16s(vdev, &off_wrap);
-address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
+virtio_stw_phys_cached(vdev, cache, off, off_wrap);
 address_space_cache_invalidate(cache, off, sizeof(off_wrap));
 }
 
@@ -275,8 +271,7 @@ static void vring_packed_flags_write(VirtIODevice *vdev,
 {
 hwaddr off = offsetof(VRingPackedDescEvent, flags);
 
-virtio_tswap16s(vdev, &flags);
-address_space_write_cached(cache, off, &flags, sizeof(flags));
+virtio_stw_phys_cached(vdev, cache, off, flags);
 address_space_cache_invalidate(cache, off, sizeof(flags));
 }
 
-- 
2.25.1




[PATCH 07/47] uas: add stream number sanity checks.

2021-12-14 Thread Michael Roth
From: Gerd Hoffmann 

The device uses the guest-supplied stream number unchecked, which can
lead to guest-triggered out-of-bounds access to the UASDevice->data3 and
UASDevice->status3 fields.  Add the missing checks.

Fixes: CVE-2021-3713
Signed-off-by: Gerd Hoffmann 
Reported-by: Chen Zhe 
Reported-by: Tan Jingguo 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20210818120505.1258262-2-kra...@redhat.com>
(cherry picked from commit 13b250b12ad3c59114a6a17d59caf073ce45b33a)
Signed-off-by: Michael Roth 
---
 hw/usb/dev-uas.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index 263056231c..f6309a5ebf 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -840,6 +840,9 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket 
*p)
 }
 break;
 case UAS_PIPE_ID_STATUS:
+if (p->stream > UAS_MAX_STREAMS) {
+goto err_stream;
+}
 if (p->stream) {
 QTAILQ_FOREACH(st, &uas->results, next) {
 if (st->stream == p->stream) {
@@ -867,6 +870,9 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket 
*p)
 break;
 case UAS_PIPE_ID_DATA_IN:
 case UAS_PIPE_ID_DATA_OUT:
+if (p->stream > UAS_MAX_STREAMS) {
+goto err_stream;
+}
 if (p->stream) {
 req = usb_uas_find_request(uas, p->stream);
 } else {
@@ -902,6 +908,11 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket 
*p)
 p->status = USB_RET_STALL;
 break;
 }
+
+err_stream:
+error_report("%s: invalid stream %d", __func__, p->stream);
+p->status = USB_RET_STALL;
+return;
 }
 
 static void usb_uas_unrealize(USBDevice *dev)
-- 
2.25.1




[PATCH 34/47] accel/tcg: Register a force_rcu notifier

2021-12-14 Thread Michael Roth
From: Greg Kurz 

A TCG vCPU doing a busy loop systematically hangs the QEMU monitor
if the user passes 'device_add' without argument. This is because
drain_call_rcu(), which is called from qmp_device_add(), cannot return
if readers don't exit read-side critical sections. That is typically
what busy-looping TCG vCPUs do:

int cpu_exec(CPUState *cpu)
{
[...]
rcu_read_lock();
[...]
while (!cpu_handle_exception(cpu, &ret)) {
// Busy loop keeps vCPU here
}
[...]
rcu_read_unlock();

return ret;
}

For MTTCG, have all vCPU threads register a force_rcu notifier that will
kick them out of the loop using async_run_on_cpu(). The notifier is called
with the rcu_registry_lock mutex held, using async_run_on_cpu() ensures
there are no deadlocks.

For RR, a single thread runs all vCPUs. Just register a single notifier
that kicks the current vCPU to the next one.

For MTTCG:
Suggested-by: Paolo Bonzini 

For RR:
Suggested-by: Richard Henderson 

Fixes: 7bed89958bfb ("device_core: use drain_call_rcu in in qmp_device_add")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/650
Signed-off-by: Greg Kurz 
Reviewed-by: Richard Henderson 
Message-Id: <20211109183523.47726-3-gr...@kaod.org>
Signed-off-by: Paolo Bonzini 
(cherry picked from commit dd47a8f654d84f666b235ce8891e17ee76f9be8b)
Signed-off-by: Michael Roth 
---
 accel/tcg/tcg-accel-ops-mttcg.c | 26 ++
 accel/tcg/tcg-accel-ops-rr.c| 10 ++
 2 files changed, 36 insertions(+)

diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 847d2079d2..29632bd4c0 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -28,6 +28,7 @@
 #include "sysemu/tcg.h"
 #include "sysemu/replay.h"
 #include "qemu/main-loop.h"
+#include "qemu/notify.h"
 #include "qemu/guest-random.h"
 #include "exec/exec-all.h"
 #include "hw/boards.h"
@@ -35,6 +36,26 @@
 #include "tcg-accel-ops.h"
 #include "tcg-accel-ops-mttcg.h"
 
+typedef struct MttcgForceRcuNotifier {
+Notifier notifier;
+CPUState *cpu;
+} MttcgForceRcuNotifier;
+
+static void do_nothing(CPUState *cpu, run_on_cpu_data d)
+{
+}
+
+static void mttcg_force_rcu(Notifier *notify, void *data)
+{
+CPUState *cpu = container_of(notify, MttcgForceRcuNotifier, notifier)->cpu;
+
+/*
+ * Called with rcu_registry_lock held, using async_run_on_cpu() ensures
+ * that there are no deadlocks.
+ */
+async_run_on_cpu(cpu, do_nothing, RUN_ON_CPU_NULL);
+}
+
 /*
  * In the multi-threaded case each vCPU has its own thread. The TLS
  * variable current_cpu can be used deep in the code to find the
@@ -43,12 +64,16 @@
 
 static void *mttcg_cpu_thread_fn(void *arg)
 {
+MttcgForceRcuNotifier force_rcu;
 CPUState *cpu = arg;
 
 assert(tcg_enabled());
 g_assert(!icount_enabled());
 
 rcu_register_thread();
+force_rcu.notifier.notify = mttcg_force_rcu;
+force_rcu.cpu = cpu;
+rcu_add_force_rcu_notifier(&force_rcu.notifier);
 tcg_register_thread();
 
 qemu_mutex_lock_iothread();
@@ -100,6 +125,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
 
 tcg_cpus_destroy(cpu);
 qemu_mutex_unlock_iothread();
+rcu_remove_force_rcu_notifier(&force_rcu.notifier);
 rcu_unregister_thread();
 return NULL;
 }
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index c02c061ecb..aa5b4ac247 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -28,6 +28,7 @@
 #include "sysemu/tcg.h"
 #include "sysemu/replay.h"
 #include "qemu/main-loop.h"
+#include "qemu/notify.h"
 #include "qemu/guest-random.h"
 #include "exec/exec-all.h"
 
@@ -135,6 +136,11 @@ static void rr_deal_with_unplugged_cpus(void)
 }
 }
 
+static void rr_force_rcu(Notifier *notify, void *data)
+{
+rr_kick_next_cpu();
+}
+
 /*
  * In the single-threaded case each vCPU is simulated in turn. If
  * there is more than a single vCPU we create a simple timer to kick
@@ -145,10 +151,13 @@ static void rr_deal_with_unplugged_cpus(void)
 
 static void *rr_cpu_thread_fn(void *arg)
 {
+Notifier force_rcu;
 CPUState *cpu = arg;
 
 assert(tcg_enabled());
 rcu_register_thread();
+force_rcu.notify = rr_force_rcu;
+rcu_add_force_rcu_notifier(&force_rcu);
 tcg_register_thread();
 
 qemu_mutex_lock_iothread();
@@ -257,6 +266,7 @@ static void *rr_cpu_thread_fn(void *arg)
 rr_deal_with_unplugged_cpus();
 }
 
+rcu_remove_force_rcu_notifier(&force_rcu);
 rcu_unregister_thread();
 return NULL;
 }
-- 
2.25.1




[PATCH 46/47] net: vmxnet3: validate configuration values during activate (CVE-2021-20203)

2021-12-14 Thread Michael Roth
From: Prasad J Pandit 

While activating the device, vmxnet3_activate_device() does not
validate guest-supplied configuration values against predefined
minimum - maximum limits. This may lead to integer overflow or
OOB access issues. Add checks to avoid it.

Fixes: CVE-2021-20203
Buglink: https://bugs.launchpad.net/qemu/+bug/1913873
Reported-by: Gaoning Pan 
Signed-off-by: Prasad J Pandit 
Signed-off-by: Jason Wang 
(cherry picked from commit d05dcd94aee88728facafb993c7280547eb4d645)
Signed-off-by: Michael Roth 
---
 hw/net/vmxnet3.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 41f796a247..f65af4e9ef 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1441,6 +1441,7 @@ static void vmxnet3_activate_device(VMXNET3State *s)
 vmxnet3_setup_rx_filtering(s);
 /* Cache fields from shared memory */
 s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
+assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu < VMXNET3_MAX_MTU);
 VMW_CFPRN("MTU is %u", s->mtu);
 
 s->max_rx_frags =
@@ -1486,6 +1487,9 @@ static void vmxnet3_activate_device(VMXNET3State *s)
 /* Read rings memory locations for TX queues */
 pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA);
 size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize);
+if (size > VMXNET3_TX_RING_MAX_SIZE) {
+size = VMXNET3_TX_RING_MAX_SIZE;
+}
 
 vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size,
   sizeof(struct Vmxnet3_TxDesc), false);
@@ -1496,6 +1500,9 @@ static void vmxnet3_activate_device(VMXNET3State *s)
 /* TXC ring */
 pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA);
 size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize);
+if (size > VMXNET3_TC_RING_MAX_SIZE) {
+size = VMXNET3_TC_RING_MAX_SIZE;
+}
 vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size,
   sizeof(struct Vmxnet3_TxCompDesc), true);
 VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring);
@@ -1537,6 +1544,9 @@ static void vmxnet3_activate_device(VMXNET3State *s)
 /* RX rings */
 pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]);
 size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]);
+if (size > VMXNET3_RX_RING_MAX_SIZE) {
+size = VMXNET3_RX_RING_MAX_SIZE;
+}
 vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size,
   sizeof(struct Vmxnet3_RxDesc), false);
 VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
@@ -1546,6 +1556,9 @@ static void vmxnet3_activate_device(VMXNET3State *s)
 /* RXC ring */
 pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA);
 size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize);
+if (size > VMXNET3_RC_RING_MAX_SIZE) {
+size = VMXNET3_RC_RING_MAX_SIZE;
+}
 vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size,
   sizeof(struct Vmxnet3_RxCompDesc), true);
 VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);
-- 
2.25.1




[PATCH 40/47] hw/nvme: fix buffer overrun in nvme_changed_nslist (CVE-2021-3947)

2021-12-14 Thread Michael Roth
From: Klaus Jensen 

Fix missing offset verification.

Cc: qemu-sta...@nongnu.org
Cc: Philippe Mathieu-Daudé 
Reported-by: Qiuhao Li 
Fixes: f432fdfa121 ("support changed namespace asynchronous event")
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Klaus Jensen 
(cherry picked from commit e2c57529c9306e4c9aac75d9879f6e7699584a22)
Signed-off-by: Michael Roth 
---
 hw/nvme/ctrl.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 6baf9e0420..27dddb87bd 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4164,6 +4164,11 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t 
rae, uint32_t buf_len,
 int i = 0;
 uint32_t nsid;
 
+if (off >= sizeof(nslist)) {
+trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(nslist));
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
 memset(nslist, 0x0, sizeof(nslist));
 trans_len = MIN(sizeof(nslist) - off, buf_len);
 
-- 
2.25.1




[PATCH 45/47] virtio-blk: Fix clean up of host notifiers for single MR transaction.

2021-12-14 Thread Michael Roth
From: Mark Mielke 

The commit "virtio-blk: Configure all host notifiers in a single MR
transaction" introduced a second loop variable to perform cleanup in
a second loop, but mistakenly still refers to the first loop variable
within the second loop body.

Fixes: d0267da61489 ("virtio-blk: Configure all host notifiers in a single MR 
transaction")
Signed-off-by: Mark Mielke 
Message-id: CALm7yL08qarOu0dnQkTN+pa=bsrc92g31ypqqndeait4ylz...@mail.gmail.com
Signed-off-by: Stefan Hajnoczi 
(cherry picked from commit 5b807181c27a940a3a7ad1f221a2e76a132cbdc0)
Signed-off-by: Michael Roth 
---
 hw/block/dataplane/virtio-blk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 252c3a7a23..ee5a5352dc 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -222,7 +222,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
 memory_region_transaction_commit();
 
 while (j--) {
-virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
+virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j);
 }
 goto fail_host_notifiers;
 }
-- 
2.25.1




[PATCH 44/47] tests/qtest/fdc-test: Add a regression test for CVE-2021-20196

2021-12-14 Thread Michael Roth
From: Philippe Mathieu-Daudé 

Without the previous commit, when running 'make check-qtest-i386'
with QEMU configured with '--enable-sanitizers' we get:

  AddressSanitizer:DEADLYSIGNAL
  =
  ==287878==ERROR: AddressSanitizer: SEGV on unknown address 0x0344
  ==287878==The signal is caused by a WRITE memory access.
  ==287878==Hint: address points to the zero page.
  #0 0x564b2e5bac27 in blk_inc_in_flight block/block-backend.c:1346:5
  #1 0x564b2e5bb228 in blk_pwritev_part block/block-backend.c:1317:5
  #2 0x564b2e5bcd57 in blk_pwrite block/block-backend.c:1498:11
  #3 0x564b2ca1cdd3 in fdctrl_write_data hw/block/fdc.c:2221:17
  #4 0x564b2ca1b2f7 in fdctrl_write hw/block/fdc.c:829:9
  #5 0x564b2dc49503 in portio_write softmmu/ioport.c:201:9

Add the reproducer for CVE-2021-20196.

Suggested-by: Alexander Bulekov 
Reviewed-by: Darren Kenny 
Signed-off-by: Philippe Mathieu-Daudé 
Message-id: 20211124161536.631563-4-phi...@redhat.com
Signed-off-by: John Snow 
(cherry picked from commit cc20926e9b8077bff6813efc8dcdeae90d1a3b10)
Signed-off-by: Michael Roth 
---
 tests/qtest/fdc-test.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c
index 26b69f7c5c..8f6eee84a4 100644
--- a/tests/qtest/fdc-test.c
+++ b/tests/qtest/fdc-test.c
@@ -32,6 +32,9 @@
 /* TODO actually test the results and get rid of this */
 #define qmp_discard_response(...) qobject_unref(qmp(__VA_ARGS__))
 
+#define DRIVE_FLOPPY_BLANK \
+"-drive 
if=floppy,file=null-co://,file.read-zeroes=on,format=raw,size=1440k"
+
 #define TEST_IMAGE_SIZE 1440 * 1024
 
 #define FLOPPY_BASE 0x3f0
@@ -546,6 +549,40 @@ static void fuzz_registers(void)
 }
 }
 
+static bool qtest_check_clang_sanitizer(void)
+{
+#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
+return true;
+#else
+g_test_skip("QEMU not configured using --enable-sanitizers");
+return false;
+#endif
+}
+static void test_cve_2021_20196(void)
+{
+QTestState *s;
+
+if (!qtest_check_clang_sanitizer()) {
+return;
+}
+
+s = qtest_initf("-nographic -m 32M -nodefaults " DRIVE_FLOPPY_BLANK);
+
+qtest_outw(s, 0x3f4, 0x0500);
+qtest_outb(s, 0x3f5, 0x00);
+qtest_outb(s, 0x3f5, 0x00);
+qtest_outw(s, 0x3f4, 0x0000);
+qtest_outb(s, 0x3f5, 0x00);
+qtest_outw(s, 0x3f1, 0x0400);
+qtest_outw(s, 0x3f4, 0x0000);
+qtest_outw(s, 0x3f4, 0x0000);
+qtest_outb(s, 0x3f5, 0x00);
+qtest_outb(s, 0x3f5, 0x01);
+qtest_outw(s, 0x3f1, 0x0500);
+qtest_outb(s, 0x3f5, 0x00);
+qtest_quit(s);
+}
+
 int main(int argc, char **argv)
 {
 int fd;
@@ -576,6 +613,7 @@ int main(int argc, char **argv)
 qtest_add_func("/fdc/read_no_dma_18", test_read_no_dma_18);
 qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19);
 qtest_add_func("/fdc/fuzz-registers", fuzz_registers);
+qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196);
 
 ret = g_test_run();
 
-- 
2.25.1




[PATCH 43/47] hw/block/fdc: Kludge missing floppy drive to fix CVE-2021-20196

2021-12-14 Thread Michael Roth
From: Philippe Mathieu-Daudé 

Guest might select another drive on the bus by setting the
DRIVE_SEL bit of the DIGITAL OUTPUT REGISTER (DOR).
The current controller model doesn't expect a BlockBackend
to be NULL. A simple way to fix CVE-2021-20196 is to create
an empty BlockBackend when it is missing. All further
accesses will be safely handled, and the controller state
machines keep behaving correctly.

Cc: qemu-sta...@nongnu.org
Fixes: CVE-2021-20196
Reported-by: Gaoning Pan (Ant Security Light-Year Lab) 
Reviewed-by: Darren Kenny 
Reviewed-by: Hanna Reitz 
Signed-off-by: Philippe Mathieu-Daudé 
Message-id: 20211124161536.631563-3-phi...@redhat.com
BugLink: https://bugs.launchpad.net/qemu/+bug/1912780
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/338
Reviewed-by: Darren Kenny 
Reviewed-by: Hanna Reitz 
Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: John Snow 
(cherry picked from commit 1ab95af033a419e7a64e2d58e67dd96b20af5233)
Signed-off-by: Michael Roth 
---
 hw/block/fdc.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index c3e09973ca..af398c1116 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -1166,7 +1166,19 @@ static FDrive *get_drv(FDCtrl *fdctrl, int unit)
 
 static FDrive *get_cur_drv(FDCtrl *fdctrl)
 {
-return get_drv(fdctrl, fdctrl->cur_drv);
+FDrive *cur_drv = get_drv(fdctrl, fdctrl->cur_drv);
+
+if (!cur_drv->blk) {
+/*
+ * Kludge: empty drive line selected. Create an anonymous
+ * BlockBackend to avoid NULL deref with various BlockBackend
+ * API calls within this model (CVE-2021-20196).
+ * Due to the controller QOM model limitations, we don't
+ * attach the created to the controller device.
+ */
+cur_drv->blk = blk_create_empty_drive();
+}
+return cur_drv;
 }
 
 /* Status A register : 0x00 (read-only) */
-- 
2.25.1




[PATCH 28/47] Partially revert "build: -no-pie is no functional linker flag"

2021-12-14 Thread Michael Roth
From: Jessica Clarke 

This partially reverts commit bbd2d5a8120771ec59b86a80a1f51884e0a26e53.

This commit was misguided and broke using --disable-pie on any distro
that enables PIE by default in their compiler driver, including Debian
and its derivatives. Whilst -no-pie is not a linker flag, it is a
compiler driver flag that ensures -pie is not automatically passed by it
to the linker. Without it, all compile_prog checks will fail as any code
built with the explicit -fno-pie will fail to link with the implicit
default -pie due to trying to use position-dependent relocations. The
only bug that needed fixing was LDFLAGS_NOPIE being used as a flag for
the linker itself in pc-bios/optionrom/Makefile.

Note this does not reinstate exporting LDFLAGS_NOPIE, as it is unused,
since the only previous use was the one that should not have existed. I
have also updated the comment for the -fno-pie and -no-pie checks to
reflect what they're actually needed for.

Fixes: bbd2d5a8120771ec59b86a80a1f51884e0a26e53
Cc: Christian Ehrhardt 
Cc: Paolo Bonzini 
Cc: qemu-sta...@nongnu.org
Signed-off-by: Jessica Clarke 
Message-Id: <20210805192545.38279-1-jrt...@jrtc27.com>
Signed-off-by: Paolo Bonzini 
(cherry picked from commit ffd205ef2901bd65fcfbd09a98c0ff7cfcec5e4d)
Signed-off-by: Michael Roth 
---
 configure | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 7659870810..6e308ed77f 100755
--- a/configure
+++ b/configure
@@ -2246,9 +2246,11 @@ static THREAD int tls_var;
 int main(void) { return tls_var; }
 EOF
 
-# Check we support --no-pie first; we will need this for building ROMs.
+# Check we support -fno-pie and -no-pie first; we will need the former for
+# building ROMs, and both for everything if --disable-pie is passed.
 if compile_prog "-Werror -fno-pie" "-no-pie"; then
   CFLAGS_NOPIE="-fno-pie"
+  LDFLAGS_NOPIE="-no-pie"
 fi
 
 if test "$static" = "yes"; then
@@ -2264,6 +2266,7 @@ if test "$static" = "yes"; then
   fi
 elif test "$pie" = "no"; then
   CONFIGURE_CFLAGS="$CFLAGS_NOPIE $CONFIGURE_CFLAGS"
+  CONFIGURE_LDFLAGS="$LDFLAGS_NOPIE $CONFIGURE_LDFLAGS"
 elif compile_prog "-Werror -fPIE -DPIE" "-pie"; then
   CONFIGURE_CFLAGS="-fPIE -DPIE $CONFIGURE_CFLAGS"
   CONFIGURE_LDFLAGS="-pie $CONFIGURE_LDFLAGS"
-- 
2.25.1




[PATCH 30/47] target-i386: mmu: fix handling of noncanonical virtual addresses

2021-12-14 Thread Michael Roth
From: Paolo Bonzini 

mmu_translate is supposed to return an error code for page faults; it is
not able to handle other exceptions.  The #GP case for noncanonical
virtual addresses is not handled correctly, and incorrectly raised as
a page fault with error code 1.  Since it cannot happen for nested
page tables, move it directly to handle_mmu_fault, even before the
invocation of mmu_translate.

Fixes: #676
Fixes: 661ff4879e ("target/i386: extract mmu_translate", 2021-05-11)
Cc: qemu-sta...@nongnu.org
Tested-by: Mark Cave-Ayland 
Signed-off-by: Paolo Bonzini 
(cherry picked from commit b04dc92e013d55c9ac8082caefff45dcfb1310e7)
Signed-off-by: Michael Roth 
---
 target/i386/tcg/sysemu/excp_helper.c | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/target/i386/tcg/sysemu/excp_helper.c 
b/target/i386/tcg/sysemu/excp_helper.c
index 2dea4a248e..9fb59058ef 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -94,15 +94,6 @@ static int mmu_translate(CPUState *cs, hwaddr addr, 
MMUTranslateFunc get_hphys_f
 bool la57 = pg_mode & PG_MODE_LA57;
 uint64_t pml5e_addr, pml5e;
 uint64_t pml4e_addr, pml4e;
-int32_t sext;
-
-/* test virtual address sign extension */
-sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
-if (get_hphys_func && sext != 0 && sext != -1) {
-env->error_code = 0;
-cs->exception_index = EXCP0D_GPF;
-return 1;
-}
 
 if (la57) {
 pml5e_addr = ((cr3 & ~0xfff) +
@@ -423,6 +414,18 @@ static int handle_mmu_fault(CPUState *cs, vaddr addr, int 
size,
 page_size = 4096;
 } else {
 pg_mode = get_pg_mode(env);
+if (pg_mode & PG_MODE_LMA) {
+int32_t sext;
+
+/* test virtual address sign extension */
+sext = (int64_t)addr >> (pg_mode & PG_MODE_LA57 ? 56 : 47);
+if (sext != 0 && sext != -1) {
+env->error_code = 0;
+cs->exception_index = EXCP0D_GPF;
+return 1;
+}
+}
+
 error_code = mmu_translate(cs, addr, get_hphys, env->cr[3], is_write1,
mmu_idx, pg_mode,
&paddr, &page_size, &prot);
-- 
2.25.1




[PATCH 42/47] hw/block/fdc: Extract blk_create_empty_drive()

2021-12-14 Thread Michael Roth
From: Philippe Mathieu-Daudé 

We are going to re-use this code in the next commit,
so extract it as a new blk_create_empty_drive() function.

Inspired-by: Hanna Reitz 
Signed-off-by: Philippe Mathieu-Daudé 
Message-id: 20211124161536.631563-2-phi...@redhat.com
Signed-off-by: John Snow 
(cherry picked from commit b154791e7b6d4ca5cdcd54443484d97360bd7ad2)
Signed-off-by: Michael Roth 
---
 hw/block/fdc.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 9014cd30b3..c3e09973ca 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -61,6 +61,12 @@
 } while (0)
 
 
+/* Anonymous BlockBackend for empty drive */
+static BlockBackend *blk_create_empty_drive(void)
+{
+return blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
+}
+
 //
 /* qdev floppy bus  */
 
@@ -486,8 +492,7 @@ static void floppy_drive_realize(DeviceState *qdev, Error 
**errp)
 }
 
 if (!dev->conf.blk) {
-/* Anonymous BlockBackend for an empty drive */
-dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
+dev->conf.blk = blk_create_empty_drive();
 ret = blk_attach_dev(dev->conf.blk, qdev);
 assert(ret == 0);
 
-- 
2.25.1




[PATCH 03/47] virtio-net: fix use after unmap/free for sg

2021-12-14 Thread Michael Roth
From: Jason Wang 

When mergeable buffer is enabled, we try to set the num_buffers after
the virtqueue elem has been unmapped. This will lead to several issues,
e.g. a use after free when the descriptor has an address which belongs
to the non direct access region. In this case we use a bounce buffer
that is allocated during address_space_map() and freed during
address_space_unmap().

Fix this by storing the elems temporarily in an array and delaying the
unmap until after we set the num_buffers.

This addresses CVE-2021-3748.

Reported-by: Alexander Bulekov 
Fixes: fbe78f4f55c6 ("virtio-net support")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Jason Wang 
(cherry picked from commit bedd7e93d01961fcb16a97ae45d93acf357e11f6)
Signed-off-by: Michael Roth 
---
 hw/net/virtio-net.c | 39 ---
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 16d20cdee5..f205331dcf 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1746,10 +1746,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
 VirtIONet *n = qemu_get_nic_opaque(nc);
 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
 VirtIODevice *vdev = VIRTIO_DEVICE(n);
+VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
+size_t lens[VIRTQUEUE_MAX_SIZE];
 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
 struct virtio_net_hdr_mrg_rxbuf mhdr;
 unsigned mhdr_cnt = 0;
-size_t offset, i, guest_offset;
+size_t offset, i, guest_offset, j;
+ssize_t err;
 
 if (!virtio_net_can_receive(nc)) {
 return -1;
@@ -1780,6 +1783,12 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
 
 total = 0;
 
+if (i == VIRTQUEUE_MAX_SIZE) {
+virtio_error(vdev, "virtio-net unexpected long buffer chain");
+err = size;
+goto err;
+}
+
 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
 if (!elem) {
 if (i) {
@@ -1791,7 +1800,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
  n->guest_hdr_len, n->host_hdr_len,
  vdev->guest_features);
 }
-return -1;
+err = -1;
+goto err;
 }
 
 if (elem->in_num < 1) {
@@ -1799,7 +1809,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
  "virtio-net receive queue contains no in buffers");
 virtqueue_detach_element(q->rx_vq, elem, 0);
 g_free(elem);
-return -1;
+err = -1;
+goto err;
 }
 
 sg = elem->in_sg;
@@ -1836,12 +1847,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
 if (!n->mergeable_rx_bufs && offset < size) {
 virtqueue_unpop(q->rx_vq, elem, total);
 g_free(elem);
-return size;
+err = size;
+goto err;
 }
 
-/* signal other side */
-virtqueue_fill(q->rx_vq, elem, total, i++);
-g_free(elem);
+elems[i] = elem;
+lens[i] = total;
+i++;
 }
 
 if (mhdr_cnt) {
@@ -1851,10 +1863,23 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
  &mhdr.num_buffers, sizeof mhdr.num_buffers);
 }
 
+for (j = 0; j < i; j++) {
+/* signal other side */
+virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
+g_free(elems[j]);
+}
+
 virtqueue_flush(q->rx_vq, i);
 virtio_notify(vdev, q->rx_vq);
 
 return size;
+
+err:
+for (j = 0; j < i; j++) {
+g_free(elems[j]);
+}
+
+return err;
 }
 
 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
-- 
2.25.1




[PATCH 27/47] block/file-posix: Fix return value translation for AIO discards

2021-12-14 Thread Michael Roth
From: Ari Sundholm 

AIO discards regressed as a result of the following commit:
0dfc7af2 block/file-posix: Optimize for macOS

When trying to run blkdiscard within a Linux guest, the request would
fail, with some errors in dmesg:

 [ snip ] 
[4.010070] sd 2:0:0:0: [sda] tag#0 FAILED Result: hostbyte=DID_OK
driverbyte=DRIVER_SENSE
[4.011061] sd 2:0:0:0: [sda] tag#0 Sense Key : Aborted Command
[current]
[4.011061] sd 2:0:0:0: [sda] tag#0 Add. Sense: I/O process
terminated
[4.011061] sd 2:0:0:0: [sda] tag#0 CDB: Unmap/Read sub-channel 42
00 00 00 00 00 00 00 18 00
[4.011061] blk_update_request: I/O error, dev sda, sector 0
 [ snip ] 

This turns out to be a result of a flaw in changes to the error value
translation logic in handle_aiocb_discard(). The default return value
may be left untranslated in some configurations, and the wrong variable
is used in one translation.

Fix both issues.

Fixes: 0dfc7af2b28 ("block/file-posix: Optimize for macOS")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Ari Sundholm 
Signed-off-by: Emil Karlson 
Reviewed-by: Akihiko Odaki 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20211019110954.4170931-1-...@tuxera.com>
Signed-off-by: Kevin Wolf 
(cherry picked from commit 13a028336f2c05e7ff47dfdaf30dfac7f4883e80)
Signed-off-by: Michael Roth 
---
 block/file-posix.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 1567edb3d5..dd295cfc6d 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1807,7 +1807,7 @@ static int handle_aiocb_copy_range(void *opaque)
 static int handle_aiocb_discard(void *opaque)
 {
 RawPosixAIOData *aiocb = opaque;
-int ret = -EOPNOTSUPP;
+int ret = -ENOTSUP;
 BDRVRawState *s = aiocb->bs->opaque;
 
 if (!s->has_discard) {
@@ -1829,7 +1829,7 @@ static int handle_aiocb_discard(void *opaque)
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
 ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
aiocb->aio_offset, aiocb->aio_nbytes);
-ret = translate_err(-errno);
+ret = translate_err(ret);
 #elif defined(__APPLE__) && (__MACH__)
 fpunchhole_t fpunchhole;
 fpunchhole.fp_flags = 0;
-- 
2.25.1




[PATCH 25/47] tests/acpi/pcihp: add unit tests for hotplug on multifunction bridges for q35

2021-12-14 Thread Michael Roth
From: Ani Sinha 

commit d7346e614f4ec ("acpi: x86: pcihp: add support hotplug on multifunction 
bridges")
added ACPI hotplug descriptions for cold plugged bridges for functions other
than 0. For all other devices, the ACPI hotplug descriptions are limited to
function 0 only. This change adds unit tests for this feature.

This test adds the following devices to qemu and then checks the changes
introduced in the DSDT table due to the addition of the following devices:

(a) a multifunction bridge device
(b) a bridge device with function 1
(c) a non-bridge device with function 2

In the DSDT table, we should see AML hotplug descriptions for (a) and (b).
For (a) we should find a hotplug AML description for function 0.

The following diff compares the DSDT table AML with the new unit test before
and after the change d7346e614f4ec is introduced. In other words,
this diff reflects the changes that occur in the DSDT table due to the change
d7346e614f4ec.

@@ -1,60 +1,38 @@
 /*
  * Intel ACPI Component Architecture
  * AML/ASL+ Disassembler version 20190509 (64-bit version)
  * Copyright (c) 2000 - 2019 Intel Corporation
  *
  * Disassembling to symbolic ASL+ operators
  *
- * Disassembly of tests/data/acpi/q35/DSDT.multi-bridge, Thu Oct  7 18:56:05 
2021
+ * Disassembly of /tmp/aml-AN0DA1, Thu Oct  7 18:56:05 2021
  *
  * Original Table Header:
  * Signature"DSDT"
- * Length   0x20FE (8446)
+ * Length   0x2187 (8583)
  * Revision 0x01  32-bit table (V1), no 64-bit math support
- * Checksum 0xDE
+ * Checksum 0x8D
  * OEM ID   "BOCHS "
  * OEM Table ID "BXPC"
  * OEM Revision 0x0001 (1)
  * Compiler ID  "BXPC"
  * Compiler Version 0x0001 (1)
  */
 DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC", 0x0001)
 {
-/*
- * iASL Warning: There was 1 external control method found during
- * disassembly, but only 0 were resolved (1 unresolved). Additional
- * ACPI tables may be required to properly disassemble the code. This
- * resulting disassembler output file may not compile because the
- * disassembler did not know how many arguments to assign to the
- * unresolved methods. Note: SSDTs can be dynamically loaded at
- * runtime and may or may not be available via the host OS.
- *
- * In addition, the -fe option can be used to specify a file containing
- * control method external declarations with the associated method
- * argument counts. Each line of the file must be of the form:
- * External (, MethodObj, )
- * Invocation:
- * iasl -fe refs.txt -d dsdt.aml
- *
- * The following methods were unresolved and many not compile properly
- * because the disassembler had to guess at the number of arguments
- * required for each:
- */
-External (_SB_.PCI0.S19_.PCNT, MethodObj)// Warning: Unknown method, 
guessing 1 arguments
-
 Scope (\)
 {
 OperationRegion (DBG, SystemIO, 0x0402, One)
 Field (DBG, ByteAcc, NoLock, Preserve)
 {
 DBGB,   8
 }

 Method (DBUG, 1, NotSerialized)
 {
 ToHexString (Arg0, Local0)
 ToBuffer (Local0, Local0)
 Local1 = (SizeOf (Local0) - One)
 Local2 = Zero
 While ((Local2 < Local1))
 {
@@ -3322,24 +3300,60 @@
 Method (DVNT, 2, NotSerialized)
 {
 If ((Arg0 & One))
 {
 Notify (S00, Arg1)
 }
 }

 Method (PCNT, 0, NotSerialized)
 {
 BNUM = One
 DVNT (PCIU, One)
 DVNT (PCID, 0x03)
 }
 }

+Device (S19)
+{
+Name (_ADR, 0x00030001)  // _ADR: Address
+Name (BSEL, Zero)
+Device (S00)
+{
+Name (_SUN, Zero)  // _SUN: Slot User Number
+Name (_ADR, Zero)  // _ADR: Address
+Method (_EJ0, 1, NotSerialized)  // _EJx: Eject Device, 
x=0-9
+{
+PCEJ (BSEL, _SUN)
+}
+
+Method (_DSM, 4, Serialized)  // _DSM: Device-Specific 
Method
+{
+Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN))
+}
+}
+
+Method (DVNT, 2, NotSerialized)
+{
+If ((Arg0 & One))
+{
+Notify (S00, Arg1)
+}
+}
+
+Method (PCNT, 0, NotSerialized)
+{
+BNUM = Zero
+DVNT (PCIU, One)
+DVNT (PCID, 0x03)
+}

[PATCH 41/47] chardev/wctable: don't free the instance in wctablet_chr_finalize

2021-12-14 Thread Michael Roth
From: Daniil Tatianin 

An object is supposed to be freed by invoking obj->free, and not in
obj->instance_finalize. Calling g_free() on the instance from
wctablet_chr_finalize() leads to a use-after-free followed by a
double free in object_unref/object_finalize.

Signed-off-by: Daniil Tatianin 
Reviewed-by: Marc-André Lureau 
Message-Id: <2027142349.836279-1-d-tatia...@yandex-team.ru>
Signed-off-by: Paolo Bonzini 
(cherry picked from commit fdc6e168181d06391711171b7c409b34f2981ced)
Signed-off-by: Michael Roth 
---
 chardev/wctablet.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/chardev/wctablet.c b/chardev/wctablet.c
index 95e005f5a5..e8b292c43c 100644
--- a/chardev/wctablet.c
+++ b/chardev/wctablet.c
@@ -320,7 +320,6 @@ static void wctablet_chr_finalize(Object *obj)
 TabletChardev *tablet = WCTABLET_CHARDEV(obj);
 
 qemu_input_handler_unregister(tablet->hs);
-g_free(tablet);
 }
 
 static void wctablet_chr_open(Chardev *chr,
-- 
2.25.1




[PATCH 39/47] nbd/server: Don't complain on certain client disconnects

2021-12-14 Thread Michael Roth
From: Eric Blake 

When a client disconnects abruptly, but did not have any pending
requests (for example, when using nbdsh without calling h.shutdown),
we used to output the following message:

$ qemu-nbd -f raw file
$ nbdsh -u 'nbd://localhost:10809' -c 'h.trim(1,0)'
qemu-nbd: Disconnect client, due to: Failed to read request: Unexpected 
end-of-file before all bytes were read

Then in commit f148ae7, we refactored nbd_receive_request() to use
nbd_read_eof(); when this returns 0, we regressed into tracing
uninitialized memory (if tracing is enabled) and reporting a
less-specific:

qemu-nbd: Disconnect client, due to: Request handling failed in intermediate 
state

Note that with Unix sockets, we have yet another error message,
unchanged by the 6.0 regression:

$ qemu-nbd -k /tmp/sock -f raw file
$ nbdsh -u 'nbd+unix:///?socket=/tmp/sock' -c 'h.trim(1,0)'
qemu-nbd: Disconnect client, due to: Failed to send reply: Unable to write to 
socket: Broken pipe

But in all cases, the error message goes away if the client performs a
soft shutdown by using NBD_CMD_DISC, rather than a hard shutdown by
abrupt disconnect:

$ nbdsh -u 'nbd://localhost:10809' -c 'h.trim(1,0)' -c 'h.shutdown()'

This patch fixes things to avoid uninitialized memory, and in general
avoids warning about a client that does a hard shutdown when not in
the middle of a packet.  A client that aborts mid-request, or which
does not read the full server's reply, can still result in warnings,
but those are indeed much more unusual situations.

CC: qemu-sta...@nongnu.org
Fixes: f148ae7d36 ("nbd/server: Quiesce coroutines on context switch", v6.0.0)
Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
[eblake: defer unrelated typo fixes to later patch]
Message-Id: <2027170230.1128262-2-ebl...@redhat.com>
Signed-off-by: Eric Blake 
(cherry picked from commit 1644cccea5c71b02b9cf8f78b780e7069a29b189)
Signed-off-by: Michael Roth 
---
 nbd/server.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nbd/server.c b/nbd/server.c
index 3927f7789d..83aeed51c7 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1413,6 +1413,9 @@ static int nbd_receive_request(NBDClient *client, 
NBDRequest *request,
 if (ret < 0) {
 return ret;
 }
+if (ret == 0) {
+return -EIO;
+}
 
 /* Request
[ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
-- 
2.25.1




[PATCH 26/47] tests/acpi/bios-tables-test: update DSDT blob for multifunction bridge test

2021-12-14 Thread Michael Roth
From: Ani Sinha 

We added a new unit test for testing acpi hotplug on multifunction bridges in
q35 machines. Here, we update the DSDT table golden master blob for this unit
test.

The test adds the following devices to qemu and then checks the changes
introduced in the DSDT table due to the addition of the following devices:

(a) a multifunction bridge device
(b) a bridge device with function 1
(c) a non-bridge device with function 2

In the DSDT table, we should see AML hotplug descriptions for (a) and (b).
For (a) we should find a hotplug AML description for function 0.

Following is the ASL diff between the original DSDT table and the modified DSDT
table due to the unit test. We see that multifunction bridge on bus 2 and single
function bridge on bus 3 function 1 are described, not the non-bridge balloon
device on bus 4, function 2.

@@ -1,30 +1,30 @@
 /*
  * Intel ACPI Component Architecture
  * AML/ASL+ Disassembler version 20190509 (64-bit version)
  * Copyright (c) 2000 - 2019 Intel Corporation
  *
  * Disassembling to symbolic ASL+ operators
  *
- * Disassembly of tests/data/acpi/q35/DSDT, Thu Oct  7 18:29:19 2021
+ * Disassembly of /tmp/aml-C7JCA1, Thu Oct  7 18:29:19 2021
  *
  * Original Table Header:
  * Signature"DSDT"
- * Length   0x2061 (8289)
+ * Length   0x2187 (8583)
  * Revision 0x01  32-bit table (V1), no 64-bit math support
- * Checksum 0xF9
+ * Checksum 0x8D
  * OEM ID   "BOCHS "
  * OEM Table ID "BXPC"
  * OEM Revision 0x0001 (1)
  * Compiler ID  "BXPC"
  * Compiler Version 0x0001 (1)
  */
 DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC", 0x0001)
 {
 Scope (\)
 {
 OperationRegion (DBG, SystemIO, 0x0402, One)
 Field (DBG, ByteAcc, NoLock, Preserve)
 {
 DBGB,   8
 }

@@ -3265,23 +3265,95 @@
 Method (_S1D, 0, NotSerialized)  // _S1D: S1 Device State
 {
 Return (Zero)
 }

 Method (_S2D, 0, NotSerialized)  // _S2D: S2 Device State
 {
 Return (Zero)
 }

 Method (_S3D, 0, NotSerialized)  // _S3D: S3 Device State
 {
 Return (Zero)
 }
 }

+Device (S10)
+{
+Name (_ADR, 0x00020000)  // _ADR: Address
+Name (BSEL, One)
+Device (S00)
+{
+Name (_SUN, Zero)  // _SUN: Slot User Number
+Name (_ADR, Zero)  // _ADR: Address
+Method (_EJ0, 1, NotSerialized)  // _EJx: Eject Device, 
x=0-9
+{
+PCEJ (BSEL, _SUN)
+}
+
+Method (_DSM, 4, Serialized)  // _DSM: Device-Specific 
Method
+{
+Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN))
+}
+}
+
+Method (DVNT, 2, NotSerialized)
+{
+If ((Arg0 & One))
+{
+Notify (S00, Arg1)
+}
+}
+
+Method (PCNT, 0, NotSerialized)
+{
+BNUM = One
+DVNT (PCIU, One)
+DVNT (PCID, 0x03)
+}
+}
+
+Device (S19)
+{
+Name (_ADR, 0x00030001)  // _ADR: Address
+Name (BSEL, Zero)
+Device (S00)
+{
+Name (_SUN, Zero)  // _SUN: Slot User Number
+Name (_ADR, Zero)  // _ADR: Address
+Method (_EJ0, 1, NotSerialized)  // _EJx: Eject Device, 
x=0-9
+{
+PCEJ (BSEL, _SUN)
+}
+
+Method (_DSM, 4, Serialized)  // _DSM: Device-Specific 
Method
+{
+Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN))
+}
+}
+
+Method (DVNT, 2, NotSerialized)
+{
+If ((Arg0 & One))
+{
+Notify (S00, Arg1)
+}
+}
+
+Method (PCNT, 0, NotSerialized)
+{
+BNUM = Zero
+DVNT (PCIU, One)
+DVNT (PCID, 0x03)
+}
+}
+
 Method (PCNT, 0, NotSerialized)
 {
+^S19.PCNT ()
+^S10.PCNT ()
 }
 }
 }
 }

Signed-off-by: Ani Sinha 
Message-Id: <20211007135750.1277213-4-...@anisinha.ca>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Acked-by: 

[PATCH 24/47] tests/acpi/bios-tables-test: add and allow changes to a new q35 DSDT table blob

2021-12-14 Thread Michael Roth
From: Ani Sinha 

We are adding a new unit test to cover the acpi hotplug support in q35 for
multi-function bridges. This test uses a new table DSDT.multi-bridge.
We need to allow changes in DSDT acpi table for addition of this new
unit test.

Signed-off-by: Ani Sinha 
Message-Id: <20211007135750.1277213-2-...@anisinha.ca>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Acked-by: Igor Mammedov 
(cherry picked from commit 6dcb1cc9512c6b4cd8f85abc537abaf6f6c0738b)
Signed-off-by: Michael Roth 
---
 tests/data/acpi/q35/DSDT.multi-bridge   | 0
 tests/qtest/bios-tables-test-allowed-diff.h | 1 +
 2 files changed, 1 insertion(+)
 create mode 100644 tests/data/acpi/q35/DSDT.multi-bridge

diff --git a/tests/data/acpi/q35/DSDT.multi-bridge 
b/tests/data/acpi/q35/DSDT.multi-bridge
new file mode 100644
index 00..e69de29bb2
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..dabc024f53 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,2 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/q35/DSDT.multi-bridge",
-- 
2.25.1




[PATCH 38/47] vfio: Fix memory leak of hostwin

2021-12-14 Thread Michael Roth
From: Peng Liang 

hostwin is allocated and added to hostwin_list in vfio_host_win_add, but
it is only deleted from hostwin_list in vfio_host_win_del, which causes
a memory leak.  Also, freeing all elements in hostwin_list is missing in
vfio_disconnect_container.

Fix: 2e4109de8e58 ("vfio/spapr: Create DMA window dynamically (SPAPR IOMMU v2)")
CC: qemu-sta...@nongnu.org
Signed-off-by: Peng Liang 
Link: https://lore.kernel.org/r/2027014739.1839263-1-liangpen...@huawei.com
Signed-off-by: Alex Williamson 
(cherry picked from commit f3bc3a73c908df15966e66f88d5a633bd42fd029)
Signed-off-by: Michael Roth 
---
 hw/vfio/common.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 8728d4d5c2..af37346aca 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -551,6 +551,7 @@ static int vfio_host_win_del(VFIOContainer *container, 
hwaddr min_iova,
 QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
 if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) {
 QLIST_REMOVE(hostwin, hostwin_next);
+g_free(hostwin);
 return 0;
 }
 }
@@ -2230,6 +2231,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
 if (QLIST_EMPTY(&container->group_list)) {
 VFIOAddressSpace *space = container->space;
 VFIOGuestIOMMU *giommu, *tmp;
+VFIOHostDMAWindow *hostwin, *next;
 
 QLIST_REMOVE(container, next);
 
@@ -2240,6 +2242,12 @@ static void vfio_disconnect_container(VFIOGroup *group)
 g_free(giommu);
 }
 
+QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
+   next) {
+QLIST_REMOVE(hostwin, hostwin_next);
+g_free(hostwin);
+}
+
 trace_vfio_disconnect_container(container->fd);
 close(container->fd);
 g_free(container);
-- 
2.25.1




[PATCH 23/47] pci: fix PCI resource reserve capability on BE

2021-12-14 Thread Michael Roth
From: "Michael S. Tsirkin" 

PCI resource reserve capability should use LE format as all other PCI
things. If we don't then seabios won't boot:

=== PCI new allocation pass #1 ===
PCI: check devices
PCI: QEMU resource reserve cap: size 10 type io
PCI: secondary bus 1 size 10 type io
PCI: secondary bus 1 size 0020 type mem
PCI: secondary bus 1 size 0020 type prefmem
=== PCI new allocation pass #2 ===
PCI: out of I/O address space

This became more important since we started reserving IO by default,
previously no one noticed.

Fixes: e2a6290aab ("hw/pcie-root-port: Fix hotplug for PCI devices requiring 
IO")
Cc: marcel.apfelb...@gmail.com
Fixes: 226263fb5c ("hw/pci: add QEMU-specific PCI capability to the Generic PCI 
Express Root Port")
Cc: zuban...@gmail.com
Fixes: 6755e618d0 ("hw/pci: add PCI resource reserve capability to legacy PCI 
bridge")
Cc: jing2@linux.intel.com
Tested-by: Thomas Huth 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Philippe Mathieu-Daudé 
(cherry picked from commit 0e464f7d993113119f0fd17b890831440734ce15)
Signed-off-by: Michael Roth 
---
 hw/pci/pci_bridge.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index 3789c17edc..6a1e8e433c 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -448,11 +448,11 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int 
cap_offset,
 PCIBridgeQemuCap cap = {
 .len = cap_len,
 .type = REDHAT_PCI_CAP_RESOURCE_RESERVE,
-.bus_res = res_reserve.bus,
-.io = res_reserve.io,
-.mem = res_reserve.mem_non_pref,
-.mem_pref_32 = res_reserve.mem_pref_32,
-.mem_pref_64 = res_reserve.mem_pref_64
+.bus_res = cpu_to_le32(res_reserve.bus),
+.io = cpu_to_le64(res_reserve.io),
+.mem = cpu_to_le32(res_reserve.mem_non_pref),
+.mem_pref_32 = cpu_to_le32(res_reserve.mem_pref_32),
+.mem_pref_64 = cpu_to_le64(res_reserve.mem_pref_64)
 };
 
 int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
-- 
2.25.1




[PATCH 32/47] hw: m68k: virt: Add compat machine for 6.1

2021-12-14 Thread Michael Roth
From: Laurent Vivier 

Add the missing machine type for m68k/virt

Cc: qemu-sta...@nongnu.org
Signed-off-by: Laurent Vivier 
Message-Id: <20211106194158.4068596-2-laur...@vivier.eu>
Signed-off-by: Laurent Vivier 
(cherry picked from commit 6837f299762679429924242a63f16490862578e3)
Signed-off-by: Michael Roth 
---
 hw/m68k/virt.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c
index 4e8bce5aa6..0d9e3f83c1 100644
--- a/hw/m68k/virt.c
+++ b/hw/m68k/virt.c
@@ -304,7 +304,14 @@ type_init(virt_machine_register_types)
 } \
 type_init(machvirt_machine_##major##_##minor##_init);
 
+static void virt_machine_6_1_options(MachineClass *mc)
+{
+}
+DEFINE_VIRT_MACHINE(6, 1, true)
+
 static void virt_machine_6_0_options(MachineClass *mc)
 {
+virt_machine_6_1_options(mc);
+compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len);
 }
-DEFINE_VIRT_MACHINE(6, 0, true)
+DEFINE_VIRT_MACHINE(6, 0, false)
-- 
2.25.1




[PATCH 21/47] bios-tables-test: Update ACPI DSDT table golden blobs for q35

2021-12-14 Thread Michael Roth
From: Ani Sinha 

We have modified the IO address range for ACPI pci hotplug in q35. See change:

5adcc9e39e6a5 ("hw/i386/acpi: fix conflicting IO address range for acpi pci 
hotplug in q35")

The ACPI DSDT table golden blobs must be regenerated in order to make the unit 
tests
pass. This change updates the golden ACPI DSDT table blobs.

Following is the ASL diff between the blobs:

@@ -1,30 +1,30 @@
 /*
  * Intel ACPI Component Architecture
  * AML/ASL+ Disassembler version 20190509 (64-bit version)
  * Copyright (c) 2000 - 2019 Intel Corporation
  *
  * Disassembling to symbolic ASL+ operators
  *
- * Disassembly of tests/data/acpi/q35/DSDT, Tue Sep 14 09:04:06 2021
+ * Disassembly of /tmp/aml-52DP90, Tue Sep 14 09:04:06 2021
  *
  * Original Table Header:
  * Signature"DSDT"
  * Length   0x2061 (8289)
  * Revision 0x01  32-bit table (V1), no 64-bit math support
- * Checksum 0xE5
+ * Checksum 0xF9
  * OEM ID   "BOCHS "
  * OEM Table ID "BXPC"
  * OEM Revision 0x0001 (1)
  * Compiler ID  "BXPC"
  * Compiler Version 0x0001 (1)
  */
 DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC", 0x0001)
 {
 Scope (\)
 {
 OperationRegion (DBG, SystemIO, 0x0402, One)
 Field (DBG, ByteAcc, NoLock, Preserve)
 {
 DBGB,   8
 }

@@ -226,46 +226,46 @@
 Name (_CRS, ResourceTemplate ()  // _CRS: Current Resource Settings
 {
 IO (Decode16,
 0x0070, // Range Minimum
 0x0070, // Range Maximum
 0x01,   // Alignment
 0x08,   // Length
 )
 IRQNoFlags ()
 {8}
 })
 }
 }

 Scope (_SB.PCI0)
 {
-OperationRegion (PCST, SystemIO, 0x0CC4, 0x08)
+OperationRegion (PCST, SystemIO, 0x0CC0, 0x08)
 Field (PCST, DWordAcc, NoLock, WriteAsZeros)
 {
 PCIU,   32,
 PCID,   32
 }

-OperationRegion (SEJ, SystemIO, 0x0CCC, 0x04)
+OperationRegion (SEJ, SystemIO, 0x0CC8, 0x04)
 Field (SEJ, DWordAcc, NoLock, WriteAsZeros)
 {
 B0EJ,   32
 }

-OperationRegion (BNMR, SystemIO, 0x0CD4, 0x08)
+OperationRegion (BNMR, SystemIO, 0x0CD0, 0x08)
 Field (BNMR, DWordAcc, NoLock, WriteAsZeros)
 {
 BNUM,   32,
 PIDX,   32
 }

 Mutex (BLCK, 0x00)
 Method (PCEJ, 2, NotSerialized)
 {
 Acquire (BLCK, 0x)
 BNUM = Arg0
 B0EJ = (One << Arg1)
 Release (BLCK)
 Return (Zero)
 }

@@ -3185,34 +3185,34 @@
 0x0620, // Range Minimum
 0x0620, // Range Maximum
 0x01,   // Alignment
 0x10,   // Length
 )
 })
 }

 Device (PHPR)
 {
 Name (_HID, "PNP0A06" /* Generic Container Device */)  // _HID: 
Hardware ID
 Name (_UID, "PCI Hotplug resources")  // _UID: Unique ID
 Name (_STA, 0x0B)  // _STA: Status
 Name (_CRS, ResourceTemplate ()  // _CRS: Current Resource Settings
 {
 IO (Decode16,
-0x0CC4, // Range Minimum
-0x0CC4, // Range Maximum
+0x0CC0, // Range Minimum
+0x0CC0, // Range Maximum
 0x01,   // Alignment
 0x18,   // Length
 )
 })
 }
 }

 Scope (\)
 {
 Name (_S3, Package (0x04)  // _S3_: S3 System State
 {
 One,
 One,
 Zero,
 Zero
 })

Signed-off-by: Ani Sinha 
Acked-by: Igor Mammedov 
Message-Id: <20210916132838.3469580-4-...@anisinha.ca>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
(cherry picked from commit 500eb21cff08dfb0478db9b34f2fdba69eb31496)
*drop dependency on 75539b886a ("tests: acpi: tpm1.2: Add expected TPM 1.2 ACPI 
blobs")
Signed-off-by: Michael Roth 
---
 tests/data/acpi/q35/DSDT| Bin 8289 -> 8289 bytes
 tests/data/acpi/q35/DSDT.acpihmat   | Bin 9614 -> 9614 bytes
 tests/data/acpi/q35/DSDT.bridge | Bin 11003 -> 11003 bytes
 tests/data/acpi/q35/DSDT.cphp   | Bin 8753 -> 8753 bytes
 tests/data/acpi/q35/DSDT.dimmpxm| Bin 9943 -> 9943 bytes
 tests/data/acpi/q35/DSDT.ipmibt | Bin 8364 -> 8364 bytes
 tests/data/acpi/q35/DSDT.memhp  | Bin 9648 -> 9648 bytes
 tests/data/acpi/q35/DSDT.mmio64 | Bin 

[PATCH 36/47] virtio: use virtio accessor to access packed descriptor flags

2021-12-14 Thread Michael Roth
From: Jason Wang 

We used to access packed descriptor flags via
address_space_{write|read}_cached(). When we hit the cache, memcpy()
is used, which is not an atomic operation and may lead to a wrong value
being read or written.

So this patch switches to virtio_{stw|lduw}_phys_cached() to make
sure the access is atomic.

Fixes: 86044b24e865f ("virtio: basic packed virtqueue support")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Jason Wang 
Message-Id: <2021063854.29060-1-jasow...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
(cherry picked from commit f463e761a41ee71e59892121e1c74d9c25c985d2)
Signed-off-by: Michael Roth 
---
 hw/virtio/virtio.c | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 874377f37a..2dbccb6b3f 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -509,11 +509,9 @@ static void vring_packed_desc_read_flags(VirtIODevice 
*vdev,
  MemoryRegionCache *cache,
  int i)
 {
-address_space_read_cached(cache,
-  i * sizeof(VRingPackedDesc) +
-  offsetof(VRingPackedDesc, flags),
-  flags, sizeof(*flags));
-virtio_tswap16s(vdev, flags);
+hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, 
flags);
+
+*flags = virtio_lduw_phys_cached(vdev, cache, off);
 }
 
 static void vring_packed_desc_read(VirtIODevice *vdev,
@@ -566,8 +564,7 @@ static void vring_packed_desc_write_flags(VirtIODevice 
*vdev,
 {
 hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, 
flags);
 
-virtio_tswap16s(vdev, &desc->flags);
-address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
+virtio_stw_phys_cached(vdev, cache, off, desc->flags);
 address_space_cache_invalidate(cache, off, sizeof(desc->flags));
 }
 
-- 
2.25.1




[PATCH 22/47] block: introduce max_hw_iov for use in scsi-generic

2021-12-14 Thread Michael Roth
From: Paolo Bonzini 

Linux limits the size of iovecs to 1024 (UIO_MAXIOV in the kernel
sources, IOV_MAX in POSIX).  Because of this, on some host adapters
requests with many iovecs are rejected with -EINVAL by the
io_submit() or readv()/writev() system calls.

In fact, the same limit applies to SG_IO as well.  To fix both the
EINVAL and the possible performance issues from using fewer iovecs
than allowed by Linux (some HBAs have max_segments as low as 128),
introduce a separate entry in BlockLimits to hold the max_segments
value from sysfs.  This new limit is used only for SG_IO and clamped
to bs->bl.max_iov anyway, just like max_hw_transfer is clamped to
bs->bl.max_transfer.

Reported-by: Halil Pasic 
Cc: Hanna Reitz 
Cc: Kevin Wolf 
Cc: qemu-bl...@nongnu.org
Cc: qemu-sta...@nongnu.org
Fixes: 18473467d5 ("file-posix: try BLKSECTGET on block devices too, do not 
round to power of 2", 2021-06-25)
Signed-off-by: Paolo Bonzini 
Message-Id: <20210923130436.1187591-1-pbonz...@redhat.com>
Signed-off-by: Kevin Wolf 
(cherry picked from commit cc071629539dc1f303175a7e2d4ab854c0a8b20f)
Signed-off-by: Michael Roth 
---
 block/block-backend.c  | 6 ++
 block/file-posix.c | 2 +-
 block/io.c | 1 +
 hw/scsi/scsi-generic.c | 2 +-
 include/block/block_int.h  | 7 +++
 include/sysemu/block-backend.h | 1 +
 6 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index deb55c272e..6320752aa2 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1978,6 +1978,12 @@ uint32_t blk_get_max_transfer(BlockBackend *blk)
 return ROUND_DOWN(max, blk_get_request_alignment(blk));
 }
 
+int blk_get_max_hw_iov(BlockBackend *blk)
+{
+return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov,
+blk->root->bs->bl.max_iov);
+}
+
 int blk_get_max_iov(BlockBackend *blk)
 {
 return blk->root->bs->bl.max_iov;
diff --git a/block/file-posix.c b/block/file-posix.c
index cb9bffe047..1567edb3d5 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1273,7 +1273,7 @@ static void raw_refresh_limits(BlockDriverState *bs, 
Error **errp)
 
 ret = hdev_get_max_segments(s->fd, &st);
 if (ret > 0) {
-bs->bl.max_iov = ret;
+bs->bl.max_hw_iov = ret;
 }
 }
 }
diff --git a/block/io.c b/block/io.c
index a19942718b..f38e7f81d8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -136,6 +136,7 @@ static void bdrv_merge_limits(BlockLimits *dst, const 
BlockLimits *src)
 dst->min_mem_alignment = MAX(dst->min_mem_alignment,
  src->min_mem_alignment);
 dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
+dst->max_hw_iov = MIN_NON_ZERO(dst->max_hw_iov, src->max_hw_iov);
 }
 
 typedef struct BdrvRefreshLimitsState {
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 665baf900e..0306ccc7b1 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -180,7 +180,7 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, 
SCSIDevice *s, int len)
 page = r->req.cmd.buf[2];
 if (page == 0xb0) {
 uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk);
-uint32_t max_iov = blk_get_max_iov(s->conf.blk);
+uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk);
 
 assert(max_transfer);
 max_transfer = MIN_NON_ZERO(max_transfer, max_iov * 
qemu_real_host_page_size)
diff --git a/include/block/block_int.h b/include/block/block_int.h
index f1a54db0f8..c31cbd034a 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -702,6 +702,13 @@ typedef struct BlockLimits {
  */
 uint64_t max_hw_transfer;
 
+/* Maximal number of scatter/gather elements allowed by the hardware.
+ * Applies whenever transfers to the device bypass the kernel I/O
+ * scheduler, for example with SG_IO.  If larger than max_iov
+ * or if zero, blk_get_max_hw_iov will fall back to max_iov.
+ */
+int max_hw_iov;
+
 /* memory alignment, in bytes so that no bounce buffer is needed */
 size_t min_mem_alignment;
 
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 9ac5f7bbd3..5daec61f6e 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -210,6 +210,7 @@ uint32_t blk_get_request_alignment(BlockBackend *blk);
 uint32_t blk_get_max_transfer(BlockBackend *blk);
 uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
 int blk_get_max_iov(BlockBackend *blk);
+int blk_get_max_hw_iov(BlockBackend *blk);
 void blk_set_guest_block_size(BlockBackend *blk, int align);
 void *blk_try_blockalign(BlockBackend *blk, size_t size);
 void *blk_blockalign(BlockBackend *blk, size_t size);
-- 
2.25.1




[PATCH 31/47] hw/scsi/scsi-disk: MODE_PAGE_ALLS not allowed in MODE SELECT commands

2021-12-14 Thread Michael Roth
From: Mauro Matteo Cascella 

This avoids an off-by-one read of 'mode_sense_valid' buffer in
hw/scsi/scsi-disk.c:mode_sense_page().

Fixes: CVE-2021-3930
Cc: qemu-sta...@nongnu.org
Reported-by: Alexander Bulekov 
Fixes: a8f4bbe2900 ("scsi-disk: store valid mode pages in a table")
Fixes: #546
Reported-by: Qiuhao Li 
Signed-off-by: Mauro Matteo Cascella 
Signed-off-by: Paolo Bonzini 
(cherry picked from commit b3af7fdf9cc537f8f0dd3e2423d83f5c99a457e8)
Signed-off-by: Michael Roth 
---
 hw/scsi/scsi-disk.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index e8a547dbb7..d4914178ea 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -1087,6 +1087,7 @@ static int mode_sense_page(SCSIDiskState *s, int page, 
uint8_t **p_outbuf,
 uint8_t *p = *p_outbuf + 2;
 int length;
 
+assert(page < ARRAY_SIZE(mode_sense_valid));
 if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) {
 return -1;
 }
@@ -1428,6 +1429,11 @@ static int scsi_disk_check_mode_select(SCSIDiskState *s, 
int page,
 return -1;
 }
 
+/* MODE_PAGE_ALLS is only valid for MODE SENSE commands */
+if (page == MODE_PAGE_ALLS) {
+return -1;
+}
+
 p = mode_current;
 memset(mode_current, 0, inlen + 2);
 len = mode_sense_page(s, page, &p, 0);
-- 
2.25.1




[PATCH 20/47] hw/i386/acpi: fix conflicting IO address range for acpi pci hotplug in q35

2021-12-14 Thread Michael Roth
From: Ani Sinha 

Change caf108bc58790 ("hw/i386/acpi-build: Add ACPI PCI hot-plug methods to 
Q35")
selects an IO address range for acpi based PCI hotplug for q35 arbitrarily. It
starts at address 0x0cc4 and ends at 0x0cdb. At the time when the patch was
written but the final version of the patch was not yet pushed upstream, this
address range was free and did not conflict with any other IO address ranges.
However, with the following change, this address range was no
longer conflict free as in this change, the IO address range
(value of ACPI_PCIHP_SIZE) was incremented by four bytes:

b32bd763a1ca92 ("pci: introduce acpi-index property for PCI device")

This can be seen from the output of QMP command 'info mtree' :

0600-0603 (prio 0, i/o): acpi-evt
0604-0605 (prio 0, i/o): acpi-cnt
0608-060b (prio 0, i/o): acpi-tmr
0620-062f (prio 0, i/o): acpi-gpe0
0630-0637 (prio 0, i/o): acpi-smi
0cc4-0cdb (prio 0, i/o): acpi-pci-hotplug
0cd8-0ce3 (prio 0, i/o): acpi-cpu-hotplug

It shows that there is a region of conflict between IO regions of acpi
pci hotplug and acpi cpu hotplug.

Unfortunately, the change caf108bc58790 did not update the IO address range
appropriately before it was pushed upstream to accommodate the increased
length of the IO address space introduced in change b32bd763a1ca92.

Due to this bug, windows guests complain 'This device cannot find
enough free resources it can use' in the device manager panel for extended
IO buses. This issue also breaks the correct functioning of pci hotplug as the
following shows that the IO space for pci hotplug has been truncated:

(qemu) info mtree -f
FlatView #0
 AS "I/O", root: io
 Root memory region: io
  0cc4-0cd7 (prio 0, i/o): acpi-pci-hotplug
  0cd8-0cf7 (prio 0, i/o): acpi-cpu-hotplug

Therefore, in this fix, we adjust the IO address range for the acpi pci
hotplug so that it does not conflict with cpu hotplug and there is no
truncation of IO spaces. The starting IO address of PCI hotplug region
has been decremented by four bytes in order to accommodate four byte
increment in the IO address space introduced by change
b32bd763a1ca92 ("pci: introduce acpi-index property for PCI device")

After fixing, the following are the corrected IO ranges:

0600-0603 (prio 0, i/o): acpi-evt
0604-0605 (prio 0, i/o): acpi-cnt
0608-060b (prio 0, i/o): acpi-tmr
0620-062f (prio 0, i/o): acpi-gpe0
0630-0637 (prio 0, i/o): acpi-smi
0cc0-0cd7 (prio 0, i/o): acpi-pci-hotplug
0cd8-0ce3 (prio 0, i/o): acpi-cpu-hotplug

This change has been tested using a Windows Server 2019 guest VM. Windows
no longer complains after this change.

Fixes: caf108bc58790 ("hw/i386/acpi-build: Add ACPI PCI hot-plug methods to 
Q35")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/561

Signed-off-by: Ani Sinha 
Reviewed-by: Igor Mammedov 
Reviewed-by: Julia Suvorova 
Message-Id: <20210916132838.3469580-3-...@anisinha.ca>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
(cherry picked from commit 0e780da76a6fe283a20283856718bca3986c104f)
Signed-off-by: Michael Roth 
---
 include/hw/acpi/ich9.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h
index a329ce43ab..f04f1791bd 100644
--- a/include/hw/acpi/ich9.h
+++ b/include/hw/acpi/ich9.h
@@ -29,7 +29,7 @@
 #include "hw/acpi/acpi_dev_interface.h"
 #include "hw/acpi/tco.h"
 
-#define ACPI_PCIHP_ADDR_ICH9 0x0cc4
+#define ACPI_PCIHP_ADDR_ICH9 0x0cc0
 
 typedef struct ICH9LPCPMRegs {
 /*
-- 
2.25.1




[PATCH 35/47] pcie: rename 'native-hotplug' to 'x-native-hotplug'

2021-12-14 Thread Michael Roth
From: Igor Mammedov 

Mark property as experimental/internal adding 'x-' prefix.

Property was introduced in 6.1 and it should have provided the
ability to turn on native PCIE hotplug on a port even when
ACPI PCI hotplug is in use, if the user explicitly sets the property
on the CLI. However, that never worked since the slot is wired to the
ACPI hotplug controller.
Another non-intended usecase: disable native hotplug on a slot
when ACPI based hotplug is disabled, which works, but the slot has the
'hotplug' property for this task.

It should be relatively safe to rename it to experimental
as no users should exist for it and given that the property
is broken we don't really want to leave it around for much
longer lest users start using it.

Signed-off-by: Igor Mammedov 
Reviewed-by: Ani Sinha 
Message-Id: <2022110857.3116853-2-imamm...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
(cherry picked from commit 2aa1842d6d79dcd1b84c58eeb44591a99a9e56df)
Signed-off-by: Michael Roth 
---
 hw/i386/pc_q35.c   | 2 +-
 hw/pci/pcie_port.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 04b4a4788d..9070544a90 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -243,7 +243,7 @@ static void pc_q35_init(MachineState *machine)
   NULL);
 
 if (acpi_pcihp) {
-object_register_sugar_prop(TYPE_PCIE_SLOT, "native-hotplug",
+object_register_sugar_prop(TYPE_PCIE_SLOT, "x-native-hotplug",
"false", true);
 }
 
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index da850e8dde..e95c1e5519 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -148,7 +148,7 @@ static Property pcie_slot_props[] = {
 DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0),
 DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0),
 DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true),
-DEFINE_PROP_BOOL("native-hotplug", PCIESlot, native_hotplug, true),
+DEFINE_PROP_BOOL("x-native-hotplug", PCIESlot, native_hotplug, true),
 DEFINE_PROP_END_OF_LIST()
 };
 
-- 
2.25.1




[PATCH 01/47] virtio-balloon: don't start free page hinting if postcopy is possible

2021-12-14 Thread Michael Roth
From: David Hildenbrand 

Postcopy never worked properly with 'free-page-hint=on', as there are
at least two issues:

1) With postcopy, the guest will never receive a VIRTIO_BALLOON_CMD_ID_DONE
   and consequently won't release free pages back to the OS once
   migration finishes.

   The issue is that for postcopy, we won't do a final bitmap sync while
   the guest is stopped on the source and
   virtio_balloon_free_page_hint_notify() will only call
   virtio_balloon_free_page_done() on the source during
   PRECOPY_NOTIFY_CLEANUP, after the VM state was already migrated to
   the destination.

2) Once the VM touches a page on the destination that has been excluded
   from migration on the source via qemu_guest_free_page_hint() while
   postcopy is active, that thread will stall until postcopy finishes
   and all threads are woken up. (with older Linux kernels that won't
   retry faults when woken up via userfaultfd, we might actually get a
   SEGFAULT)

   The issue is that the source will refuse to migrate any pages that
   are not marked as dirty in the dirty bmap -- for example, because the
   page might just have been sent. Consequently, the faulting thread will
   stall, waiting for the page to be migrated -- which could take quite
   a while and result in guest OS issues.

While we could fix 1) comparatively easily, 2) is harder to get right and
might require more involved RAM migration changes on source and destination
[1].

As it never worked properly, let's not start free page hinting in the
precopy notifier if the postcopy migration capability was enabled to fix
it easily. Capabilities cannot be enabled once migration is already
running.

Note 1: in the future we might either adjust migration code on the source
to track pages that have actually been sent or adjust
migration code on source and destination to eventually send
pages multiple times from the source and deal with pages
that are sent multiple times on the destination.

Note 2: virtio-mem has similar issues, however, access to "unplugged"
memory by the guest is very rare and we would have to be very
lucky for it to happen during migration. The spec states
"The driver SHOULD NOT read from unplugged memory blocks ..."
and "The driver MUST NOT write to unplugged memory blocks".
virtio-mem will move away from virtio_balloon_free_page_done()
soon and handle this case explicitly on the destination.

[1] https://lkml.kernel.org/r/e79fd18c-aa62-c1d8-c7f3-ba3fc2c25...@redhat.com

Fixes: c13c4153f76d ("virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT")
Cc: qemu-sta...@nongnu.org
Cc: Wei Wang 
Cc: Michael S. Tsirkin 
Cc: Philippe Mathieu-Daudé 
Cc: Alexander Duyck 
Cc: Juan Quintela 
Cc: "Dr. David Alan Gilbert" 
Cc: Peter Xu 
Signed-off-by: David Hildenbrand 
Message-Id: <20210708095339.20274-2-da...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Peter Xu 
(cherry picked from commit fd51e54fa10221e5a8add894c38cc1cf199f4bc4)
Signed-off-by: Michael Roth 
---
 hw/virtio/virtio-balloon.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 4b5d9e5e50..ae7867a8db 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -30,6 +30,7 @@
 #include "trace.h"
 #include "qemu/error-report.h"
 #include "migration/misc.h"
+#include "migration/migration.h"
 
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
@@ -662,6 +663,18 @@ virtio_balloon_free_page_hint_notify(NotifierWithReturn 
*n, void *data)
 return 0;
 }
 
+/*
+ * Pages hinted via qemu_guest_free_page_hint() are cleared from the dirty
+ * bitmap and will not get migrated, especially also not when the postcopy
+ * destination starts using them and requests migration from the source; 
the
+ * faulting thread will stall until postcopy migration finishes and
+ * all threads are woken up. Let's not start free page hinting if postcopy
+ * is possible.
+ */
+if (migrate_postcopy_ram()) {
+return 0;
+}
+
 switch (pnd->reason) {
 case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
 virtio_balloon_free_page_stop(dev);
-- 
2.25.1




[PATCH 16/47] vhost-vsock: fix migration issue when seqpacket is supported

2021-12-14 Thread Michael Roth
From: Stefano Garzarella 

Commit 1e08fd0a46 ("vhost-vsock: SOCK_SEQPACKET feature bit support")
enabled the SEQPACKET feature bit.
This commit is released with QEMU 6.1, so if we try to migrate a VM where
the host kernel supports SEQPACKET but machine type version is less than
6.1, we get the following errors:

Features 0x13002 unsupported. Allowed features: 0x17900
Failed to load virtio-vhost_vsock:virtio
error while loading state for instance 0x0 of device 
'0000:00:05.0/virtio-vhost_vsock'
load of migration failed: Operation not permitted

Let's disable the feature bit for machine types < 6.1.
We add a new OnOffAuto property for this, called `seqpacket`.
When it is `auto` (default), QEMU behaves as before, trying to enable the
feature, when it is `on` QEMU will fail if the backend (vhost-vsock
kernel module) doesn't support it.

Fixes: 1e08fd0a46 ("vhost-vsock: SOCK_SEQPACKET feature bit support")
Cc: qemu-sta...@nongnu.org
Reported-by: Jiang Wang 
Signed-off-by: Stefano Garzarella 
Message-Id: <20210921161642.206461-2-sgarz...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
(cherry picked from commit d6a9378f47515c6d70dbff4912c5740c98709880)
Signed-off-by: Michael Roth 
---
 hw/core/machine.c   |  1 +
 hw/virtio/vhost-vsock.c | 19 ---
 include/hw/virtio/vhost-vsock.h |  3 +++
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 54e040587d..2cf2f321f9 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -43,6 +43,7 @@ GlobalProperty hw_compat_6_0[] = {
 { "nvme-ns", "eui64-default", "off"},
 { "e1000", "init-vet", "off" },
 { "e1000e", "init-vet", "off" },
+{ "vhost-vsock-device", "seqpacket", "off" },
 };
 const size_t hw_compat_6_0_len = G_N_ELEMENTS(hw_compat_6_0);
 
diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c
index 1b1a5c70ed..dade0da031 100644
--- a/hw/virtio/vhost-vsock.c
+++ b/hw/virtio/vhost-vsock.c
@@ -114,10 +114,21 @@ static uint64_t vhost_vsock_get_features(VirtIODevice 
*vdev,
  Error **errp)
 {
 VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+VHostVSock *vsock = VHOST_VSOCK(vdev);
+
+if (vsock->seqpacket != ON_OFF_AUTO_OFF) {
+virtio_add_feature(_features, VIRTIO_VSOCK_F_SEQPACKET);
+}
+
+requested_features = vhost_get_features(>vhost_dev, feature_bits,
+requested_features);
+
+if (vsock->seqpacket == ON_OFF_AUTO_ON &&
+!virtio_has_feature(requested_features, VIRTIO_VSOCK_F_SEQPACKET)) {
+error_setg(errp, "vhost-vsock backend doesn't support seqpacket");
+}
 
-virtio_add_feature(_features, VIRTIO_VSOCK_F_SEQPACKET);
-return vhost_get_features(>vhost_dev, feature_bits,
-requested_features);
+return requested_features;
 }
 
 static const VMStateDescription vmstate_virtio_vhost_vsock = {
@@ -218,6 +229,8 @@ static void vhost_vsock_device_unrealize(DeviceState *dev)
 static Property vhost_vsock_properties[] = {
 DEFINE_PROP_UINT64("guest-cid", VHostVSock, conf.guest_cid, 0),
 DEFINE_PROP_STRING("vhostfd", VHostVSock, conf.vhostfd),
+DEFINE_PROP_ON_OFF_AUTO("seqpacket", VHostVSock, seqpacket,
+ON_OFF_AUTO_AUTO),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/virtio/vhost-vsock.h b/include/hw/virtio/vhost-vsock.h
index 84f4e727c7..3f121a624f 100644
--- a/include/hw/virtio/vhost-vsock.h
+++ b/include/hw/virtio/vhost-vsock.h
@@ -30,6 +30,9 @@ struct VHostVSock {
 VHostVSockCommon parent;
 VHostVSockConf conf;
 
+/* features */
+OnOffAuto seqpacket;
+
 /*< public >*/
 };
 
-- 
2.25.1




[PATCH 33/47] rcu: Introduce force_rcu notifier

2021-12-14 Thread Michael Roth
From: Greg Kurz 

The drain_rcu_call() function can be blocked as long as an RCU reader
stays in a read-side critical section. This is typically what happens
when a TCG vCPU is executing a busy loop. It can deadlock the QEMU
monitor as reported in https://gitlab.com/qemu-project/qemu/-/issues/650 .

This can be avoided by allowing drain_rcu_call() to enforce an RCU grace
period. Since each reader might need to do specific actions to end a
read-side critical section, do it with notifiers.

Prepare ground for this by adding a notifier list to the RCU reader
struct and use it in wait_for_readers() if drain_rcu_call() is in
progress. An API is added for readers to register their notifiers.

This is largely based on a draft from Paolo Bonzini.

Suggested-by: Paolo Bonzini 
Signed-off-by: Greg Kurz 
Reviewed-by: Richard Henderson 
Message-Id: <20211109183523.47726-2-gr...@kaod.org>
Signed-off-by: Paolo Bonzini 
(cherry picked from commit ef149763a8fcce70b85dfda27cc1222ecf765750)
Signed-off-by: Michael Roth 
---
 include/qemu/rcu.h | 15 +++
 util/rcu.c | 19 +++
 2 files changed, 34 insertions(+)

diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index 515d327cf1..e69efbd47f 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -27,6 +27,7 @@
 #include "qemu/thread.h"
 #include "qemu/queue.h"
 #include "qemu/atomic.h"
+#include "qemu/notify.h"
 #include "qemu/sys_membarrier.h"
 
 #ifdef __cplusplus
@@ -66,6 +67,13 @@ struct rcu_reader_data {
 
 /* Data used for registry, protected by rcu_registry_lock */
 QLIST_ENTRY(rcu_reader_data) node;
+
+/*
+ * NotifierList used to force an RCU grace period.  Accessed under
+ * rcu_registry_lock.  Note that the notifier is called _outside_
+ * the thread!
+ */
+NotifierList force_rcu;
 };
 
 extern __thread struct rcu_reader_data rcu_reader;
@@ -180,6 +188,13 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, 
rcu_read_auto_unlock)
 #define RCU_READ_LOCK_GUARD() \
 g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = 
rcu_read_auto_lock()
 
+/*
+ * Force-RCU notifiers tell readers that they should exit their
+ * read-side critical section.
+ */
+void rcu_add_force_rcu_notifier(Notifier *n);
+void rcu_remove_force_rcu_notifier(Notifier *n);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/util/rcu.c b/util/rcu.c
index 13ac0f75cb..c91da9f137 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -46,6 +46,7 @@
 unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
 
 QemuEvent rcu_gp_event;
+static int in_drain_call_rcu;
 static QemuMutex rcu_registry_lock;
 static QemuMutex rcu_sync_lock;
 
@@ -107,6 +108,8 @@ static void wait_for_readers(void)
  * get some extra futex wakeups.
  */
 qatomic_set(>waiting, false);
+} else if (qatomic_read(_drain_call_rcu)) {
+notifier_list_notify(>force_rcu, NULL);
 }
 }
 
@@ -339,8 +342,10 @@ void drain_call_rcu(void)
  * assumed.
  */
 
+qatomic_inc(_drain_call_rcu);
 call_rcu1(_drain.rcu, drain_rcu_callback);
 qemu_event_wait(_drain.drain_complete_event);
+qatomic_dec(_drain_call_rcu);
 
 if (locked) {
 qemu_mutex_lock_iothread();
@@ -363,6 +368,20 @@ void rcu_unregister_thread(void)
 qemu_mutex_unlock(_registry_lock);
 }
 
+void rcu_add_force_rcu_notifier(Notifier *n)
+{
+qemu_mutex_lock(_registry_lock);
+notifier_list_add(_reader.force_rcu, n);
+qemu_mutex_unlock(_registry_lock);
+}
+
+void rcu_remove_force_rcu_notifier(Notifier *n)
+{
+qemu_mutex_lock(_registry_lock);
+notifier_remove(n);
+qemu_mutex_unlock(_registry_lock);
+}
+
 static void rcu_init_complete(void)
 {
 QemuThread thread;
-- 
2.25.1




[PATCH 29/47] target-i386: mmu: use pg_mode instead of HF_LMA_MASK

2021-12-14 Thread Michael Roth
From: Paolo Bonzini 

Correctly look up the paging mode of the hypervisor when it is using 64-bit
mode but the guest is not.

Fixes: 68746930ae ("target/i386: use mmu_translate for NPT walk", 2021-05-11)
Cc: qemu-sta...@nongnu.org
Signed-off-by: Paolo Bonzini 
(cherry picked from commit 93eae3583256896dd91a4c2ca38dcbb8d4051cff)
Signed-off-by: Michael Roth 
---
 target/i386/tcg/sysemu/excp_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/i386/tcg/sysemu/excp_helper.c 
b/target/i386/tcg/sysemu/excp_helper.c
index b6d940e04e..2dea4a248e 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -90,7 +90,7 @@ static int mmu_translate(CPUState *cs, hwaddr addr, 
MMUTranslateFunc get_hphys_f
 target_ulong pdpe_addr;
 
 #ifdef TARGET_X86_64
-if (env->hflags & HF_LMA_MASK) {
+if (pg_mode & PG_MODE_LMA) {
 bool la57 = pg_mode & PG_MODE_LA57;
 uint64_t pml5e_addr, pml5e;
 uint64_t pml4e_addr, pml4e;
@@ -287,7 +287,7 @@ do_check_protect_pse36:
 *prot |= PAGE_EXEC;
 }
 
-if (!(env->hflags & HF_LMA_MASK)) {
+if (!(pg_mode & PG_MODE_LMA)) {
 pkr = 0;
 } else if (ptep & PG_USER_MASK) {
 pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
-- 
2.25.1




[PATCH 17/47] hw/arm/virt: Rename default_bus_bypass_iommu

2021-12-14 Thread Michael Roth
From: Jean-Philippe Brucker 

Since commit d8fb7d0969d5 ("vl: switch -M parsing to keyval"), machine
parameter definitions cannot use underscores, because keyval_dashify()
transforms them to dashes and the parser doesn't find the parameter.

This affects option default_bus_bypass_iommu which was introduced in the
same release:

$ qemu-system-aarch64 -M virt,default_bus_bypass_iommu=on
qemu-system-aarch64: Property 'virt-6.1-machine.default-bus-bypass-iommu' not 
found

Rename the parameter to "default-bus-bypass-iommu". Passing
"default_bus_bypass_iommu" is still valid since underscores are
transformed automatically.

Fixes: 6d7a85483a06 ("hw/arm/virt: Add default_bus_bypass_iommu machine option")
Signed-off-by: Jean-Philippe Brucker 
Tested-by: Eric Auger 
Reviewed-by: Eric Auger 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20211026093733.2144161-1-jean-phili...@linaro.org>
Signed-off-by: Richard Henderson 
(cherry picked from commit 9dad363a223df8269175d218413aa8cd265e078e)
Signed-off-by: Michael Roth 
---
 hw/arm/virt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 81eda46b0b..106be60fd4 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2677,10 +2677,10 @@ static void virt_machine_class_init(ObjectClass *oc, 
void *data)
   "Set the IOMMU type. "
   "Valid values are none and smmuv3");
 
-object_class_property_add_bool(oc, "default_bus_bypass_iommu",
+object_class_property_add_bool(oc, "default-bus-bypass-iommu",
virt_get_default_bus_bypass_iommu,
virt_set_default_bus_bypass_iommu);
-object_class_property_set_description(oc, "default_bus_bypass_iommu",
+object_class_property_set_description(oc, "default-bus-bypass-iommu",
   "Set on/off to enable/disable "
   "bypass_iommu for default root bus");
 
-- 
2.25.1




[PATCH 02/47] target/arm: Don't skip M-profile reset entirely in user mode

2021-12-14 Thread Michael Roth
From: Peter Maydell 

Currently all of the M-profile specific code in arm_cpu_reset() is
inside a !defined(CONFIG_USER_ONLY) ifdef block.  This is
unintentional: it happened because originally the only
M-profile-specific handling was the setup of the initial SP and PC
from the vector table, which is system-emulation only.  But then we
added a lot of other M-profile setup to the same "if (ARM_FEATURE_M)"
code block without noticing that it was all inside a not-user-mode
ifdef.  This has generally been harmless, but with the addition of
v8.1M low-overhead-loop support we ran into a problem: the reset of
FPSCR.LTPSIZE to 4 was only being done for system emulation mode, so
if a user-mode guest tried to execute the LE instruction it would
incorrectly take a UsageFault.

Adjust the ifdefs so only the really system-emulation specific parts
are covered.  Because this means we now run some reset code that sets
up initial values in the FPCCR and similar FPU related registers,
explicitly set up the registers controlling FPU context handling in
user-emulation mode so that the FPU works by design and not by
chance.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/613
Cc: qemu-sta...@nongnu.org
Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Message-id: 20210914120725.24992-2-peter.mayd...@linaro.org
(cherry picked from commit b62ceeaf8096fdbbbfdc6087da0028bc4a4dd77e)
Signed-off-by: Michael Roth 
---
 target/arm/cpu.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 2866dd7658..af60c07ca1 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -265,12 +265,15 @@ static void arm_cpu_reset(DeviceState *dev)
 env->uncached_cpsr = ARM_CPU_MODE_SVC;
 }
 env->daif = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F;
+#endif
 
 if (arm_feature(env, ARM_FEATURE_M)) {
+#ifndef CONFIG_USER_ONLY
 uint32_t initial_msp; /* Loaded from 0x0 */
 uint32_t initial_pc; /* Loaded from 0x4 */
 uint8_t *rom;
 uint32_t vecbase;
+#endif
 
 if (cpu_isar_feature(aa32_lob, cpu)) {
 /*
@@ -324,6 +327,8 @@ static void arm_cpu_reset(DeviceState *dev)
 env->v7m.fpccr[M_REG_S] = R_V7M_FPCCR_ASPEN_MASK |
 R_V7M_FPCCR_LSPEN_MASK | R_V7M_FPCCR_S_MASK;
 }
+
+#ifndef CONFIG_USER_ONLY
 /* Unlike A/R profile, M profile defines the reset LR value */
 env->regs[14] = 0x;
 
@@ -352,8 +357,22 @@ static void arm_cpu_reset(DeviceState *dev)
 env->regs[13] = initial_msp & 0xFFFC;
 env->regs[15] = initial_pc & ~1;
 env->thumb = initial_pc & 1;
+#else
+/*
+ * For user mode we run non-secure and with access to the FPU.
+ * The FPU context is active (ie does not need further setup)
+ * and is owned by non-secure.
+ */
+env->v7m.secure = false;
+env->v7m.nsacr = 0xcff;
+env->v7m.cpacr[M_REG_NS] = 0xf0;
+env->v7m.fpccr[M_REG_S] &=
+~(R_V7M_FPCCR_LSPEN_MASK | R_V7M_FPCCR_S_MASK);
+env->v7m.control[M_REG_S] |= R_V7M_CONTROL_FPCA_MASK;
+#endif
 }
 
+#ifndef CONFIG_USER_ONLY
 /* AArch32 has a hard highvec setting of 0x.  If we are currently
  * executing as AArch32 then check if highvecs are enabled and
  * adjust the PC accordingly.
-- 
2.25.1




[PATCH 11/47] i386/cpu: Remove AVX_VNNI feature from Cooperlake cpu model

2021-12-14 Thread Michael Roth
From: Yang Zhong 

The AVX_VNNI feature is not in Cooperlake platform, remove it
from cpu model.

Signed-off-by: Yang Zhong 
Message-Id: <20210820054611.84303-1-yang.zh...@intel.com>
Fixes: c1826ea6a052 ("i386/cpu: Expose AVX_VNNI instruction to guest")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Eduardo Habkost 
(cherry picked from commit f429dbf8fc526a9cacf531176b28d0c65701475a)
Signed-off-by: Michael Roth 
---
 target/i386/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 34a7ce865b..24ddc5b926 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3102,7 +3102,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
 MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO |
 MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO,
 .features[FEAT_7_1_EAX] =
-CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16,
+CPUID_7_1_EAX_AVX512_BF16,
 /* XSAVES is added in version 2 */
 .features[FEAT_XSAVE] =
 CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
-- 
2.25.1




[PATCH 15/47] qemu-sockets: fix unix socket path copy (again)

2021-12-14 Thread Michael Roth
From: Michael Tokarev 

Commit 4cfd970ec188558daa6214f26203fe553fb1e01f added an
assert which ensures the path within an address of a unix
socket returned from the kernel is at least one byte and
does not exceed sun_path buffer. Both of these constraints
are wrong:

A unix socket can be unnamed, in this case the path is
completely empty (not even \0)

And some implementations (notably Linux) can add an extra
trailing byte (\0) _after_ the sun_path buffer if we
passed a buffer larger than it (and we do).

So remove the assertion (since it causes real-life breakage)
but at the same time fix the usage of sun_path. Namely,
we should not access sun_path[0] if kernel did not return
it at all (this is the case for unnamed sockets),
and use the returned salen when copying the actual path as an
upper constraint for the amount of bytes to copy - this
will ensure we won't exceed the information provided by
the kernel, regardless of whether there is a trailing \0
or not. This also helps with unnamed sockets.

Note the case of abstract socket, the sun_path is actually
a blob and can contain \0 characters, - it should not be
passed to g_strndup and the like, it should be accessed by
memcpy-like functions.

Fixes: 4cfd970ec188558daa6214f26203fe553fb1e01f
Fixes: http://bugs.debian.org/993145
Signed-off-by: Michael Tokarev 
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Marc-André Lureau 
CC: qemu-sta...@nongnu.org
(cherry picked from commit 118d527f2e4baec5fe8060b22a6212468b8e4d3f)
Signed-off-by: Michael Roth 
---
 util/qemu-sockets.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index f2f3676d1f..c5043999e9 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -1345,25 +1345,22 @@ socket_sockaddr_to_address_unix(struct sockaddr_storage 
*sa,
 SocketAddress *addr;
 struct sockaddr_un *su = (struct sockaddr_un *)sa;
 
-assert(salen >= sizeof(su->sun_family) + 1 &&
-   salen <= sizeof(struct sockaddr_un));
-
 addr = g_new0(SocketAddress, 1);
 addr->type = SOCKET_ADDRESS_TYPE_UNIX;
+salen -= offsetof(struct sockaddr_un, sun_path);
 #ifdef CONFIG_LINUX
-if (!su->sun_path[0]) {
+if (salen > 0 && !su->sun_path[0]) {
 /* Linux abstract socket */
-addr->u.q_unix.path = g_strndup(su->sun_path + 1,
-salen - sizeof(su->sun_family) - 1);
+addr->u.q_unix.path = g_strndup(su->sun_path + 1, salen - 1);
 addr->u.q_unix.has_abstract = true;
 addr->u.q_unix.abstract = true;
 addr->u.q_unix.has_tight = true;
-addr->u.q_unix.tight = salen < sizeof(*su);
+addr->u.q_unix.tight = salen < sizeof(su->sun_path);
 return addr;
 }
 #endif
 
-addr->u.q_unix.path = g_strndup(su->sun_path, sizeof(su->sun_path));
+addr->u.q_unix.path = g_strndup(su->sun_path, salen);
 return addr;
 }
 #endif /* WIN32 */
-- 
2.25.1




[PATCH 19/47] bios-tables-test: allow changes in DSDT ACPI tables for q35

2021-12-14 Thread Michael Roth
From: Ani Sinha 

We are going to commit a change to fix IO address range allocated for acpi pci
hotplug in q35. This affects DSDT tables. This change allows DSDT table
modification so that unit tests are not broken.

Signed-off-by: Ani Sinha 
Acked-by: Igor Mammedov 
Message-Id: <20210916132838.3469580-2-...@anisinha.ca>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
(cherry picked from commit 9f29e872d5b3973003701401cf659cfb71c95013)
Signed-off-by: Michael Roth 
---
 tests/qtest/bios-tables-test-allowed-diff.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..c06da38af3 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,13 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/q35/DSDT",
+"tests/data/acpi/q35/DSDT.tis.tpm12",
+"tests/data/acpi/q35/DSDT.tis.tpm2",
+"tests/data/acpi/q35/DSDT.bridge",
+"tests/data/acpi/q35/DSDT.mmio64",
+"tests/data/acpi/q35/DSDT.ipmibt",
+"tests/data/acpi/q35/DSDT.cphp",
+"tests/data/acpi/q35/DSDT.memhp",
+"tests/data/acpi/q35/DSDT.numamem",
+"tests/data/acpi/q35/DSDT.nohpet",
+"tests/data/acpi/q35/DSDT.dimmpxm",
+"tests/data/acpi/q35/DSDT.acpihmat",
-- 
2.25.1




[PATCH 13/47] plugins/execlog: removed unintended "s" at the end of log lines.

2021-12-14 Thread Michael Roth
From: Mahmoud Mandour 

Signed-off-by: Mahmoud Mandour 
Signed-off-by: Alex Bennée 
Message-Id: <20210803151428.125323-1-ma.mando...@gmail.com>
Message-Id: <20210806141015.2487502-2-alex.ben...@linaro.org>
Cc: qemu-sta...@nongnu.org
(cherry picked from commit b40310616d2bd550279dd22b05483c3c613a00ff)
Signed-off-by: Michael Roth 
---
 contrib/plugins/execlog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
index 2de9f0d7d4..a5275dcc15 100644
--- a/contrib/plugins/execlog.c
+++ b/contrib/plugins/execlog.c
@@ -67,7 +67,7 @@ static void vcpu_insn_exec(unsigned int cpu_index, void 
*udata)
 /* Print previous instruction in cache */
 if (s->len) {
 qemu_plugin_outs(s->str);
-qemu_plugin_outs("s\n");
+qemu_plugin_outs("\n");
 }
 
 /* Store new instruction in cache */
-- 
2.25.1




[PATCH 18/47] hw/i386: Rename default_bus_bypass_iommu

2021-12-14 Thread Michael Roth
From: Jean-Philippe Brucker 

Since commit d8fb7d0969d5 ("vl: switch -M parsing to keyval"), machine
parameter definitions cannot use underscores, because keyval_dashify()
transforms them to dashes and the parser doesn't find the parameter.

This affects option default_bus_bypass_iommu which was introduced in the
same release:

$ qemu-system-x86_64 -M q35,default_bus_bypass_iommu=on
qemu-system-x86_64: Property 'pc-q35-6.1-machine.default-bus-bypass-iommu' not 
found

Rename the parameter to "default-bus-bypass-iommu". Passing
"default_bus_bypass_iommu" is still valid since underscores are
transformed automatically.

Fixes: c9e96b04fc19 ("hw/i386: Add a default_bus_bypass_iommu pc machine 
option")
Reviewed-by: Eric Auger 
Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Eric Auger 
Signed-off-by: Jean-Philippe Brucker 
Message-Id: <20211025104737.1560274-1-jean-phili...@linaro.org>
Cc: qemu-sta...@nongnu.org
Signed-off-by: Paolo Bonzini 
(cherry picked from commit 739b38630c45585cd9d372d44537f69c0b2b4346)
Signed-off-by: Michael Roth 
---
 hw/i386/pc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index c2b9d62a35..1d421ae2f8 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1763,7 +1763,7 @@ static void pc_machine_class_init(ObjectClass *oc, void 
*data)
 object_class_property_add_bool(oc, "hpet",
 pc_machine_get_hpet, pc_machine_set_hpet);
 
-object_class_property_add_bool(oc, "default_bus_bypass_iommu",
+object_class_property_add_bool(oc, "default-bus-bypass-iommu",
 pc_machine_get_default_bus_bypass_iommu,
 pc_machine_set_default_bus_bypass_iommu);
 
-- 
2.25.1




[PATCH 14/47] plugins: do not limit exported symbols if modules are active

2021-12-14 Thread Michael Roth
From: Paolo Bonzini 

On Mac --enable-modules and --enable-plugins are currently incompatible, 
because the
Apple -Wl,-exported_symbols_list command line option prevents the export of any
symbols needed by the modules.  On x86 -Wl,--dynamic-list does not have this 
effect,
but only because the -Wl,--export-dynamic option provided by gmodule-2.0.pc 
overrides
it.  On Apple there is no -Wl,--export-dynamic, because it is the default, and 
thus
no override.

Either way, when modules are active there is no reason to include the 
plugin_ldflags.
While at it, avoid the useless -Wl,--export-dynamic when --enable-plugins is
specified but --enable-modules is not; this way, the GNU and Apple 
configurations
are more similar.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/516
Signed-off-by: Paolo Bonzini 
[AJB: fix noexport to no-export]
Signed-off-by: Alex Bennée 
Message-Id: <20210811100550.54714-1-pbonz...@redhat.com>
Cc: qemu-sta...@nongnu.org
(cherry picked from commit b906acace2d4f68b6ff8de73739a773cc4851436)
Signed-off-by: Michael Roth 
---
 configure   |  5 ++---
 plugins/meson.build | 14 --
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/configure b/configure
index 9a79a004d7..7659870810 100755
--- a/configure
+++ b/configure
@@ -3187,9 +3187,8 @@ glib_req_ver=2.56
 glib_modules=gthread-2.0
 if test "$modules" = yes; then
 glib_modules="$glib_modules gmodule-export-2.0"
-fi
-if test "$plugins" = "yes"; then
-glib_modules="$glib_modules gmodule-2.0"
+elif test "$plugins" = "yes"; then
+glib_modules="$glib_modules gmodule-no-export-2.0"
 fi
 
 for i in $glib_modules; do
diff --git a/plugins/meson.build b/plugins/meson.build
index e77723010e..bfd5c9822a 100644
--- a/plugins/meson.build
+++ b/plugins/meson.build
@@ -1,9 +1,11 @@
-if 'CONFIG_HAS_LD_DYNAMIC_LIST' in config_host
-  plugin_ldflags = ['-Wl,--dynamic-list=' + (meson.build_root() / 
'qemu-plugins-ld.symbols')]
-elif 'CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST' in config_host
-  plugin_ldflags = ['-Wl,-exported_symbols_list,' + (meson.build_root() / 
'qemu-plugins-ld64.symbols')]
-else
-  plugin_ldflags = []
+plugin_ldflags = []
+# Modules need more symbols than just those in plugins/qemu-plugins.symbols
+if not enable_modules
+  if 'CONFIG_HAS_LD_DYNAMIC_LIST' in config_host
+plugin_ldflags = ['-Wl,--dynamic-list=' + (meson.build_root() / 
'qemu-plugins-ld.symbols')]
+  elif 'CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST' in config_host
+plugin_ldflags = ['-Wl,-exported_symbols_list,' + (meson.build_root() / 
'qemu-plugins-ld64.symbols')]
+  endif
 endif
 
 specific_ss.add(when: 'CONFIG_PLUGIN', if_true: [files(
-- 
2.25.1




[PATCH 12/47] 9pfs: fix crash in v9fs_walk()

2021-12-14 Thread Michael Roth
From: Christian Schoenebeck 

v9fs_walk() utilizes the v9fs_co_run_in_worker({...}) macro to run the
supplied fs driver code block on a background worker thread.

When either the 'Twalk' client request was interrupted or if the client
requested fid for that 'Twalk' request caused a stat error then that
fs driver code block was left by 'break' keyword, with the intention to
return from worker thread back to main thread as well:

v9fs_co_run_in_worker({
if (v9fs_request_cancelled(pdu)) {
err = -EINTR;
break;
}
err = s->ops->lstat(&s->ctx, &dpath, &fidst);
if (err < 0) {
err = -errno;
break;
}
...
});

However that 'break;' statement also skipped the v9fs_co_run_in_worker()
macro's final and mandatory

/* re-enter back to qemu thread */
qemu_coroutine_yield();

call and thus caused the rest of v9fs_walk() to continue being
executed on the worker thread instead of the main thread, eventually
leading to a crash in the virtio transport driver.

To fix this issue and to prevent the same error from happening again by
other users of v9fs_co_run_in_worker() in future, auto wrap the supplied
code block into its own

do { } while (0);

loop inside the 'v9fs_co_run_in_worker' macro definition.

Full discussion and backtrace:
https://lists.gnu.org/archive/html/qemu-devel/2021-08/msg05209.html
https://lists.gnu.org/archive/html/qemu-devel/2021-09/msg00174.html

Fixes: 8d6cb100731c4d28535adbf2a3c2d1f29be3fef4
Signed-off-by: Christian Schoenebeck 
Cc: qemu-sta...@nongnu.org
Reviewed-by: Greg Kurz 
Message-Id: 
(cherry picked from commit f83df00900816476cca41bb536e4d532b297d76e)
Signed-off-by: Michael Roth 
---
 hw/9pfs/coth.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h
index c51289903d..f83c7dda7b 100644
--- a/hw/9pfs/coth.h
+++ b/hw/9pfs/coth.h
@@ -51,7 +51,9 @@
  */ \
 qemu_coroutine_yield(); \
 qemu_bh_delete(co_bh);  \
-code_block; \
+do {\
+code_block; \
+} while (0);\
 /* re-enter back to qemu thread */  \
 qemu_coroutine_yield(); \
 } while (0)
-- 
2.25.1




[PATCH 00/47] Patch Round-up for stable 6.1.1, freeze on 2021-12-21

2021-12-14 Thread Michael Roth
Hi everyone,

The following new patches are queued for QEMU stable v6.1.1:

  https://gitlab.com/qemu-project/qemu/-/commits/stable-6.1-staging/

Patch freeze is 2021-12-21, and the release is planned for 2021-12-23:

  https://wiki.qemu.org/Planning/6.1

Please respond here or CC qemu-sta...@nongnu.org on any additional patches
you think should (or shouldn't) be included in the release.

Thanks!


Ani Sinha (6):
  bios-tables-test: allow changes in DSDT ACPI tables for q35
  hw/i386/acpi: fix conflicting IO address range for acpi pci hotplug in q35
  bios-tables-test: Update ACPI DSDT table golden blobs for q35
  tests/acpi/bios-tables-test: add and allow changes to a new q35 DSDT 
table blob
  tests/acpi/pcihp: add unit tests for hotplug on multifunction bridges for 
q35
  tests/acpi/bios-tables-test: update DSDT blob for multifunction bridge 
test

Ari Sundholm (1):
  block/file-posix: Fix return value translation for AIO discards

Christian Schoenebeck (1):
  9pfs: fix crash in v9fs_walk()

Daniil Tatianin (1):
  chardev/wctable: don't free the instance in wctablet_chr_finalize

David Hildenbrand (3):
  virtio-balloon: don't start free page hinting if postcopy is possible
  virtio-mem-pci: Fix memory leak when creating MEMORY_DEVICE_SIZE_CHANGE 
event
  libvhost-user: fix VHOST_USER_REM_MEM_REG skipping mmap_addr

Eric Blake (1):
  nbd/server: Don't complain on certain client disconnects

Gerd Hoffmann (1):
  uas: add stream number sanity checks.

Greg Kurz (2):
  rcu: Introduce force_rcu notifier
  accel/tcg: Register a force_rcu notifier

Helge Deller (1):
  hw/display/artist: Fix bug in coordinate extraction in artist_vram_read() 
and artist_vram_write()

Igor Mammedov (1):
  pcie: rename 'native-hotplug' to 'x-native-hotplug'

Jason Wang (3):
  virtio-net: fix use after unmap/free for sg
  virtio: use virtio accessor to access packed descriptor flags
  virtio: use virtio accessor to access packed event

Jean-Philippe Brucker (2):
  hw/arm/virt: Rename default_bus_bypass_iommu
  hw/i386: Rename default_bus_bypass_iommu

Jessica Clarke (1):
  Partially revert "build: -no-pie is no functional linker flag"

Jon Maloy (1):
  e1000: fix tx re-entrancy problem

Klaus Jensen (1):
  hw/nvme: fix buffer overrun in nvme_changed_nslist (CVE-2021-3947)

Laurent Vivier (1):
  hw: m68k: virt: Add compat machine for 6.1

Mahmoud Mandour (1):
  plugins/execlog: removed unintended "s" at the end of log lines.

Mark Mielke (1):
  virtio-blk: Fix clean up of host notifiers for single MR transaction.

Markus Armbruster (1):
  hmp: Unbreak "change vnc"

Mauro Matteo Cascella (1):
  hw/scsi/scsi-disk: MODE_PAGE_ALLS not allowed in MODE SELECT commands

Michael S. Tsirkin (1):
  pci: fix PCI resource reserve capability on BE

Michael Tokarev (1):
  qemu-sockets: fix unix socket path copy (again)

Nir Soffer (1):
  qemu-nbd: Change default cache mode to writeback

Paolo Bonzini (4):
  plugins: do not limit exported symbols if modules are active
  block: introduce max_hw_iov for use in scsi-generic
  target-i386: mmu: use pg_mode instead of HF_LMA_MASK
  target-i386: mmu: fix handling of noncanonical virtual addresses

Peng Liang (1):
  vfio: Fix memory leak of hostwin

Peter Maydell (1):
  target/arm: Don't skip M-profile reset entirely in user mode

Philippe Mathieu-Daudé (3):
  hw/block/fdc: Extract blk_create_empty_drive()
  hw/block/fdc: Kludge missing floppy drive to fix CVE-2021-20196
  tests/qtest/fdc-test: Add a regression test for CVE-2021-20196

Prasad J Pandit (1):
  net: vmxnet3: validate configuration values during activate 
(CVE-2021-20203)

Stefano Garzarella (1):
  vhost-vsock: fix migration issue when seqpacket is supported

Xueming Li (1):
  vhost-user: fix duplicated notifier MR init

Yang Zhong (1):
  i386/cpu: Remove AVX_VNNI feature from Cooperlake cpu model

 accel/tcg/tcg-accel-ops-mttcg.c   |  26 
 accel/tcg/tcg-accel-ops-rr.c  |  10 
 block/block-backend.c |   6 +
 block/file-posix.c|   6 ++---
 block/io.c|   1 +
 chardev/wctablet.c|   1 -
 configure |  10 +---
 contrib/plugins/execlog.c |   2 +-
 docs/tools/qemu-nbd.rst   |   6 +++--
 hw/9pfs/coth.h|   4 ++-
 hw/arm/virt.c |   4 +--
 hw/block/dataplane/virtio-blk.c   |   2 +-
 hw/block/fdc.c|  23 +++---
 hw/core/machine.c |   1 +
 hw/display/artist.c   |   8 +++---
 hw/i386/pc.c  |   2 +-
 hw/i386/pc_q35.c 

[PATCH 10/47] hw/display/artist: Fix bug in coordinate extraction in artist_vram_read() and artist_vram_write()

2021-12-14 Thread Michael Roth
From: Helge Deller 

The CDE desktop on HP-UX 10 shows wrongly rendered pixels when the local screen
menu is closed. This bug was introduced by commit c7050f3f167b
("hw/display/artist: Refactor x/y coordination extraction") which converted the
coordinate extraction in artist_vram_read() and artist_vram_write() to use the
ADDR_TO_X and ADDR_TO_Y macros, but forgot to right-shift the address by 2 as
it was done before.

Signed-off-by: Helge Deller 
Fixes: c7050f3f167b ("hw/display/artist: Refactor x/y coordination extraction")
Cc: Philippe Mathieu-Daudé 
Cc: Richard Henderson 
Cc: Sven Schnelle 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: 
Signed-off-by: Gerd Hoffmann 
(cherry picked from commit 01f750f5fef1afd8f6abc0548910f87d473e26d5)
Signed-off-by: Michael Roth 
---
 hw/display/artist.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/display/artist.c b/hw/display/artist.c
index aa7bd594aa..21b7fd1b44 100644
--- a/hw/display/artist.c
+++ b/hw/display/artist.c
@@ -1170,8 +1170,8 @@ static void artist_vram_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 
 buf = vram_write_buffer(s);
-posy = ADDR_TO_Y(addr);
-posx = ADDR_TO_X(addr);
+posy = ADDR_TO_Y(addr >> 2);
+posx = ADDR_TO_X(addr >> 2);
 
 if (!buf->size) {
 return;
@@ -1232,8 +1232,8 @@ static uint64_t artist_vram_read(void *opaque, hwaddr 
addr, unsigned size)
 return 0;
 }
 
-posy = ADDR_TO_Y(addr);
-posx = ADDR_TO_X(addr);
+posy = ADDR_TO_Y(addr >> 2);
+posx = ADDR_TO_X(addr >> 2);
 
 if (posy > buf->height || posx > buf->width) {
 return 0;
-- 
2.25.1




[PATCH 09/47] libvhost-user: fix VHOST_USER_REM_MEM_REG skipping mmap_addr

2021-12-14 Thread Michael Roth
From: David Hildenbrand 

We end up not copying the mmap_addr of all existing regions, resulting
in a SEGFAULT once we actually try to map/access anything within our
memory regions.

Fixes: 875b9fd97b34 ("Support individual region unmap in libvhost-user")
Cc: qemu-sta...@nongnu.org
Cc: Michael S. Tsirkin 
Cc: Raphael Norwitz 
Cc: "Marc-André Lureau" 
Cc: Stefan Hajnoczi 
Cc: Paolo Bonzini 
Cc: Coiby Xu 
Signed-off-by: David Hildenbrand 
Message-Id: <20211011201047.62587-1-da...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Raphael Norwitz 
Reviewed-by: Stefan Hajnoczi 
(cherry picked from commit 6889eb2d431ae962e3e083b57bff47cd573cb1c4)
Signed-off-by: Michael Roth 
---
 subprojects/libvhost-user/libvhost-user.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index bf09693255..787f4d2d4f 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -816,6 +816,7 @@ vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
 shadow_regions[j].gpa = dev->regions[i].gpa;
 shadow_regions[j].size = dev->regions[i].size;
 shadow_regions[j].qva = dev->regions[i].qva;
+shadow_regions[j].mmap_addr = dev->regions[i].mmap_addr;
 shadow_regions[j].mmap_offset = dev->regions[i].mmap_offset;
 j++;
 } else {
-- 
2.25.1




Re: [PATCH v2] MIPS - fix cycle counter timing calculations

2021-12-14 Thread Simon Burge
=?UTF-8?Q?Philippe_Mathieu-Daud=c3=a9?= wrote:

> Minor comment, it is better to post patch iterations as new thread,
> and not as reply to older patch, because in thread view your new
> patch might end up hidden / lost.

Ah, my bad.  I misread the part about using in-reply-to in the patch
submission page.

> Patch queued to mips-next, thanks.

Thanks!

Cheers,
Simon.



Re: [PATCH v7 04/15] linux-user/host/sparc64: Add safe-syscall.inc.S

2021-12-14 Thread Richard Henderson

On 12/14/21 7:30 AM, Philippe Mathieu-Daudé wrote:

Hi Richard,

On 12/14/21 01:25, Richard Henderson wrote:

Signed-off-by: Richard Henderson 
---
  linux-user/host/sparc64/hostdep.h  |  3 +
  linux-user/host/sparc64/safe-syscall.inc.S | 89 ++
  2 files changed, 92 insertions(+)
  create mode 100644 linux-user/host/sparc64/safe-syscall.inc.S



diff --git a/linux-user/host/sparc64/safe-syscall.inc.S 
b/linux-user/host/sparc64/safe-syscall.inc.S
new file mode 100644
index 00..bb35c64cfc
--- /dev/null
+++ b/linux-user/host/sparc64/safe-syscall.inc.S
@@ -0,0 +1,89 @@
+/*
+ * safe-syscall.inc.S : host-specific assembly fragment
+ * to handle signals occurring at the same time as system calls.
+ * This is intended to be included by linux-user/safe-syscall.S
+ *
+ * Written by Richard Henderson 
+ * Copyright (C) 2021 Red Hat, Inc.


Are you sure this is the correct (c)?


Hah.  What a cut-n-paste.


r~



Re: [PATCH v6 0/8] target/riscv: support Zfh, Zfhmin extension v0.1

2021-12-14 Thread Alistair Francis
On Fri, Dec 10, 2021 at 5:44 PM  wrote:
>
> From: Frank Chang 
>
> Zfh - Half width floating point
> Zfhmin - Subset of half width floating point
>
> Zfh, Zfhmin v0.1 is now in public review period and is required by
> RVV extension:
> https://groups.google.com/a/groups.riscv.org/g/isa-dev/c/63gDCinXTwE/m/871Wm9XIBQAJ
>
> Zfh, Zfhmin can be enabled with -cpu option: Zfh=true and Zfhmin=true
> respectively.
>
> The port is available at:
> https://github.com/sifive/qemu/tree/zfh-upstream-v6
>
> Note: This patchset depends on another patchset listed in Based-on
>   section below so it is not able to be built unless the patchset
>   is applied.
>
> Changelog:
>
> v6:
>   * Rebase on riscv-to-apply.next.
>
> v5:
>   * Rebase on riscv-to-apply.next.
>
> v4:
>   * Split Zfh, Zfhmin cpu properties related changes into individual
> patches.
>
> v3:
>   * Use the renamed softfloat min/max APIs: *_minimum_number()
> and *_maximum_number().
>   * Pick softfloat min/max APIs based on CPU privilege spec version.
>   * Add braces for if statements in REQUIRE_ZFH() and
> REQUIRE_ZFH_OR_ZFHMIN().
>   * Rearrange the positions of Zfh and Zfhmin cpu properties.
>
> v2:
>   * Use {get,dest}_gpr APIs.
>   * Add Zfhmin extension.
>
> Based-on: <20211021160847.2748577-1-frank.ch...@sifive.com>
>
> Frank Chang (3):
>   target/riscv: zfh: add Zfh cpu property
>   target/riscv: zfh: implement zfhmin extension
>   target/riscv: zfh: add Zfhmin cpu property
>
> Kito Cheng (5):
>   target/riscv: zfh: half-precision load and store
>   target/riscv: zfh: half-precision computational
>   target/riscv: zfh: half-precision convert and move
>   target/riscv: zfh: half-precision floating-point compare
>   target/riscv: zfh: half-precision floating-point classify
>
>  target/riscv/cpu.c|   2 +
>  target/riscv/cpu.h|   2 +
>  target/riscv/fpu_helper.c | 180 
>  target/riscv/helper.h |  29 ++
>  target/riscv/insn32.decode|  38 ++
>  target/riscv/insn_trans/trans_rvzfh.c.inc | 537 ++
>  target/riscv/internals.h  |  16 +
>  target/riscv/translate.c  |  20 +
>  8 files changed, 824 insertions(+)
>  create mode 100644 target/riscv/insn_trans/trans_rvzfh.c.inc

Thanks!

Applied to riscv-to-apply.next

Alistair

>
> --
> 2.31.1
>
>



[ANNOUNCE] QEMU 6.2.0 is now available

2021-12-14 Thread Michael Roth
Hello,

On behalf of the QEMU Team, I'd like to announce the availability of
the QEMU 6.2.0 release. This release contains 2300+ commits from 189
authors.

You can grab the tarball from our download page here:

  https://www.qemu.org/download/#source

The full list of changes is available at:

  https://wiki.qemu.org/ChangeLog/6.2

Highlights include:

 * virtio-mem: guest memory dumps are now fully supported, along with
   pre-copy/post-copy migration and background guest snapshots.
 * QMP: support for new DEVICE_UNPLUG_GUEST_ERROR to detect
   guest-reported hotplug failures
 * TCG: improvements to TCG plugin argument syntax, and multi-core
   support for cache plugin

 * 68k: improved support for Apple's NuBus, including ability to load
   declaration ROMs, and slot IRQ support
 * ARM: macOS hosts with Apple Silicon CPUs now support 'hvf' accelerator for
   AArch64 guests
 * ARM: emulation support for Fujitsu A64FX processor model
 * ARM: emulation support for kudo-mbc machine type
 * ARM: M-profile MVE extension is now supported for Cortex-M55
 * ARM: 'virt' machine now supports an emulated ITS (Interrupt Translation
   Service) and supports more than 123 CPUs in emulation mode
 * ARM: xlnx-zcu102 and xlnx-versal-virt machines now support BBRAM and
   eFUSE devices
 * PowerPC: improved POWER10 support for the 'powernv' machine type
 * PowerPC: initial support for POWER10 DD2.0 CPU model
 * PowerPC: support for FORM2 PAPR NUMA descriptions for 'pseries'
   machine type
 * RISC-V: support for Zb[abcs] instruction set extensions
 * RISC-V: support for vhost-user and numa mem options across all boards
 * RISC-V: SiFive PWM support
 * x86: support for new Snowridge-v4 CPU model
 * x86: guest support for Intel SGX
 * x86: AMD SEV guests now support measurement of kernel binary when doing
   direct kernel boot (not using a bootloader)

 * and lots more...

Thank you to everyone involved!



Re: [PULL 0/1] Block patches

2021-12-14 Thread Richard Henderson

On 12/9/21 7:21 AM, Stefan Hajnoczi wrote:

The following changes since commit a3607def89f9cd68c1b994e1030527df33aa91d0:

   Update version for v6.2.0-rc4 release (2021-12-07 17:51:38 -0800)

are available in the Git repository at:

   https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to cf4fbc3030c974fff726756a7ceef8386cdf500b:

   block/nvme: fix infinite loop in nvme_free_req_queue_cb() (2021-12-09 
09:19:49 +0000)


Pull request

An infinite loop fix for the userspace NVMe driver.



Stefan Hajnoczi (1):
   block/nvme: fix infinite loop in nvme_free_req_queue_cb()

  block/nvme.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions

Applied, as the beginning of the 7.0 development tree.


r~



Re: [RFC PATCH 1/3] target/riscv: add support for svnapot extension

2021-12-14 Thread Alistair Francis
On Sun, Nov 28, 2021 at 11:54 PM liweiwei  wrote:
>

Can you add a commit message that describes what you are changing?

Alistair

> Signed-off-by: liweiwei 
> Signed-off-by: wangjunqiang 
> ---
>  target/riscv/cpu_bits.h   |  1 +
>  target/riscv/cpu_helper.c | 18 --
>  2 files changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index 9913fa9f77..70391424b0 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -473,6 +473,7 @@ typedef enum {
>  #define PTE_A   0x040 /* Accessed */
>  #define PTE_D   0x080 /* Dirty */
>  #define PTE_SOFT0x300 /* Reserved for Software */
> +#define PTE_N   0x8000
>
>  /* Page table PPN shift amount */
>  #define PTE_PPN_SHIFT   10
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 9eeed38c7e..e68db3e119 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -588,7 +588,7 @@ restart:
>  return TRANSLATE_FAIL;
>  }
>
> -hwaddr ppn = pte >> PTE_PPN_SHIFT;
> +hwaddr ppn = (pte & ~(target_ulong)PTE_N) >> PTE_PPN_SHIFT;
>
>  if (!(pte & PTE_V)) {
>  /* Invalid PTE */
> @@ -668,8 +668,17 @@ restart:
>  /* for superpage mappings, make a fake leaf PTE for the TLB's
> benefit. */
>  target_ulong vpn = addr >> PGSHIFT;
> -*physical = ((ppn | (vpn & ((1L << ptshift) - 1))) << PGSHIFT) |
> -(addr & ~TARGET_PAGE_MASK);
> +
> +int napot_bits = ((pte & PTE_N) ? (ctzl(ppn) + 1) : 0);
> +if (((pte & PTE_N) && ((ppn == 0) || (i != (levels - 1 ||
> +(napot_bits != 0 && napot_bits != 4)) {
> +return TRANSLATE_FAIL;
> +}
> +
> +*physical = (((ppn & ~(((target_ulong)1 << napot_bits) - 1)) |
> +  (vpn & (((target_ulong)1 << napot_bits) - 1)) |
> +  (vpn & (((target_ulong)1 << ptshift) - 1))
> +) << PGSHIFT) | (addr & ~TARGET_PAGE_MASK);
>
>  /* set permissions on the TLB entry */
>  if ((pte & PTE_R) || ((pte & PTE_X) && mxr)) {
> @@ -856,7 +865,6 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
> size,
>  ret = get_physical_address(env, , , address,
> >guest_phys_fault_addr, access_type,
> mmu_idx, true, true, false);
> -
>  /*
>   * A G-stage exception may be triggered during two state lookup.
>   * And the env->guest_phys_fault_addr has already been set in
> @@ -879,7 +887,6 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
> size,
>  ret = get_physical_address(env, , , im_address, NULL,
> access_type, mmu_idx, false, true,
> false);
> -
>  qemu_log_mask(CPU_LOG_MMU,
>  "%s 2nd-stage address=%" VADDR_PRIx " ret %d physical "
>  TARGET_FMT_plx " prot %d\n",
> @@ -914,7 +921,6 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
> size,
>  /* Single stage lookup */
>  ret = get_physical_address(env, , , address, NULL,
> access_type, mmu_idx, true, false, false);
> -
>  qemu_log_mask(CPU_LOG_MMU,
>"%s address=%" VADDR_PRIx " ret %d physical "
>TARGET_FMT_plx " prot %d\n",
> --
> 2.17.1
>
>



[PATCH 1/1] pcie: Do not set power state for some hot-plugged devices

2021-12-14 Thread Annie Li
After the PCIe device is hot-plugged, the device's power state is
initialized as ON. However, the device isn't powered on yet, i.e.
the PCI_EXP_SLTCTL_PCC bit isn't set to PCI_EXP_SLTCTL_PWR_ON.
Later on, its power state will be set back to OFF due to the non
PCI_EXP_SLTCTL_PWR_ON state. The device is invisible until
PCI_EXP_SLTCTL_PWR_ON is set.

This may be appropriate for general PCIe hot-plug cases. However,
if the device is hot-plugged when the VM is in RUN_STATE_PRELAUNCH
state, especially the system disk device, the firmware will fail
to find the system disk. As a result, the guest fails to boot.

An extra flag (set_power) is added in this patch to indicate whether
pci_set_power is needed. After the device is powered on
(PCI_EXP_SLTCTL_PWR_ON), its power state will be set as for normal
devices.

Fixes: 090b32b8dae6 ("implement slot power control for pcie root ports")

Signed-off-by: Annie Li 
Reviewed-by: Darren Kenny 
---
 hw/pci/pci.c |  1 +
 hw/pci/pcie.c| 29 +++--
 include/hw/pci/pci.h |  1 +
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e5993c1ef5..b61c547291 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2186,6 +2186,7 @@ static void pci_qdev_realize(DeviceState *qdev, Error 
**errp)
 return;
 }
 
+pci_dev->set_power = true;
 pci_set_power(pci_dev, true);
 }
 
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index d7d73a31e4..e4ff23f3b9 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -28,6 +28,7 @@
 #include "hw/pci/pcie_regs.h"
 #include "hw/pci/pcie_port.h"
 #include "qemu/range.h"
+#include "sysemu/runstate.h"
 
 //#define DEBUG_PCIE
 #ifdef DEBUG_PCIE
@@ -385,8 +386,20 @@ static void pcie_cap_update_power(PCIDevice *hotplug_dev)
 power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON;
 }
 
-pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
-pcie_set_power_device, );
+/*
+ * For devices hot-plugged in RUN_STATE_PRELAUNCH state, set_power is
+ * set to false to avoid unnecessary power state changes before the device
+ * is powered on. After the device is powered on, set_power has to be
+ * set back to true to allow general power state changes.
+ */
+if (!hotplug_dev->set_power && power) {
+hotplug_dev->set_power = true;
+}
+
+if (hotplug_dev->set_power) {
+pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+pcie_set_power_device, );
+}
 }
 
 /*
@@ -475,6 +488,18 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 }
 pcie_cap_slot_event(hotplug_pdev,
 PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
+
+/*
+ * After the system disk device is hot-plugged during
+ * RUN_STATE_PRELAUNCH state, its power state will be set to OFF
+ * before the device is actually powered on. The device is invisible
+ * during this period. Hence the firmware won't find the system
+ * disk to boot. The set_power is set to false to avoid setting the
+ * power state to OFF.
+ */
+if (runstate_check(RUN_STATE_PRELAUNCH)) {
+hotplug_pdev->set_power = false;
+}
 pcie_cap_update_power(hotplug_pdev);
 }
 }
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index e7cdf2d5ec..753df3523e 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -269,6 +269,7 @@ struct PCIDevice {
 DeviceState qdev;
 bool partially_hotplugged;
 bool has_power;
+bool set_power;
 
 /* PCI config space */
 uint8_t *config;
-- 
2.31.1




Re: [PATCH] target/riscv/pmp: fix no pmp illegal intrs

2021-12-14 Thread Richard Henderson

On 12/14/21 1:12 PM, Richard Henderson wrote:

On 12/14/21 1:26 AM, Nikita Shubin wrote:

-    if (!pmp_get_num_rules(env) && (prev_priv != PRV_M)) {
+    if (riscv_feature(env, RISCV_FEATURE_PMP) &&
+    !pmp_get_num_rules(env) && (prev_priv != PRV_M)) {


When would the number of rules become non-zero with PMP disabled?
When does this test make a difference?


Oh, nevermind, I see what you mean.


r~




Re: [PATCH] target/riscv/pmp: fix no pmp illegal intrs

2021-12-14 Thread Richard Henderson

On 12/14/21 1:26 AM, Nikita Shubin wrote:

-if (!pmp_get_num_rules(env) && (prev_priv != PRV_M)) {
+if (riscv_feature(env, RISCV_FEATURE_PMP) &&
+!pmp_get_num_rules(env) && (prev_priv != PRV_M)) {


When would the number of rules become non-zero with PMP disabled?
When does this test make a difference?


r~



Re: [PATCH v9 28/31] common-user: Add safe syscall handling for loongarch64 hosts

2021-12-14 Thread Peter Maydell
On Tue, 14 Dec 2021 at 19:29, Richard Henderson
 wrote:
> For loongarch64, according to glibc,
>
> #define __SYSCALL_CLOBBERS \
>"$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", "memory"
>
> which does suggest that a6 is unused, saved across the syscall, and also 
> call-clobbered
> (so we don't have to allocate a stack frame).
>
> I've had a browse through the loongarch kernel code and that seems to be all 
> true.
> (Curiously, loongarch restores more registers than it saves on the way out of
> handle_syscall.  There may be a subtle reason for that, or room for 
> improvement.)

Sadly most of the kernel architectures don't document the "which registers
are clobbered" part of their ABI. It would be helpful if they did. (I did
nudge a local arm kernel dev to have a look at doing that for arm...)

-- PMM



Re: [PATCH 6/6] target/arm: Implement FEAT_LPA2

2021-12-14 Thread Richard Henderson

On 12/14/21 6:57 AM, Alex Bennée wrote:

+static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id)
+{
+return sextract64(id->id_aa64mmfr0,
+  R_ID_AA64MMFR0_TGRAN4_SHIFT,
+  R_ID_AA64MMFR0_TGRAN4_LENGTH) >= 1;


Is this correct - it shows:

   0b1111 4KB granule not supported.


Yes, that's why the signed extract, so not supported comes out as -1.
See D13.1.3 "Principles of the ID scheme for fields in ID registers".



(a little more reading later)

   The ID_AA64MMFR0_EL1.TGran4_2, ID_AA64MMFR0_EL1.TGran16_2 and
   ID_AA64MMFR0_EL1.TGran64_2 fields that identify the memory translation stage 
2 granule size, do not follow
   the standard ID scheme. Software must treat these fields as follows:


Note that we're not testing the *_2 fields, which are *stage2* support, not stage1.  I did 
add a comment about assuming stage2 encodes the same value as stage1 (which is true for 
all supported cpus).




r~



Re: [PATCH v2 for-7.0] scripts: Explain the difference between linux-headers and standard-headers

2021-12-14 Thread Alex Bennée


Peter Maydell  writes:

> If you don't know it, it's hard to figure out the difference between
> the linux-headers folder and the include/standard-headers folder.
> So let's add a short explanation to clarify the difference.
>
> Suggested-by: Thomas Huth 
> Signed-off-by: Peter Maydell 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée



Re: error in qemu mac install ..

2021-12-14 Thread Alex Bennée


גיא  writes:

> Hey
>
> trying to install for the 1st time and i got a message to contact you.
>
> also, the mouse is caught by the qemu but not moving ... so not possible to 
> format the hdd .. and continue.
>
> guy@guyaHP:~/Downloads/macOS-Simple-KVM-master$ sudo ./basic.sh

without seeing the script it is hard to see how QEMU is getting
launched. We need to know host OS and the command line options you are
using.

> Home directory not accessible: Permission denied
> pulseaudio: pa_context_connect() failed
> pulseaudio: Reason: Connection refused
> pulseaudio: Failed to initialize PA contextaudio: Could not init `pa' audio 
> driver
> Home directory not accessible: Permission denied
> ALSA lib confmisc.c:767:(parse_card) cannot find card '0'
> ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_card_driver 
> returned error: No such file or directory
> ALSA lib confmisc.c:392:(snd_func_concat) error evaluating strings
> ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_concat returned 
> error: No such file or directory
> ALSA lib confmisc.c:1246:(snd_func_refer) error evaluating name
> ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_refer returned 
> error: No such file or directory
> ALSA lib conf.c:5007:(snd_config_expand) Evaluate error: No such file or 
> directory
> ALSA lib pcm.c:2495:(snd_pcm_open_noupdate) Unknown PCM default
> alsa: Could not initialize DAC
> alsa: Failed to open `default':
> alsa: Reason: No such file or directory
> ALSA lib confmisc.c:767:(parse_card) cannot find card '0'
> ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_card_driver 
> returned error: No such file or directory
> ALSA lib confmisc.c:392:(snd_func_concat) error evaluating strings
> ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_concat returned 
> error: No such file or directory
> ALSA lib confmisc.c:1246:(snd_func_refer) error evaluating name
> ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_refer returned 
> error: No such file or directory
> ALSA lib conf.c:5007:(snd_config_expand) Evaluate error: No such file or 
> directory
> ALSA lib pcm.c:2495:(snd_pcm_open_noupdate) Unknown PCM default
> alsa: Could not initialize DAC
> alsa: Failed to open `default':
> alsa: Reason: No such file or directory
> audio: Failed to create voice `dac'
> unknown keycodes `(unnamed)', please report to qemu-devel@nongnu.org
> ./basic.sh: line 30: -device: command not found

that looks like a continuation line got missed in the script.

>
> Thank you
>
> Guy


-- 
Alex Bennée



Re: [PATCH v9 10/10] target/ppc/excp_helper.c: EBB handling adjustments

2021-12-14 Thread Daniel Henrique Barboza




On 12/8/21 22:52, David Gibson wrote:

On Wed, Dec 01, 2021 at 12:17:34PM -0300, Daniel Henrique Barboza wrote:

The current logic is only considering event-based exceptions triggered
by the performance monitor. This is true now, but we might want to add
support for external event-based exceptions in the future.

Let's make it a bit easier to do so by adding the bit logic that would
happen in case we were dealing with an external event-based exception.

While we're at it, add a few comments explaining why we're setting and
clearing BESCR bits.

Reviewed-by: David Gibson 


Still looks fine, but I'm not seeing a particularly strong reason to
keep this split from the previous patch.



Fair enough. I'll squash this patch with the previous one.



Thanks,


Daniel

 

Signed-off-by: Daniel Henrique Barboza 
---
  target/ppc/excp_helper.c | 45 ++--
  1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index a26d266fe6..42e2fee9c8 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -801,14 +801,47 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
excp_model, int excp)
  break;
  case POWERPC_EXCP_EBB:   /* Event-based branch exception 
*/
  if ((env->spr[SPR_FSCR] & (1ull << FSCR_EBB)) &&
-(env->spr[SPR_BESCR] & BESCR_GE) &&
-(env->spr[SPR_BESCR] & BESCR_PME)) {
+(env->spr[SPR_BESCR] & BESCR_GE)) {
  target_ulong nip;
  
-env->spr[SPR_BESCR] &= ~BESCR_GE;   /* Clear GE */

-env->spr[SPR_BESCR] |= BESCR_PMEO;  /* Set PMEO */
-env->spr[SPR_EBBRR] = env->nip; /* Save NIP for rfebb insn */
-nip = env->spr[SPR_EBBHR];  /* EBB handler */
+/*
+ * If we have Performance Monitor Event-Based exception
+ * enabled (BESCR_PME) and a Performance Monitor alert
+ * occurred (MMCR0_PMAO), clear BESCR_PME and set BESCR_PMEO
+ * (Performance Monitor Event-Based Exception Occurred).
+ *
+ * Software is responsible for clearing both BESCR_PMEO and
+ * MMCR0_PMAO after the event has been handled.
+ */
+if ((env->spr[SPR_BESCR] & BESCR_PME) &&
+(env->spr[SPR_POWER_MMCR0] & MMCR0_PMAO)) {
+env->spr[SPR_BESCR] &= ~BESCR_PME;
+env->spr[SPR_BESCR] |= BESCR_PMEO;
+}
+
+/*
+ * In the case of External Event-Based exceptions, do a
+ * similar logic with BESCR_EE and BESCR_EEO. BESCR_EEO must
+ * also be cleared by software.
+ *
+ * PowerISA 3.1 considers that we'll not have BESCR_PMEO and
+ * BESCR_EEO set at the same time. We can check for BESCR_PMEO
+ * being not set in step above to see if this exception was
+ * triggered by an external event.
+ */
+if (env->spr[SPR_BESCR] & BESCR_EE &&
+!(env->spr[SPR_BESCR] & BESCR_PMEO)) {
+env->spr[SPR_BESCR] &= ~BESCR_EE;
+env->spr[SPR_BESCR] |= BESCR_EEO;
+}
+
+/*
+ * Clear BESCR_GE, save NIP for 'rfebb' and point the
+ * execution to the event handler (SPR_EBBHR) address.
+ */
+env->spr[SPR_BESCR] &= ~BESCR_GE;
+env->spr[SPR_EBBRR] = env->nip;
+nip = env->spr[SPR_EBBHR];
  powerpc_set_excp_state(cpu, nip, env->msr);
  }
  /*






Re: Qemu Userspace Emulator with library + method

2021-12-14 Thread Alex Bennée


Marcus Engene  writes:

> Hi,
>
> I'd like to do instruction traces with library+function name included.
>
> From what I understand in_asm only shows instructions when they are being 
> JIT:ed. 
> If I call a function twice I only see the instructions once so it makes sense.
>
> As a workaround, I tried to do a plugin. I looked at the examples in 
> contrib/plugins and it seems to work nicely. I see all instructions in
> userspace:
>
> 004000802100 48 89 e7 movq %rsp, %rdi
> 004000802103 e8 08 0e 00 00   callq 0x4000802f10
> 004000802f10 f3 0f 1e fa  endbr64 
> 004000802f14 55   pushq %rbp
> 004000802f15 48 89 e5 movq %rsp, %rbp
> 004000802f18 41 57pushq %r15

This looks like you could expand/tweak the existing execlog to get what
you want rather than writing a whole new plugin.

> However, for it to be super useful, I'd also like to see what library or 
> source file each instruction lives, and what function we're in.
>
> Example output from perf + intel_pt
>
> a.out 602812 [006] 206712.277263361:  7f8d50217084 brk+0x4 
> (/usr/lib/x86_64-linux-gnu/ld-2.31.so) mov $0xc, %eax
> a.out 602812 [006] 206712.277263361:  7f8d50217089 brk+0x9 
> (/usr/lib/x86_64-linux-gnu/ld-2.31.so) syscall 
> a.out 602812 [006] 206712.277264027:  7f8d5021708b brk+0xb 
> (/usr/lib/x86_64-linux-gnu/ld-2.31.so) cmp $0xf000, %rax
> a.out 602812 [006] 206712.277264027:  7f8d50217091 brk+0x11 
> (/usr/lib/x86_64-linux-gnu/ld-2.31.so) jnbe 0x7f8d502170a8
> a.out 602812 [006] 206712.277264027:  7f8d50217093 brk+0x13 
> (/usr/lib/x86_64-linux-gnu/ld-2.31.so) movq  %rax, 0x1106e
> (%rip)
>
> Is this doable? Do you have any advice on where to start?

Totally.

>
> I tried to get the "symbol" in the plugin, but I only get null values.

You mean using qemu_plugin_insn_symbol()?

IIRC that only works on the main binary and of course needs a
non-stripped binary. Basically we would need to expand the elf loader to
consider libraries as well. However that will require some sort of hook
to spot them getting mapped in by the guests ld.so. Maybe we could add
some heuristics to the mmap syscalls to spot that?

>
> Kind regards,
> Marcus


-- 
Alex Bennée



[PATCH v2] monitor: move x-query-profile into accel/tcg to fix build

2021-12-14 Thread Alex Bennée
As --enable-profiler isn't defended in CI we missed this breakage.
Move the qmp handler into accel/tcg so we have access to the helpers
we need. While we are at it ensure we gate the feature on CONFIG_TCG.

Signed-off-by: Alex Bennée 
Suggested-by: Daniel P. Berrangé 
Reported-by: Mark Cave-Ayland 
Fixes: 37087fde0e ("qapi: introduce x-query-profile QMP command")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/773

---
v2
  - enclosed in #ifndef CONFIG_USER_ONLY section
---
 qapi/machine.json|  1 +
 accel/tcg/cpu-exec.c | 31 +++
 monitor/qmp-cmds.c   | 31 ---
 3 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/qapi/machine.json b/qapi/machine.json
index 067e3f5378..0c9f24a712 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1492,6 +1492,7 @@
 ##
 { 'command': 'x-query-profile',
   'returns': 'HumanReadableText',
+  'if': 'CONFIG_TCG',
   'features': [ 'unstable' ] }
 
 ##
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 409ec8c38c..8b4cd6c59d 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -1090,4 +1090,35 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 return human_readable_text_from_str(buf);
 }
 
+#ifdef CONFIG_PROFILER
+
+int64_t dev_time;
+
+HumanReadableText *qmp_x_query_profile(Error **errp)
+{
+g_autoptr(GString) buf = g_string_new("");
+static int64_t last_cpu_exec_time;
+int64_t cpu_exec_time;
+int64_t delta;
+
+cpu_exec_time = tcg_cpu_exec_time();
+delta = cpu_exec_time - last_cpu_exec_time;
+
+g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
+   dev_time, dev_time / 
(double)NANOSECONDS_PER_SECOND);
+g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
+   delta, delta / (double)NANOSECONDS_PER_SECOND);
+last_cpu_exec_time = cpu_exec_time;
+dev_time = 0;
+
+return human_readable_text_from_str(buf);
+}
+#else
+HumanReadableText *qmp_x_query_profile(Error **errp)
+{
+error_setg(errp, "Internal profiler not compiled");
+return NULL;
+}
+#endif
+
 #endif /* !CONFIG_USER_ONLY */
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index 343353e27a..be5e44c569 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -355,37 +355,6 @@ void qmp_display_reload(DisplayReloadOptions *arg, Error 
**errp)
 }
 }
 
-#ifdef CONFIG_PROFILER
-
-int64_t dev_time;
-
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-g_autoptr(GString) buf = g_string_new("");
-static int64_t last_cpu_exec_time;
-int64_t cpu_exec_time;
-int64_t delta;
-
-cpu_exec_time = tcg_cpu_exec_time();
-delta = cpu_exec_time - last_cpu_exec_time;
-
-g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
-   dev_time, dev_time / 
(double)NANOSECONDS_PER_SECOND);
-g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
-   delta, delta / (double)NANOSECONDS_PER_SECOND);
-last_cpu_exec_time = cpu_exec_time;
-dev_time = 0;
-
-return human_readable_text_from_str(buf);
-}
-#else
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-error_setg(errp, "Internal profiler not compiled");
-return NULL;
-}
-#endif
-
 static int qmp_x_query_rdma_foreach(Object *obj, void *opaque)
 {
 RdmaProvider *rdma;
-- 
2.30.2




Re: [PATCH v5 09/31] block: introduce assert_bdrv_graph_writable

2021-12-14 Thread Emanuele Giuseppe Esposito




On 10/12/2021 18:43, Hanna Reitz wrote:

On 24.11.21 07:43, Emanuele Giuseppe Esposito wrote:

We want to be sure that the functions that write the child and
parent list of a bs are under BQL and drain.

The BQL prevents concurrent writes from the GS API, while
drains protect against I/O.

TODO: drains are missing in some functions using this assert.
Therefore a proper assertion will fail. Because adding drains
requires additional discussions, they will be added in future
series.

Signed-off-by: Emanuele Giuseppe Esposito 
---
  include/block/block_int-global-state.h | 10 +-
  block.c    |  4 
  block/io.c | 11 +++
  3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/include/block/block_int-global-state.h 
b/include/block/block_int-global-state.h

index a1b7d0579d..fa96e8b449 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -312,4 +312,12 @@ void 
bdrv_remove_aio_context_notifier(BlockDriverState *bs,

   */
  void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
-#endif /* BLOCK_INT_GLOBAL_STATE*/


This looks like it should be squashed into patch 7, sorry I missed this 
in v4...


(Rest of this patch looks good to me, for the record – and while I’m at 
it, for patches I didn’t reply to so far, I planned to send an R-b 
later.  But then there’s things like patches 2/3 looking good to me, but 
it turned out in my review for patch 4 that bdrv_lock_medium() is used 
in an I/O path, so I can’t really send an R-b now anymore...)


Sorry I don't understand this, what should be squashed into patch 7? The 
assertion? If so, why?


Thank you,
Emanuele




[PATCH] Hexagon (tests/tcg/hexagon) change .ref files for changes in test source

2021-12-14 Thread Taylor Simpson
This update changes the Hexagon reference files to match the test
changes from Richard Henderson .

The test changes can be found here
https://gitlab.com/rth7680/qemu/-/commits/fix-sfp-test

Signed-off-by: Taylor Simpson 
---
 tests/tcg/hexagon/float_convs.ref | 152 +++---
 tests/tcg/hexagon/float_madds.ref |  48 ++--
 2 files changed, 100 insertions(+), 100 deletions(-)

diff --git a/tests/tcg/hexagon/float_convs.ref 
b/tests/tcg/hexagon/float_convs.ref
index 9ec9ffc..a5505c3 100644
--- a/tests/tcg/hexagon/float_convs.ref
+++ b/tests/tcg/hexagon/float_convs.ref
@@ -18,31 +18,31 @@ from single: f32(-inf:0xff80)
   to uint32: 0 (INVALID)
   to uint64: 0 (INVALID)
 from single: f32(-0x1.fe00p+127:0xff7f)
-  to double: f64(-0x1.fe00p+127:0x00c7efe000) (INEXACT 
)
+  to double: f64(-0x1.fe00p+127:0x00c7efe000) (OK)
to int32: -2147483648 (INVALID)
to int64: -9223372036854775808 (INVALID)
   to uint32: 0 (INVALID)
   to uint64: 0 (INVALID)
 from single: f32(-0x1.1874b200p+103:0xf30c3a59)
-  to double: f64(-0x1.1874b200p+103:0x00c661874b2000) (INEXACT 
)
+  to double: f64(-0x1.1874b200p+103:0x00c661874b2000) (OK)
to int32: -2147483648 (INVALID)
to int64: -9223372036854775808 (INVALID)
   to uint32: 0 (INVALID)
   to uint64: 0 (INVALID)
 from single: f32(-0x1.c0bab600p+99:0xf1605d5b)
-  to double: f64(-0x1.c0bab600p+99:0x00c62c0bab6000) (INEXACT )
+  to double: f64(-0x1.c0bab600p+99:0x00c62c0bab6000) (OK)
to int32: -2147483648 (INVALID)
to int64: -9223372036854775808 (INVALID)
   to uint32: 0 (INVALID)
   to uint64: 0 (INVALID)
 from single: f32(-0x1.31f75000p-40:0xab98fba8)
-  to double: f64(-0x1.31f75000p-40:0x00bd731f75) (INEXACT )
+  to double: f64(-0x1.31f75000p-40:0x00bd731f75) (OK)
to int32: 0 (INEXACT )
to int64: 0 (INEXACT )
   to uint32: 0 (INVALID)
   to uint64: 0 (INVALID)
 from single: f32(-0x1.50544400p-66:0x9ea82a22)
-  to double: f64(-0x1.50544400p-66:0x00bbd505444000) (INEXACT )
+  to double: f64(-0x1.50544400p-66:0x00bbd505444000) (OK)
to int32: 0 (INEXACT )
to int64: 0 (INEXACT )
   to uint32: 0 (INVALID)
@@ -72,19 +72,19 @@ from single: f32(0x1.p-25:0x3300)
   to uint32: 0 (INEXACT )
   to uint64: 0 (INEXACT )
 from single: f32(0x1.e600p-25:0x3373)
-  to double: f64(0x1.e600p-25:0x003e6e6000) (INEXACT )
+  to double: f64(0x1.e600p-25:0x003e6e6000) (OK)
to int32: 0 (INEXACT )
to int64: 0 (INEXACT )
   to uint32: 0 (INEXACT )
   to uint64: 0 (INEXACT )
 from single: f32(0x1.ff801a00p-15:0x387fc00d)
-  to double: f64(0x1.ff801a00p-15:0x003f0ff801a000) (INEXACT )
+  to double: f64(0x1.ff801a00p-15:0x003f0ff801a000) (OK)
to int32: 0 (INEXACT )
to int64: 0 (INEXACT )
   to uint32: 0 (INEXACT )
   to uint64: 0 (INEXACT )
 from single: f32(0x1.0c00p-14:0x3886)
-  to double: f64(0x1.0c00p-14:0x003f10c000) (INEXACT )
+  to double: f64(0x1.0c00p-14:0x003f10c000) (OK)
to int32: 0 (INEXACT )
to int64: 0 (INEXACT )
   to uint32: 0 (INEXACT )
@@ -96,7 +96,7 @@ from single: f32(0x1.p+0:0x3f80)
   to uint32: 1 (OK)
   to uint64: 1 (OK)
 from single: f32(0x1.0040p+0:0x3f802000)
-  to double: f64(0x1.0040p+0:0x003ff00400) (INEXACT )
+  to double: f64(0x1.0040p+0:0x003ff00400) (OK)
to int32: 1 (INEXACT )
to int64: 1 (INEXACT )
   to uint32: 1 (INEXACT )
@@ -108,61 +108,61 @@ from single: f32(0x1.p+1:0x4000)
   to uint32: 2 (OK)
   to uint64: 2 (OK)
 from single: f32(0x1.5bf0a800p+1:0x402df854)
-  to double: f64(0x1.5bf0a800p+1:0x004005bf0a8000) (INEXACT )
+  to double: f64(0x1.5bf0a800p+1:0x004005bf0a8000) (OK)
to int32: 2 (INEXACT )
to int64: 2 (INEXACT )
   to uint32: 2 (INEXACT )
   to uint64: 2 (INEXACT )
 from single: f32(0x1.921fb600p+1:0x40490fdb)
-  to double: f64(0x1.921fb600p+1:0x00400921fb6000) (INEXACT )
+  to double: f64(0x1.921fb600p+1:0x00400921fb6000) (OK)
to int32: 3 (INEXACT )
to int64: 3 (INEXACT )
   to uint32: 3 (INEXACT )
   to uint64: 3 (INEXACT )
 from single: f32(0x1.ffbep+15:0x477fdf00)
-  to double: f64(0x1.ffbep+15:0x0040effbe0) (INEXACT )
+  to double: f64(0x1.ffbep+15:0x0040effbe0) (OK)
to int32: 65503 (OK)
to int64: 65503 (OK)
   to uint32: 65503 (OK)
   to uint64: 65503 (OK)
 from single: f32(0x1.ffc0p+15:0x477fe000)
-  to 

Re: [PATCH v9 28/31] common-user: Add safe syscall handling for loongarch64 hosts

2021-12-14 Thread Richard Henderson

On 12/14/21 12:01 AM, WANG Xuerui wrote:

+move$t0, $a0/* signal_pending pointer */

...

+safe_syscall_start:
+/* If signal_pending is non-zero, don't do the call */
+ld.w$t1, $t0, 0
+bnez$t1, 2f
+syscall 0


We need a non-syscall clobbered register for signal_pending, per the bug fixed in 
5d9f3ea0817215ad4baac5aa30414e9ebbaaf0d6.


In the case of riscv, because of the way exceptions are delivered, there are no 
syscall-clobbered registers (by the time syscall is distinguished from interrupt, all 
registers have been saved).


In the case of mips, there are no non-syscall-clobbered registers that are not also 
call-saved or syscall arguments, so I had to allocate a stack frame and save/restore s0.


For loongarch64, according to glibc,

#define __SYSCALL_CLOBBERS \
  "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", "memory"

which does suggest that a6 is unused, saved across the syscall, and also call-clobbered 
(so we don't have to allocate a stack frame).


I've had a browse through the loongarch kernel code and that seems to be all true. 
(Curiously, loongarch restores more registers than it saves on the way out of 
handle_syscall.  There may be a subtle reason for that, or room for improvement.)



r~



Re: [PATCH] monitor: move x-query-profile into accel/tcg to fix build

2021-12-14 Thread Daniel P . Berrangé
On Tue, Dec 14, 2021 at 06:22:07PM +, Alex Bennée wrote:
> As --enable-profiler isn't defended in CI we missed this breakage.
> Move the qmp handler into accel/tcg so we have access to the helpers
> we need. While we are at it ensure we gate the feature on CONFIG_TCG.
> 
> Signed-off-by: Alex Bennée 
> Suggested-by: Daniel P. Berrangé 
> Reported-by: Mark Cave-Ayland 
> Fixes: 37087fde0e ("qapi: introduce x-query-profile QMP command")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/773
> ---
>  qapi/machine.json|  1 +
>  accel/tcg/cpu-exec.c | 31 +++
>  monitor/qmp-cmds.c   | 31 ---
>  3 files changed, 32 insertions(+), 31 deletions(-)
> 
> diff --git a/qapi/machine.json b/qapi/machine.json
> index 067e3f5378..0c9f24a712 100644
> --- a/qapi/machine.json
> +++ b/qapi/machine.json
> @@ -1492,6 +1492,7 @@
>  ##
>  { 'command': 'x-query-profile',
>'returns': 'HumanReadableText',
> +  'if': 'CONFIG_TCG',
>'features': [ 'unstable' ] }
>  
>  ##
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 409ec8c38c..9498a16681 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -1091,3 +1091,34 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
>  }
>  
>  #endif /* !CONFIG_USER_ONLY */

I think this #endif probably needs to be after the qmp_x_query_profile
impl, as it is for the other TCG QMP cmds  ?

> +
> +#ifdef CONFIG_PROFILER
> +
> +int64_t dev_time;
> +
> +HumanReadableText *qmp_x_query_profile(Error **errp)
> +{
> +g_autoptr(GString) buf = g_string_new("");
> +static int64_t last_cpu_exec_time;
> +int64_t cpu_exec_time;
> +int64_t delta;
> +
> +cpu_exec_time = tcg_cpu_exec_time();
> +delta = cpu_exec_time - last_cpu_exec_time;
> +
> +g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
> +   dev_time, dev_time / 
> (double)NANOSECONDS_PER_SECOND);
> +g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
> +   delta, delta / (double)NANOSECONDS_PER_SECOND);
> +last_cpu_exec_time = cpu_exec_time;
> +dev_time = 0;
> +
> +return human_readable_text_from_str(buf);
> +}
> +#else
> +HumanReadableText *qmp_x_query_profile(Error **errp)
> +{
> +error_setg(errp, "Internal profiler not compiled");
> +return NULL;
> +}
> +#endif
> diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
> index 343353e27a..be5e44c569 100644
> --- a/monitor/qmp-cmds.c
> +++ b/monitor/qmp-cmds.c
> @@ -355,37 +355,6 @@ void qmp_display_reload(DisplayReloadOptions *arg, Error 
> **errp)
>  }
>  }
>  
> -#ifdef CONFIG_PROFILER
> -
> -int64_t dev_time;
> -
> -HumanReadableText *qmp_x_query_profile(Error **errp)
> -{
> -g_autoptr(GString) buf = g_string_new("");
> -static int64_t last_cpu_exec_time;
> -int64_t cpu_exec_time;
> -int64_t delta;
> -
> -cpu_exec_time = tcg_cpu_exec_time();
> -delta = cpu_exec_time - last_cpu_exec_time;
> -
> -g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
> -   dev_time, dev_time / 
> (double)NANOSECONDS_PER_SECOND);
> -g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
> -   delta, delta / (double)NANOSECONDS_PER_SECOND);
> -last_cpu_exec_time = cpu_exec_time;
> -dev_time = 0;
> -
> -return human_readable_text_from_str(buf);
> -}
> -#else
> -HumanReadableText *qmp_x_query_profile(Error **errp)
> -{
> -error_setg(errp, "Internal profiler not compiled");
> -return NULL;
> -}
> -#endif
> -
>  static int qmp_x_query_rdma_foreach(Object *obj, void *opaque)
>  {
>  RdmaProvider *rdma;
> -- 
> 2.30.2
> 

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH] docs: Add measurement calculation details to amd-memory-encryption.txt

2021-12-14 Thread Daniel P . Berrangé
On Tue, Dec 14, 2021 at 01:59:10PM +, Dov Murik wrote:
> Add a section explaining how the Guest Owner should calculate the
> expected guest launch measurement for SEV and SEV-ES.
> 
> Also update the name and link to the SEV API Spec document.
> 
> Signed-off-by: Dov Murik 
> Suggested-by: Daniel P. Berrangé 
> ---
>  docs/amd-memory-encryption.txt | 50 +++---
>  1 file changed, 46 insertions(+), 4 deletions(-)
> 
> diff --git a/docs/amd-memory-encryption.txt b/docs/amd-memory-encryption.txt
> index ffca382b5f..f97727482f 100644
> --- a/docs/amd-memory-encryption.txt
> +++ b/docs/amd-memory-encryption.txt
> @@ -43,7 +43,7 @@ The guest policy is passed as plaintext. A hypervisor may 
> choose to read it,
>  but should not modify it (any modification of the policy bits will result
>  in bad measurement). The guest policy is a 4-byte data structure containing
>  several flags that restricts what can be done on a running SEV guest.
> -See KM Spec section 3 and 6.2 for more details.
> +See SEV API Spec [1] section 3 and 6.2 for more details.
>  
>  The guest policy can be provided via the 'policy' property (see below)
>  
> @@ -88,7 +88,7 @@ expects.
>  LAUNCH_FINISH finalizes the guest launch and destroys the cryptographic
>  context.
>  
> -See SEV KM API Spec [1] 'Launching a guest' usage flow (Appendix A) for the
> +See SEV API Spec [1] 'Launching a guest' usage flow (Appendix A) for the
>  complete flow chart.
>  
>  To launch a SEV guest
> @@ -113,6 +113,45 @@ a SEV-ES guest:
>   - Requires in-kernel irqchip - the burden is placed on the hypervisor to
> manage booting APs.
>  
> +Calculating expected guest launch measurement
> +-
> +In order to verify the guest launch measurement, the Guest Owner must compute
> +it in the exact same way as it is calculated by the AMD-SP.  SEV API Spec [1]
> +section 6.5.1 describes the AMD-SP operations:
> +
> +GCTX.LD is finalized, producing the hash digest of all plaintext data
> +imported into the guest.
> +
> +The launch measurement is calculated as:
> +
> +HMAC(0x04 || API_MAJOR || API_MINOR || BUILD || GCTX.POLICY || GCTX.LD 
> || MNONCE; GCTX.TIK)
> +
> +where "||" represents concatenation.
> +
> +The values of API_MAJOR, API_MINOR, BUILD, and GCTX.POLICY can be obtained
> +from the 'query-sev' qmp command.
> +
> +The value of MNONCE is part of the response of 'query-sev-launch-measure': it
> +is the last 16 bytes of the base64-decoded data field (see SEV API Spec [1]
> +section 6.5.2 Table 52: LAUNCH_MEASURE Measurement Buffer).
> +
> +The value of GCTX.LD is SHA256(firmware_blob || kernel_hashes_blob || 
> vmsas_blob),
> +where:
> +
> +* firmware_blob is the content of the entire firmware flash file (for 
> example,
> +  OVMF.fd).

Let's add a caveat that the firmware flash should be built to be stateless,
i.e. that it is not secure to attempt to measure a guest where the firmware
uses an NVRAM store.

> +* if kernel is used, and kernel-hashes=on, then kernel_hashes_blob is the
> +  content of PaddedSevHashTable (including the zero padding), which itself
> +  includes the hashes of kernel, initrd, and cmdline that are passed to the
> +  guest.  The PaddedSevHashTable struct is defined in target/i386/sev.c .
> +* if SEV-ES is enabled (policy & 0x4 != 0), vmsas_blob is the concatenation 
> of
> +  all VMSAs of the guest vcpus.  Each VMSA is 4096 bytes long; its content is
> +  defined inside Linux kernel code as struct vmcb_save_area, or in AMD APM
> +  Volume 2 [2] Table B-2: VMCB Layout, State Save Area.

Is there any practical guidance we can give apps on the way the VMSAs
can be expected to be initialized? E.g. can they assume essentially
all fields in vmcb_save_area are 0-initialized except for certain
ones? Is initialization likely to vary at all across KVM or EDK2
versions or something?

> +
> +If kernel hashes are not used, or SEV-ES is disabled, use empty blobs for
> +kernel_hashes_blob and vmsas_blob as needed.


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH] monitor: move x-query-profile into accel/tcg to fix build

2021-12-14 Thread Alex Bennée
As --enable-profiler isn't defended in CI we missed this breakage.
Move the qmp handler into accel/tcg so we have access to the helpers
we need. While we are at it ensure we gate the feature on CONFIG_TCG.

Signed-off-by: Alex Bennée 
Suggested-by: Daniel P. Berrangé 
Reported-by: Mark Cave-Ayland 
Fixes: 37087fde0e ("qapi: introduce x-query-profile QMP command")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/773
---
 qapi/machine.json|  1 +
 accel/tcg/cpu-exec.c | 31 +++
 monitor/qmp-cmds.c   | 31 ---
 3 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/qapi/machine.json b/qapi/machine.json
index 067e3f5378..0c9f24a712 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1492,6 +1492,7 @@
 ##
 { 'command': 'x-query-profile',
   'returns': 'HumanReadableText',
+  'if': 'CONFIG_TCG',
   'features': [ 'unstable' ] }
 
 ##
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 409ec8c38c..9498a16681 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -1091,3 +1091,34 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 }
 
 #endif /* !CONFIG_USER_ONLY */
+
+#ifdef CONFIG_PROFILER
+
+int64_t dev_time;
+
+HumanReadableText *qmp_x_query_profile(Error **errp)
+{
+g_autoptr(GString) buf = g_string_new("");
+static int64_t last_cpu_exec_time;
+int64_t cpu_exec_time;
+int64_t delta;
+
+cpu_exec_time = tcg_cpu_exec_time();
+delta = cpu_exec_time - last_cpu_exec_time;
+
+g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
+   dev_time, dev_time / 
(double)NANOSECONDS_PER_SECOND);
+g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
+   delta, delta / (double)NANOSECONDS_PER_SECOND);
+last_cpu_exec_time = cpu_exec_time;
+dev_time = 0;
+
+return human_readable_text_from_str(buf);
+}
+#else
+HumanReadableText *qmp_x_query_profile(Error **errp)
+{
+error_setg(errp, "Internal profiler not compiled");
+return NULL;
+}
+#endif
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index 343353e27a..be5e44c569 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -355,37 +355,6 @@ void qmp_display_reload(DisplayReloadOptions *arg, Error 
**errp)
 }
 }
 
-#ifdef CONFIG_PROFILER
-
-int64_t dev_time;
-
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-g_autoptr(GString) buf = g_string_new("");
-static int64_t last_cpu_exec_time;
-int64_t cpu_exec_time;
-int64_t delta;
-
-cpu_exec_time = tcg_cpu_exec_time();
-delta = cpu_exec_time - last_cpu_exec_time;
-
-g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
-   dev_time, dev_time / 
(double)NANOSECONDS_PER_SECOND);
-g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
-   delta, delta / (double)NANOSECONDS_PER_SECOND);
-last_cpu_exec_time = cpu_exec_time;
-dev_time = 0;
-
-return human_readable_text_from_str(buf);
-}
-#else
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-error_setg(errp, "Internal profiler not compiled");
-return NULL;
-}
-#endif
-
 static int qmp_x_query_rdma_foreach(Object *obj, void *opaque)
 {
 RdmaProvider *rdma;
-- 
2.30.2




Re: [PATCH v10 06/10] ACPI ERST: build the ACPI ERST table

2021-12-14 Thread Eric DeVolder

Ani, one quick question below.
eric

On 12/13/21 20:58, Ani Sinha wrote:

On Tue, Dec 14, 2021 at 2:57 AM Eric DeVolder  wrote:


Hi Ani,
inline response below.
Eric

On 12/12/21 07:43, Ani Sinha wrote:

.

On Thu, Dec 9, 2021 at 11:28 PM Eric DeVolder  wrote:


This builds the ACPI ERST table to inform OSPM how to communicate
with the acpi-erst device.


This patch starts in the middle of pci device code addition, between
erst_reg_ops and erst_post_load. I do not like this :(


Below you suggest moving the contents of this patch to the bottom of erst.c.
Before I do that, consider moving the contents to the top of the file instead, 
I believe that would
address the concerns cited here, and it would allow for the last line of the 
file to be the
type_init(), like other files.

I'll move it, just let me know if top or bottom.


Moving to the top is fine.
I've moved this to the top. The question is if you prefer this be integrated into the main erst.c 
patch, or still separated out?

thanks!
eric




Thanks!
eric






Signed-off-by: Eric DeVolder 
---
   hw/acpi/erst.c | 241 
+
   1 file changed, 241 insertions(+)

diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c
index 81f5435..753425a 100644
--- a/hw/acpi/erst.c
+++ b/hw/acpi/erst.c
@@ -711,6 +711,247 @@ static const MemoryRegionOps erst_reg_ops = {
   .endianness = DEVICE_NATIVE_ENDIAN,
   };

+
+/***/
+/***/
+
+/* ACPI 4.0: Table 17-19 Serialization Instructions */
+#define INST_READ_REGISTER 0x00
+#define INST_READ_REGISTER_VALUE   0x01
+#define INST_WRITE_REGISTER0x02
+#define INST_WRITE_REGISTER_VALUE  0x03
+#define INST_NOOP  0x04
+#define INST_LOAD_VAR1 0x05
+#define INST_LOAD_VAR2 0x06
+#define INST_STORE_VAR10x07
+#define INST_ADD   0x08
+#define INST_SUBTRACT  0x09
+#define INST_ADD_VALUE 0x0A
+#define INST_SUBTRACT_VALUE0x0B
+#define INST_STALL 0x0C
+#define INST_STALL_WHILE_TRUE  0x0D
+#define INST_SKIP_NEXT_INSTRUCTION_IF_TRUE 0x0E
+#define INST_GOTO  0x0F
+#define INST_SET_SRC_ADDRESS_BASE  0x10
+#define INST_SET_DST_ADDRESS_BASE  0x11
+#define INST_MOVE_DATA 0x12


I prefer these definitions to come at the top of the file along with
other definitions.


+
+/* ACPI 4.0: 17.4.1.2 Serialization Instruction Entries */
+static void build_serialization_instruction_entry(GArray *table_data,


This function and build_erst() can come at the end of erst.c. They go
together and are doing a common but different operation from the
operations of the pci device - building the erst table. Hence, their
code should be separate from pci device code. A new file would be
overkill at this stage IMHO but in the future if erst table generation
code gains more weight, it can be split into two files.


+uint8_t serialization_action,
+uint8_t instruction,
+uint8_t flags,
+uint8_t register_bit_width,
+uint64_t register_address,
+uint64_t value,
+uint64_t mask)
+{
+/* ACPI 4.0: Table 17-18 Serialization Instruction Entry */
+struct AcpiGenericAddress gas;
+
+/* Serialization Action */
+build_append_int_noprefix(table_data, serialization_action, 1);
+/* Instruction */
+build_append_int_noprefix(table_data, instruction , 1);
+/* Flags */
+build_append_int_noprefix(table_data, flags   , 1);
+/* Reserved */
+build_append_int_noprefix(table_data, 0   , 1);
+/* Register Region */
+gas.space_id = AML_SYSTEM_MEMORY;
+gas.bit_width = register_bit_width;
+gas.bit_offset = 0;
+switch (register_bit_width) {
+case 8:
+gas.access_width = 1;
+break;
+case 16:
+gas.access_width = 2;
+break;
+case 32:
+gas.access_width = 3;
+break;
+case 64:
+gas.access_width = 4;
+break;
+default:
+gas.access_width = 0;
+break;
+}
+gas.address = register_address;
+build_append_gas_from_struct(table_data, &gas);
+/* Value */
+build_append_int_noprefix(table_data, value  , 8);
+/* Mask */
+build_append_int_noprefix(table_data, mask   , 8);
+}
+
+/* ACPI 4.0: 17.4.1 Serialization Action Table */
+void build_erst(GArray *table_data, BIOSLinker *linker, Object *erst_dev,
+const char *oem_id, const char *oem_table_id)
+{
+GArray *table_instruction_data;
+unsigned action;
+pcibus_t bar0, bar1;
+AcpiTable table = { .sig = "ERST", .rev = 1, .oem_id = oem_id,
+.oem_table_id = oem_table_id };
+
+bar0 = 

Re: [RFC PATCH 0/6] Removal of Aiocontext lock and usage of subtree drains in aborted transactions

2021-12-14 Thread Emanuele Giuseppe Esposito




On 13/12/2021 15:52, Stefan Hajnoczi wrote:

Off-topic: I don't understand the difference between the effects of
bdrv_drained_begin() and bdrv_subtree_drained_begin(). Both call
aio_disable_external(aio_context) and aio_poll(). bdrv_drained_begin()
only polls parents and itself, while bdrv_subtree_drained_begin() also
polls children. But why does that distinction matter? I wouldn't know
when to use one over the other.


Good point. Now I am wondering the same, so it would be great if anyone 
could clarify it.


Emanuele




Re: [PATCH for-7.0 0/6] target/arm: Implement LVA, LPA, LPA2 features

2021-12-14 Thread Richard Henderson

On 12/14/21 8:37 AM, Alex Bennée wrote:


Richard Henderson  writes:


These features are all related and relatively small.

Testing so far has been limited to booting a kernel
with 64k pages and VA and PA set to 52 bits, which
exercises LVA and LPA.


Do any distros ship with 64k pages that we could use for an avocado
test?


Well, RHEL 8 has 64k pages but with a 48-bit address space.  There are separate kernel 
configuration options for 52-bits.



r~



  1   2   3   >