[PATCH v2 09/10] dp8393x: fix CAM descriptor entry index

2021-06-24 Thread Mark Cave-Ayland
Currently when a LOAD CAM command is executed the entries are loaded into the
CAM from memory in order which is incorrect. According to the datasheet the
first entry in the CAM descriptor is the entry index which means that each
descriptor may update any single entry in the CAM rather than the Nth entry.

Decode the CAM entry index and use it to store the descriptor in the appropriate
slot in the CAM. This fixes the issue where the MacOS toolbox loads a single
CAM descriptor into the final slot in order to perform a loopback test which
must succeed before the Ethernet port is enabled.

Signed-off-by: Mark Cave-Ayland 
---
 hw/net/dp8393x.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index 6789bcd3af..172fd06694 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -270,7 +270,7 @@ static void dp8393x_update_irq(dp8393xState *s)
 static void dp8393x_do_load_cam(dp8393xState *s)
 {
 int width, size;
-uint16_t index = 0;
+uint16_t index;
 
 width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1;
 size = sizeof(uint16_t) * 4 * width;
@@ -279,6 +279,7 @@ static void dp8393x_do_load_cam(dp8393xState *s)
 /* Fill current entry */
 address_space_read(&s->as, dp8393x_cdp(s),
MEMTXATTRS_UNSPECIFIED, s->data, size);
+index = dp8393x_get(s, width, 0) & 0xf;
 s->cam[index][0] = dp8393x_get(s, width, 1) & 0xff;
 s->cam[index][1] = dp8393x_get(s, width, 1) >> 8;
 s->cam[index][2] = dp8393x_get(s, width, 2) & 0xff;
@@ -291,7 +292,6 @@ static void dp8393x_do_load_cam(dp8393xState *s)
 /* Move to next entry */
 s->regs[SONIC_CDC]--;
 s->regs[SONIC_CDP] += size;
-index++;
 }
 
 /* Read CAM enable */
-- 
2.20.1




[PATCH v2 05/10] dp8393x: remove onboard PROM containing MAC address and checksum

2021-06-24 Thread Mark Cave-Ayland
According to the datasheet the dp8393x chipset does not contain any NVRAM capable
of storing a MAC address or checksum. Now that both the MIPS jazz and m68k q800
boards generate the PROM region and checksum themselves, remove the generated
PROM from the dp8393x device itself.

Signed-off-by: Mark Cave-Ayland 
---
 hw/net/dp8393x.c | 24 
 1 file changed, 24 deletions(-)

diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index ea5b22f680..252c0a2664 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -30,8 +30,6 @@
 #include "qom/object.h"
 #include "trace.h"
 
-#define SONIC_PROM_SIZE 0x1000
-
 static const char *reg_names[] = {
 "CR", "DCR", "RCR", "TCR", "IMR", "ISR", "UTDA", "CTDA",
 "TPS", "TFC", "TSA0", "TSA1", "TFS", "URDA", "CRDA", "CRBA0",
@@ -157,7 +155,6 @@ struct dp8393xState {
 NICConf conf;
 NICState *nic;
 MemoryRegion mmio;
-MemoryRegion prom;
 
 /* Registers */
 uint8_t cam[16][6];
@@ -966,16 +963,12 @@ static void dp8393x_instance_init(Object *obj)
 dp8393xState *s = DP8393X(obj);
 
 sysbus_init_mmio(sbd, &s->mmio);
-sysbus_init_mmio(sbd, &s->prom);
 sysbus_init_irq(sbd, &s->irq);
 }
 
 static void dp8393x_realize(DeviceState *dev, Error **errp)
 {
 dp8393xState *s = DP8393X(dev);
-int i, checksum;
-uint8_t *prom;
-Error *local_err = NULL;
 
 address_space_init(&s->as, s->dma_mr, "dp8393x");
 memory_region_init_io(&s->mmio, OBJECT(dev), &dp8393x_ops, s,
@@ -986,23 +979,6 @@ static void dp8393x_realize(DeviceState *dev, Error **errp)
 qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 
 s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s);
-
-memory_region_init_rom(&s->prom, OBJECT(dev), "dp8393x-prom",
-   SONIC_PROM_SIZE, &local_err);
-if (local_err) {
-error_propagate(errp, local_err);
-return;
-}
-prom = memory_region_get_ram_ptr(&s->prom);
-checksum = 0;
-for (i = 0; i < 6; i++) {
-prom[i] = s->conf.macaddr.a[i];
-checksum += prom[i];
-if (checksum > 0xff) {
-checksum = (checksum + 1) & 0xff;
-}
-}
-prom[7] = 0xff - checksum;
 }
 
 static const VMStateDescription vmstate_dp8393x = {
-- 
2.20.1




[PATCH v2 02/10] dp8393x: convert to trace-events

2021-06-24 Thread Mark Cave-Ayland
Signed-off-by: Mark Cave-Ayland 
---
 hw/net/dp8393x.c| 55 +
 hw/net/trace-events | 17 ++
 2 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index 56af08f0fe..ea5b22f680 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -28,14 +28,10 @@
 #include "qemu/timer.h"
 #include 
 #include "qom/object.h"
-
-/* #define DEBUG_SONIC */
+#include "trace.h"
 
 #define SONIC_PROM_SIZE 0x1000
 
-#ifdef DEBUG_SONIC
-#define DPRINTF(fmt, ...) \
-do { printf("sonic: " fmt , ##  __VA_ARGS__); } while (0)
 static const char *reg_names[] = {
 "CR", "DCR", "RCR", "TCR", "IMR", "ISR", "UTDA", "CTDA",
 "TPS", "TFC", "TSA0", "TSA1", "TFS", "URDA", "CRDA", "CRBA0",
@@ -45,12 +41,6 @@ static const char *reg_names[] = {
 "SR", "WT0", "WT1", "RSC", "CRCT", "FAET", "MPT", "MDT",
 "0x30", "0x31", "0x32", "0x33", "0x34", "0x35", "0x36", "0x37",
 "0x38", "0x39", "0x3a", "0x3b", "0x3c", "0x3d", "0x3e", "DCR2" };
-#else
-#define DPRINTF(fmt, ...) do {} while (0)
-#endif
-
-#define SONIC_ERROR(fmt, ...) \
-do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0)
 
 #define SONIC_CR 0x00
 #define SONIC_DCR0x01
@@ -161,9 +151,7 @@ struct dp8393xState {
 bool big_endian;
 bool last_rba_is_full;
 qemu_irq irq;
-#ifdef DEBUG_SONIC
 int irq_level;
-#endif
 QEMUTimer *watchdog;
 int64_t wt_last_update;
 NICConf conf;
@@ -270,16 +258,14 @@ static void dp8393x_update_irq(dp8393xState *s)
 {
 int level = (s->regs[SONIC_IMR] & s->regs[SONIC_ISR]) ? 1 : 0;
 
-#ifdef DEBUG_SONIC
 if (level != s->irq_level) {
 s->irq_level = level;
 if (level) {
-DPRINTF("raise irq, isr is 0x%04x\n", s->regs[SONIC_ISR]);
+trace_dp8393x_raise_irq(s->regs[SONIC_ISR]);
 } else {
-DPRINTF("lower irq\n");
+trace_dp8393x_lower_irq();
 }
 }
-#endif
 
 qemu_set_irq(s->irq, level);
 }
@@ -302,9 +288,9 @@ static void dp8393x_do_load_cam(dp8393xState *s)
 s->cam[index][3] = dp8393x_get(s, width, 2) >> 8;
 s->cam[index][4] = dp8393x_get(s, width, 3) & 0xff;
 s->cam[index][5] = dp8393x_get(s, width, 3) >> 8;
-DPRINTF("load cam[%d] with %02x%02x%02x%02x%02x%02x\n", index,
-s->cam[index][0], s->cam[index][1], s->cam[index][2],
-s->cam[index][3], s->cam[index][4], s->cam[index][5]);
+trace_dp8393x_load_cam(index, s->cam[index][0], s->cam[index][1],
+   s->cam[index][2], s->cam[index][3],
+   s->cam[index][4], s->cam[index][5]);
 /* Move to next entry */
 s->regs[SONIC_CDC]--;
 s->regs[SONIC_CDP] += size;
@@ -315,7 +301,7 @@ static void dp8393x_do_load_cam(dp8393xState *s)
 address_space_read(&s->as, dp8393x_cdp(s),
MEMTXATTRS_UNSPECIFIED, s->data, size);
 s->regs[SONIC_CE] = dp8393x_get(s, width, 0);
-DPRINTF("load cam done. cam enable mask 0x%04x\n", s->regs[SONIC_CE]);
+trace_dp8393x_load_cam_done(s->regs[SONIC_CE]);
 
 /* Done */
 s->regs[SONIC_CR] &= ~SONIC_CR_LCAM;
@@ -338,9 +324,8 @@ static void dp8393x_do_read_rra(dp8393xState *s)
 s->regs[SONIC_CRBA1] = dp8393x_get(s, width, 1);
 s->regs[SONIC_RBWC0] = dp8393x_get(s, width, 2);
 s->regs[SONIC_RBWC1] = dp8393x_get(s, width, 3);
-DPRINTF("CRBA0/1: 0x%04x/0x%04x, RBWC0/1: 0x%04x/0x%04x\n",
-s->regs[SONIC_CRBA0], s->regs[SONIC_CRBA1],
-s->regs[SONIC_RBWC0], s->regs[SONIC_RBWC1]);
+trace_dp8393x_read_rra_regs(s->regs[SONIC_CRBA0], s->regs[SONIC_CRBA1],
+s->regs[SONIC_RBWC0], s->regs[SONIC_RBWC1]);
 
 /* Go to next entry */
 s->regs[SONIC_RRP] += size;
@@ -444,7 +429,7 @@ static void dp8393x_do_transmit_packets(dp8393xState *s)
 /* Read memory */
 size = sizeof(uint16_t) * 6 * width;
 s->regs[SONIC_TTDA] = s->regs[SONIC_CTDA];
-DPRINTF("Transmit packet at %08x\n", dp8393x_ttda(s));
+trace_dp8393x_transmit_packet(dp8393x_ttda(s));
 address_space_read(&s->as, dp8393x_ttda(s) + sizeof(uint16_t) * width,
MEMTXATTRS_UNSPECIFIED, s->data, size);
 tx_len = 0;
@@ -499,7 +484,7 @@ static void dp8393x_do_transmit_packets(dp8393xState *s)
 /* Remove existing FCS */
 tx_len -= 4;
 if (tx_len < 0) {
-SONIC_ERROR("tx_len is %d\n", tx_len);
+trace_dp8393x_transmit_txlen_error(tx_len);
 break;
 }
 }
@@ -618,7 +603,7 @@ static uint64_t dp8393x_read(void *opaque, hwaddr addr, 
unsigned int size)
 val = s->regs[reg];
 }
 
-DPRINTF("read 0x%04x from reg %s\n", val, reg_names[reg]);
+trace_dp8393x_read(reg, reg_names[reg], val, size);
 
 return s->big_endian ? val << 16 : val;
 }
@@ -630,7 +615,7 @@ st

[PATCH v2 04/10] hw/m68k/q800: move PROM and checksum calculation from dp8393x device to board

2021-06-24 Thread Mark Cave-Ayland
This is in preparation for each board to have its own separate bit storage
format and checksum for storing the MAC address.

Signed-off-by: Mark Cave-Ayland 
---
 hw/m68k/q800.c | 24 ++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c
index 11376daa85..491f283a17 100644
--- a/hw/m68k/q800.c
+++ b/hw/m68k/q800.c
@@ -70,6 +70,8 @@
 #define NUBUS_SUPER_SLOT_BASE 0x6000
 #define NUBUS_SLOT_BASE   0xf000
 
+#define SONIC_PROM_SIZE   0x1000
+
 /*
  * the video base, whereas it a Nubus address,
  * is needed by the kernel to have early display and
@@ -211,8 +213,10 @@ static void q800_init(MachineState *machine)
 int32_t initrd_size;
 MemoryRegion *rom;
 MemoryRegion *io;
+MemoryRegion *dp8393x_prom = g_new(MemoryRegion, 1);
+uint8_t *prom;
 const int io_slice_nb = (IO_SIZE / IO_SLICE) - 1;
-int i;
+int i, checksum;
 ram_addr_t ram_size = machine->ram_size;
 const char *kernel_filename = machine->kernel_filename;
 const char *initrd_filename = machine->initrd_filename;
@@ -319,9 +323,25 @@ static void q800_init(MachineState *machine)
 sysbus = SYS_BUS_DEVICE(dev);
 sysbus_realize_and_unref(sysbus, &error_fatal);
 sysbus_mmio_map(sysbus, 0, SONIC_BASE);
-sysbus_mmio_map(sysbus, 1, SONIC_PROM_BASE);
 sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(glue, 2));
 
+memory_region_init_rom(dp8393x_prom, NULL, "dp8393x-q800.prom",
+   SONIC_PROM_SIZE, &error_fatal);
+memory_region_add_subregion(get_system_memory(), SONIC_PROM_BASE,
+dp8393x_prom);
+
+/* Add MAC address with valid checksum to PROM */
+prom = memory_region_get_ram_ptr(dp8393x_prom);
+checksum = 0;
+for (i = 0; i < 6; i++) {
+prom[i] = nd_table[0].macaddr.a[i];
+checksum += prom[i];
+if (checksum > 0xff) {
+checksum = (checksum + 1) & 0xff;
+}
+}
+prom[7] = 0xff - checksum;
+
 /* SCC */
 
 dev = qdev_new(TYPE_ESCC);
-- 
2.20.1




[PATCH v2 06/10] qemu/bitops.h: add bitrev8 implementation

2021-06-24 Thread Mark Cave-Ayland
This will be required for an upcoming checksum calculation.

Signed-off-by: Mark Cave-Ayland 
---
 include/qemu/bitops.h | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index 03213ce952..110c56e099 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -618,4 +618,26 @@ static inline uint64_t half_unshuffle64(uint64_t x)
 return x;
 }
 
+/**
+ * bitrev8:
+ * @x: 8-bit value to be reversed
+ *
+ * Given an input value with bits::
+ *
+ *   ABCDEFGH
+ *
+ * return the value with its bits reversed from left to right::
+ *
+ *   HGFEDCBA
+ *
+ * Returns: the bit-reversed value.
+ */
+static inline uint8_t bitrev8(uint8_t x)
+{
+x = ((x >> 1) & 0x55) | ((x << 1) & 0xaa);
+x = ((x >> 2) & 0x33) | ((x << 2) & 0xcc);
+x = (x >> 4) | (x << 4) ;
+return x;
+}
+
 #endif
-- 
2.20.1




[PATCH v2 03/10] hw/mips/jazz: move PROM and checksum calculation from dp8393x device to board

2021-06-24 Thread Mark Cave-Ayland
This is in preparation for each board to have its own separate bit storage
format and checksum for storing the MAC address.

Signed-off-by: Mark Cave-Ayland 
---
 hw/mips/jazz.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c
index 1e1cf8154e..89ca8bb910 100644
--- a/hw/mips/jazz.c
+++ b/hw/mips/jazz.c
@@ -119,6 +119,8 @@ static const MemoryRegionOps dma_dummy_ops = {
 #define MAGNUM_BIOS_SIZE   
\
 (BIOS_SIZE < MAGNUM_BIOS_SIZE_MAX ? BIOS_SIZE : MAGNUM_BIOS_SIZE_MAX)
 
+#define SONIC_PROM_SIZE 0x1000
+
 static void mips_jazz_init(MachineState *machine,
enum jazz_model_e jazz_model)
 {
@@ -137,6 +139,7 @@ static void mips_jazz_init(MachineState *machine,
 MemoryRegion *rtc = g_new(MemoryRegion, 1);
 MemoryRegion *i8042 = g_new(MemoryRegion, 1);
 MemoryRegion *dma_dummy = g_new(MemoryRegion, 1);
+MemoryRegion *dp8393x_prom = g_new(MemoryRegion, 1);
 NICInfo *nd;
 DeviceState *dev, *rc4030;
 SysBusDevice *sysbus;
@@ -228,6 +231,10 @@ static void mips_jazz_init(MachineState *machine,
   NULL, "dummy_dma", 0x1000);
 memory_region_add_subregion(address_space, 0x8000d000, dma_dummy);
 
+memory_region_init_rom(dp8393x_prom, NULL, "dp8393x-jazz.prom",
+   SONIC_PROM_SIZE, &error_fatal);
+memory_region_add_subregion(address_space, 0x8000b000, dp8393x_prom);
+
 /* ISA bus: IO space at 0x9000, mem space at 0x9100 */
 memory_region_init(isa_io, NULL, "isa-io", 0x0001);
 memory_region_init(isa_mem, NULL, "isa-mem", 0x0100);
@@ -275,6 +282,9 @@ static void mips_jazz_init(MachineState *machine,
 nd->model = g_strdup("dp83932");
 }
 if (strcmp(nd->model, "dp83932") == 0) {
+int checksum, i;
+uint8_t *prom;
+
 qemu_check_nic_model(nd, "dp83932");
 
 dev = qdev_new("dp8393x");
@@ -285,8 +295,19 @@ static void mips_jazz_init(MachineState *machine,
 sysbus = SYS_BUS_DEVICE(dev);
 sysbus_realize_and_unref(sysbus, &error_fatal);
 sysbus_mmio_map(sysbus, 0, 0x80001000);
-sysbus_mmio_map(sysbus, 1, 0x8000b000);
 sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(rc4030, 4));
+
+/* Add MAC address with valid checksum to PROM */
+prom = memory_region_get_ram_ptr(dp8393x_prom);
+checksum = 0;
+for (i = 0; i < 6; i++) {
+prom[i] = nd->macaddr.a[i];
+checksum += prom[i];
+if (checksum > 0xff) {
+checksum = (checksum + 1) & 0xff;
+}
+}
+prom[7] = 0xff - checksum;
 break;
 } else if (is_help_option(nd->model)) {
 error_report("Supported NICs: dp83932");
-- 
2.20.1




[PATCH v2 00/10] dp8393x: fixes for MacOS toolbox ROM

2021-06-24 Thread Mark Cave-Ayland
Here is the next set of patches from my attempts to boot MacOS under QEMU's
Q800 machine related to the Sonic network adapter.

Patches 1 and 2 sort out checkpatch and convert from DPRINTF macros to
trace-events.

The discussion for the v1 patchset concluded that the dp8393x device does
NOT have its own NVRAM (there is no mention of it on the datasheet) and so
patches 3 to 5 move the generation of the PROM to the q800 and jazz boards
separately to allow the formats to diverge.

Patch 6 adds an implementation of bitrev8 to bitops.h in preparation for
changing the q800 PROM storage format, whilst patch 7 updates the MAC address
storage and checksum for the q800 machine to match the format expected by the
MacOS toolbox ROM.

Patch 8 ensures that the CPU loads/stores are correctly converted to 16-bit
accesses for the network card and patch 9 fixes a bug when selecting the
index specified for CAM entries.

Finally since the MIPS magnum machine exists for both big-endian (mips64) and
little-endian (mips64el) configurations, patch 10 sets the dp8393x big_endian
property accordingly using a similar technique already used for the MIPS malta
machines.

Migration notes: the changes to the dp8393x PROM are a migration break, but we
don't care about this for now since a) the q800 machine will have more
breaking migration changes as further MacOS toolbox ROM support is upstreamed
and b) the magnum machine migration is currently broken (and has been for
quite some time).

Signed-off-by: Mark Cave-Ayland 


v2:
- Move PROM generation from dp8393x to q800 and magnum machines and remove
  the existing code from the device itself
- Add bitrev8 implementation to bitops.h so it can be used elsewhere in
  future. Use a shift/merge technique rather than a massive table lookup
  as we don't care about speed
- Add patch to set the big_endian property correctly depending upon whether
  a big-endian or little-endian configuration is being used


Mark Cave-Ayland (10):
  dp8393x: checkpatch fixes
  dp8393x: convert to trace-events
  hw/mips/jazz: move PROM and checksum calculation from dp8393x device
to board
  hw/m68k/q800: move PROM and checksum calculation from dp8393x device
to board
  dp8393x: remove onboard PROM containing MAC address and checksum
  qemu/bitops.h: add bitrev8 implementation
  hw/m68k/q800: fix PROM checksum and MAC address storage
  dp8393x: don't force 32-bit register access
  dp8393x: fix CAM descriptor entry index
  hw/mips/jazz: specify correct endian for dp8393x device

 hw/m68k/q800.c|  21 ++-
 hw/mips/jazz.c|  32 -
 hw/net/dp8393x.c  | 313 +++---
 hw/net/trace-events   |  17 +++
 include/qemu/bitops.h |  22 +++
 5 files changed, 231 insertions(+), 174 deletions(-)

-- 
2.20.1




[PATCH v2 01/10] dp8393x: checkpatch fixes

2021-06-24 Thread Mark Cave-Ayland
Also fix a simple comment typo of "contrainst" to "constraints".

Signed-off-by: Mark Cave-Ayland 
---
 hw/net/dp8393x.c | 231 +--
 1 file changed, 122 insertions(+), 109 deletions(-)

diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index 533a8304d0..56af08f0fe 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -29,14 +29,14 @@
 #include 
 #include "qom/object.h"
 
-//#define DEBUG_SONIC
+/* #define DEBUG_SONIC */
 
 #define SONIC_PROM_SIZE 0x1000
 
 #ifdef DEBUG_SONIC
 #define DPRINTF(fmt, ...) \
 do { printf("sonic: " fmt , ##  __VA_ARGS__); } while (0)
-static const char* reg_names[] = {
+static const char *reg_names[] = {
 "CR", "DCR", "RCR", "TCR", "IMR", "ISR", "UTDA", "CTDA",
 "TPS", "TFC", "TSA0", "TSA1", "TFS", "URDA", "CRDA", "CRBA0",
 "CRBA1", "RBWC0", "RBWC1", "EOBC", "URRA", "RSA", "REA", "RRP",
@@ -185,7 +185,8 @@ struct dp8393xState {
 AddressSpace as;
 };
 
-/* Accessor functions for values which are formed by
+/*
+ * Accessor functions for values which are formed by
  * concatenating two 16 bit device registers. By putting these
  * in their own functions with a uint32_t return type we avoid the
  * pitfall of implicit sign extension where ((x << 16) | y) is a
@@ -350,8 +351,7 @@ static void dp8393x_do_read_rra(dp8393xState *s)
 }
 
 /* Warn the host if CRBA now has the last available resource */
-if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP])
-{
+if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP]) {
 s->regs[SONIC_ISR] |= SONIC_ISR_RBE;
 dp8393x_update_irq(s);
 }
@@ -364,7 +364,8 @@ static void dp8393x_do_software_reset(dp8393xState *s)
 {
 timer_del(s->watchdog);
 
-s->regs[SONIC_CR] &= ~(SONIC_CR_LCAM | SONIC_CR_RRRA | SONIC_CR_TXP | 
SONIC_CR_HTX);
+s->regs[SONIC_CR] &= ~(SONIC_CR_LCAM | SONIC_CR_RRRA | SONIC_CR_TXP |
+   SONIC_CR_HTX);
 s->regs[SONIC_CR] |= SONIC_CR_RST | SONIC_CR_RXDIS;
 }
 
@@ -490,8 +491,10 @@ static void dp8393x_do_transmit_packets(dp8393xState *s)
 
 /* Handle Ethernet checksum */
 if (!(s->regs[SONIC_TCR] & SONIC_TCR_CRCI)) {
-/* Don't append FCS there, to look like slirp packets
- * which don't have one */
+/*
+ * Don't append FCS there, to look like slirp packets
+ * which don't have one
+ */
 } else {
 /* Remove existing FCS */
 tx_len -= 4;
@@ -558,26 +561,34 @@ static void dp8393x_do_command(dp8393xState *s, uint16_t 
command)
 
 s->regs[SONIC_CR] |= (command & SONIC_CR_MASK);
 
-if (command & SONIC_CR_HTX)
+if (command & SONIC_CR_HTX) {
 dp8393x_do_halt_transmission(s);
-if (command & SONIC_CR_TXP)
+}
+if (command & SONIC_CR_TXP) {
 dp8393x_do_transmit_packets(s);
-if (command & SONIC_CR_RXDIS)
+}
+if (command & SONIC_CR_RXDIS) {
 dp8393x_do_receiver_disable(s);
-if (command & SONIC_CR_RXEN)
+}
+if (command & SONIC_CR_RXEN) {
 dp8393x_do_receiver_enable(s);
-if (command & SONIC_CR_STP)
+}
+if (command & SONIC_CR_STP) {
 dp8393x_do_stop_timer(s);
-if (command & SONIC_CR_ST)
+}
+if (command & SONIC_CR_ST) {
 dp8393x_do_start_timer(s);
-if (command & SONIC_CR_RST)
+}
+if (command & SONIC_CR_RST) {
 dp8393x_do_software_reset(s);
+}
 if (command & SONIC_CR_RRRA) {
 dp8393x_do_read_rra(s);
 s->regs[SONIC_CR] &= ~SONIC_CR_RRRA;
 }
-if (command & SONIC_CR_LCAM)
+if (command & SONIC_CR_LCAM) {
 dp8393x_do_load_cam(s);
+}
 }
 
 static uint64_t dp8393x_read(void *opaque, hwaddr addr, unsigned int size)
@@ -587,24 +598,24 @@ static uint64_t dp8393x_read(void *opaque, hwaddr addr, 
unsigned int size)
 uint16_t val = 0;
 
 switch (reg) {
-/* Update data before reading it */
-case SONIC_WT0:
-case SONIC_WT1:
-dp8393x_update_wt_regs(s);
-val = s->regs[reg];
-break;
-/* Accept read to some registers only when in reset mode */
-case SONIC_CAP2:
-case SONIC_CAP1:
-case SONIC_CAP0:
-if (s->regs[SONIC_CR] & SONIC_CR_RST) {
-val = s->cam[s->regs[SONIC_CEP] & 0xf][2* (SONIC_CAP0 - reg) + 
1] << 8;
-val |= s->cam[s->regs[SONIC_CEP] & 0xf][2* (SONIC_CAP0 - reg)];
-}
-break;
-/* All other registers have no special contrainst */
-default:
-val = s->regs[reg];
+/* Update data before reading it */
+case SONIC_WT0:
+case SONIC_WT1:
+dp8393x_update_wt_regs(s);
+val = s->regs[reg];
+break;
+/* Accept read to some registers only when in reset mode */
+case SONIC_CAP2:
+case SONIC_CAP1:
+case SONIC_CAP0:
+if (s->regs[SONIC_CR] & SONIC_CR_RST) {
+val = s->cam[s->regs[SONIC_CEP] & 0xf][2 *

[v4] migration: fix the memory overwriting risk in add_to_iovec

2021-06-24 Thread Lin Feng
From: Feng Lin 

When testing migration, QEMU crashes with a segmentation fault and generates a core dump:
0  error_free (err=0x1)
1  0x7f8b862df647 in qemu_fclose (f=f@entry=0x55e06c247640)
2  0x7f8b8516d59a in migrate_fd_cleanup (s=s@entry=0x55e06c0e1ef0)
3  0x7f8b8516d66c in migrate_fd_cleanup_bh (opaque=0x55e06c0e1ef0)
4  0x7f8b8626a47f in aio_bh_poll (ctx=ctx@entry=0x55e06b5a16d0)
5  0x7f8b8626e71f in aio_dispatch (ctx=0x55e06b5a16d0)
6  0x7f8b8626a33d in aio_ctx_dispatch (source=, 
callback=, user_data=)
7  0x7f8b866bdba4 in g_main_context_dispatch ()
8  0x7f8b8626cde9 in glib_pollfds_poll ()
9  0x7f8b8626ce62 in os_host_main_loop_wait (timeout=)
10 0x7f8b8626cffd in main_loop_wait (nonblocking=nonblocking@entry=0)
11 0x7f8b862ef01f in main_loop ()
Printing the struct QEMUFile f in gdb shows: f = {
  ...,
  iovcnt = 65, last_error = 21984,
  last_error_obj = 0x1, shutdown = true
}
Here iovcnt has overflowed, because the value of MAX_IOV_SIZE is 64.
struct QEMUFile {
...;
struct iovec iov[MAX_IOV_SIZE];
unsigned int iovcnt;
int last_error;
Error *last_error_obj;
bool shutdown;
};
iovcnt and last_error are overwritten by add_to_iovec().
Right now, add_to_iovec() increases iovcnt before checking the limit.
It seems that add_to_iovec() assumes that iovcnt will be reset to zero
in qemu_fflush(), but qemu_fflush() returns immediately when f->shutdown
is true.

This situation may occur when libvirtd restarts during migration, after
f->shutdown is set but before qemu_file_set_error() is called in
qemu_file_shutdown().

So the safest way is to check iovcnt before increasing it.

Signed-off-by: Feng Lin 
---
 migration/qemu-file.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index d6e03dbc0e..6879615197 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -416,6 +416,11 @@ static int add_to_iovec(QEMUFile *f, const uint8_t *buf, 
size_t size,
 {
 f->iov[f->iovcnt - 1].iov_len += size;
 } else {
+if (f->iovcnt >= MAX_IOV_SIZE) {
+/* Should only happen if a previous fflush failed */
+assert(f->shutdown || !qemu_file_is_writeable(f));
+return 1;
+}
 if (may_free) {
 set_bit(f->iovcnt, f->may_free);
 }
-- 
2.23.0




Re: [PATCH 0/5] dp8393x: fixes for MacOS toolbox ROM

2021-06-24 Thread Mark Cave-Ayland

On 25/06/2021 05:36, Finn Thain wrote:


On Thu, 24 Jun 2021, Mark Cave-Ayland wrote:


Thanks for the link and the detailed testing information. I've been
trying to understand why you had to set the MAC address in the ARC
firmware so I had a bit of an experiment here.

The reason that you need to do this is because of the NVRAM
configuration in your command line, in particular -global
ds1225y.size=8200. What this does is extend the NVRAM over the top of
the dp8393x-prom area where QEMU places the NIC MAC address and checksum
on startup, so the NVRAM captures the MAC address reads/writes instead.
The net effect of this is that the empty NVRAM initially reads all zeros,
which is why an initial setup is required to set the MAC address.

This can be seen quite clearly in the "info mtree" output:

 80009000-8000b007 (prio 0, i/o): nvram
 8000b000-8000bfff (prio 0, rom): dp8393x-prom

However if you completely drop -global ds1225y.size=8200 from your
command line then the NVRAM doesn't overrun into the dp8393x-prom area,
and the ARC firmware picks up the MAC address from QEMU correctly:

 80009000-8000afff (prio 0, i/o): nvram
 8000b000-8000bfff (prio 0, rom): dp8393x-prom

I've also looked over the entire SONIC datasheet to see if the PROM
format is documented, and according to that there is no non-volatile
storage available on the chip itself.


Yes, that's my understanding also. The relevant National Semiconductor
Application Notes seem to include a separate PROM. And if you closely
examine the Linux macsonic.c driver, you'll see that the PowerBook 5x0
models get a random MAC address because no-one (outside of Apple) knows
where the real MAC address is stored.


Agreed. This means that the revised patchset should now be doing the right 
thing here.

FWIW I felt that it had changed too much in its latest form to include your original 
Tested-by tag due to the extra PROM changes, so I'd be grateful if you could give it 
a quick test.



Testing shows that the checksum algorithm currently used for the dp8393x
device generates the same result as that generated by the ARC firmware,
which is known to be different than that used by the Q800 machine.

 From this I conclude that the PROM is provided by the board and not the
chipset, and therefore each machine should construct its own PROM
accordingly. I'll send a v2 patchset shortly with these changes which
shall also include the proposed endian patch.



If you potentially have both a ds1225y NVRAM and a dp8393x PROM (for the
magnum machine) how do you avoid ending up with conflicting state? Would
the two storage devices have to be mutually exclusive?


The ds1225y NVRAM is located between 0x80009000-0x8000afff and running the nvram file 
through hexdump shows only the first 0x1000 bytes contain any data, so any other 
changes made to NVRAM via the ARC firmware setup will be preserved.


The existing default behaviour (without -global ds1225y.size=8200) is that only the 
last few bytes at 0x8000b000 are mapped to the dp8393x PROM, and this area is marked 
read-only to ensure that the MAC address obtained by the guest OS always matches the 
one provided by the QEMU configuration.



ATB,

Mark.



[PATCH qemu v22] spapr: Implement Open Firmware client interface

2021-06-24 Thread Alexey Kardashevskiy
The PAPR platform describes an OS environment that's presented by
a combination of a hypervisor and firmware. The features it specifies
require collaboration between the firmware and the hypervisor.

Since the beginning, the runtime component of the firmware (RTAS) has
been implemented as a 20 byte shim which simply forwards it to
a hypercall implemented in qemu. The boot time firmware component is
SLOF - but a build that's specific to qemu, and has always needed to be
updated in sync with it. Even though we've managed to limit the amount
of runtime communication we need between qemu and SLOF, there's some,
and it has become increasingly awkward to handle as we've implemented
new features.

This implements a boot time OF client interface (CI) which is
enabled by a new "x-vof" pseries machine option (stands for "Virtual Open
Firmware"). When enabled, QEMU implements the custom H_OF_CLIENT hcall
which implements Open Firmware Client Interface (OF CI). This allows
using a smaller stateless firmware which does not have to manage
the device tree.

The new "vof.bin" firmware image is included with source code under
pc-bios/. It also includes the RTAS blob.

This implements a handful of CI methods just to get -kernel/-initrd
working. In particular, this implements the device tree fetching and
simple memory allocator - "claim" (an OF CI memory allocator) and updates
"/memory@0/available" to inform the client about available memory.

This implements changing some device tree properties which we know how
to deal with, the rest is ignored. To allow changes, this skips
fdt_pack() when x-vof=on as not packing the blob leaves some room for
appending.

In absence of SLOF, this assigns phandles to device tree nodes to make
device tree traversing work.

When x-vof=on, this adds "/chosen" every time QEMU (re)builds a tree.

This adds basic instances support which are managed by a hash map
ihandle -> [phandle].

Before the guest starts, the memory in use is:
0..e60 - the initial firmware
8000..1 - stack
40.. - kernel
3ea.. - initramdisk

This OF CI does not implement "interpret".

Unlike SLOF, this does not format uninitialized nvram. Instead, this
includes a disk image with pre-formatted nvram.

With this basic support, this can only boot into kernel directly.
However this is just enough for the petitboot kernel and initramdisk to
boot from any possible source. Note this requires a reasonably recent guest
kernel with:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=df5be5be8735

The immediate benefit is a much faster boot time, which is especially
crucial with fully emulated early CPU bring-up environments. This may
also come in handy when/if GRUB-in-the-userspace sees the light of day.

This separates VOF and sPAPR in a hope that VOF bits may be reused by
other POWERPC boards which do not support pSeries.

This assumes potential support for booting from QEMU backends
such as blockdev or netdev without devices/drivers used.

Signed-off-by: Alexey Kardashevskiy 
---

The example command line is:

/home/aik/pbuild/qemu-killslof-localhost-ppc64/qemu-system-ppc64 \
-nodefaults \
-chardev stdio,id=STDIO0,signal=off,mux=on \
-device spapr-vty,id=svty0,reg=0x71000110,chardev=STDIO0 \
-mon id=MON0,chardev=STDIO0,mode=readline \
-nographic \
-vga none \
-enable-kvm \
-m 8G \
-machine 
pseries,x-vof=on,cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken,cap-ccf-assist=off
 \
-kernel pbuild/kernel-le-guest/vmlinux \
-initrd pb/rootfs.cpio.xz \
-drive 
id=DRIVE0,if=none,file=./p/qemu-killslof/pc-bios/vof-nvram.bin,format=raw \
-global spapr-nvram.drive=DRIVE0 \
-snapshot \
-smp 8,threads=8 \
-L /home/aik/t/qemu-ppc64-bios/ \
-trace events=qemu_trace_events \
-d guest_errors \
-chardev socket,id=SOCKET0,server,nowait,path=qemu.mon.tmux26 \
-mon chardev=SOCKET0,mode=control

---
Changes:
v22:
* dropped changes to ./configure and compile VOF always
* added various comments
* style fixes

v21:
* s/ld/lwz/ in entry.S
* moved CONFIG_VOF from default-configs/devices/ppc64-softmmu.mak to Kconfig
* made CONFIG_VOF optional
* s/l.lds/vof.lds/
* force 32 BE in spapr_machine_reset() instead of the firmware
* added checks for non-null methods of VofMachineIfClass
* moved OF_STACK_SIZE to vof.h, renamed to VOF_..., added a better comment
* added  path_offset wrapper for handling mixed case for addresses
after "@" in node names
* changed getprop() to check for actual "name" property in the fdt
* moved VOF_MEM_READ/VOF_MEM_WRITE to vof.h for sharing as (unlike similar
rtas_ld/ldl_be_*) they return error codes
* VOF_MEM_READ uses now address_space_read (it was address_space_read_full
before, not sure why)

v20:
* compile vof.bin with -mcpu=power4 for better compatibility
* s/std/stw/ in entry.S to make it work on ppc32
* fixed dt_available property to support both 32 and 64bit
* shuffled prom_args handling code
* do not enforce 32bit in MSR (again, to support 32bit platforms)

v19:
* put bootargs in the FDT
* moved setting prope

[PATCH v2 3/6] tests/acceptance: add replay kernel test for openrisc

2021-06-24 Thread Pavel Dovgalyuk
This patch adds a record/replay test which boots a Linux
kernel on the openrisc platform. The test uses kernel binaries
taken from the boot_linux_console test.

Signed-off-by: Pavel Dovgalyuk 
Reviewed-by: Willian Rampazzo 
---
 tests/acceptance/replay_kernel.py |   11 +++
 1 file changed, 11 insertions(+)

diff --git a/tests/acceptance/replay_kernel.py 
b/tests/acceptance/replay_kernel.py
index 7e7f4c8ccc..4c682b3f29 100644
--- a/tests/acceptance/replay_kernel.py
+++ b/tests/acceptance/replay_kernel.py
@@ -319,6 +319,17 @@ def test_ppc64_e500(self):
 file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
 self.do_test_advcal_2018(file_path, 'uImage', ('-cpu', 'e5500'))
 
+def test_or1k_sim(self):
+"""
+:avocado: tags=arch:or1k
+:avocado: tags=machine:or1k-sim
+"""
+tar_hash = '20334cdaf386108c530ff0badaecc955693027dd'
+tar_url = ('https://www.qemu-advent-calendar.org'
+   '/2018/download/day20.tar.xz')
+file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
+self.do_test_advcal_2018(file_path, 'vmlinux')
+
 def test_ppc_g3beige(self):
 """
 :avocado: tags=arch:ppc




[PATCH v2 2/6] tests/acceptance: add replay kernel test for ppc64

2021-06-24 Thread Pavel Dovgalyuk
This patch adds record/replay test which boots Linux
kernel on ppc64 platform. The test uses kernel binaries
taken from boot_linux_console test.

Signed-off-by: Pavel Dovgalyuk 
---
 tests/acceptance/replay_kernel.py |   13 +
 1 file changed, 13 insertions(+)

diff --git a/tests/acceptance/replay_kernel.py 
b/tests/acceptance/replay_kernel.py
index cdc22cb6d3..7e7f4c8ccc 100644
--- a/tests/acceptance/replay_kernel.py
+++ b/tests/acceptance/replay_kernel.py
@@ -367,6 +367,19 @@ def test_xtensa_lx60(self):
 self.do_test_advcal_2018(file_path, 'santas-sleigh-ride.elf',
  args=('-cpu', 'dc233c'))
 
+def test_ppc64_e500(self):
+"""
+:avocado: tags=arch:ppc64
+:avocado: tags=machine:ppce500
+:avocado: tags=cpu:e5500
+"""
+tar_hash = '6951d86d644b302898da2fd701739c9406527fe1'
+tar_url = ('https://www.qemu-advent-calendar.org'
+   '/2018/download/day19.tar.xz')
+file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
+self.do_test_advcal_2018(file_path, 'uImage', ('-cpu', 'e5500'))
+
+
 @skipUnless(os.getenv('AVOCADO_TIMEOUT_EXPECTED'), 'Test might timeout')
 class ReplayKernelSlow(ReplayKernelBase):
 # Override the timeout, because this kernel includes an inner




[PATCH v2 5/6] tests/acceptance: add replay kernel test for alpha

2021-06-24 Thread Pavel Dovgalyuk
This patch adds record/replay test which boots Linux
kernel on alpha platform. The test uses kernel binaries
taken from boot_linux_console test.

Signed-off-by: Pavel Dovgalyuk 
Reviewed-by: Willian Rampazzo 
---
 tests/acceptance/replay_kernel.py |   17 +
 1 file changed, 17 insertions(+)

diff --git a/tests/acceptance/replay_kernel.py 
b/tests/acceptance/replay_kernel.py
index ba2314d2ec..76af30b28b 100644
--- a/tests/acceptance/replay_kernel.py
+++ b/tests/acceptance/replay_kernel.py
@@ -224,6 +224,23 @@ def test_s390x_s390_ccw_virtio(self):
 self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=9,
 args=('-nodefaults', '-smp', '1'))
 
+def test_alpha_clipper(self):
+"""
+:avocado: tags=arch:alpha
+:avocado: tags=machine:clipper
+"""
+kernel_url = ('http://archive.debian.org/debian/dists/lenny/main/'
+  'installer-alpha/20090123lenny10/images/cdrom/vmlinuz')
+kernel_hash = '3a943149335529e2ed3e74d0d787b85fb5671ba3'
+kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
+
+uncompressed_kernel = archive.uncompress(kernel_path, self.workdir)
+
+kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=ttyS0'
+console_pattern = 'Kernel command line: %s' % kernel_command_line
+self.run_rr(uncompressed_kernel, kernel_command_line, console_pattern, 
shift=9,
+args=('-nodefaults', ))
+
 def test_ppc64_pseries(self):
 """
 :avocado: tags=arch:ppc64




[PATCH v2 6/6] tests/acceptance: Linux boot test for record/replay

2021-06-24 Thread Pavel Dovgalyuk
From: Pavel Dovgalyuk 

This patch adds a test for record/replay, which boots Linux
image from the disk and interacts with the network.
The idea and code of this test is borrowed from boot_linux.py
This test includes only x86_64 platform. Other platforms and
machines will be added later after testing and improving
record/replay to completely support them.

Each test consists of the following phases:
 - downloading the disk image
 - recording the execution
 - replaying the execution

Replay does not validate the output, but waits until QEMU
finishes the execution. This is reasonable, because
QEMU usually hangs when replay goes wrong.

Signed-off-by: Pavel Dovgalyuk 
Reviewed-by: Willian Rampazzo 
---
 MAINTAINERS  |1 
 tests/acceptance/replay_linux.py |  116 ++
 2 files changed, 117 insertions(+)
 create mode 100644 tests/acceptance/replay_linux.py

diff --git a/MAINTAINERS b/MAINTAINERS
index 0ca6b7de94..35ecb2ce2c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2864,6 +2864,7 @@ F: include/sysemu/replay.h
 F: docs/replay.txt
 F: stubs/replay.c
 F: tests/acceptance/replay_kernel.py
+F: tests/acceptance/replay_linux.py
 F: tests/acceptance/reverse_debugging.py
 F: qapi/replay.json
 
diff --git a/tests/acceptance/replay_linux.py b/tests/acceptance/replay_linux.py
new file mode 100644
index 00..15953f9e49
--- /dev/null
+++ b/tests/acceptance/replay_linux.py
@@ -0,0 +1,116 @@
+# Record/replay test that boots a complete Linux system via a cloud image
+#
+# Copyright (c) 2020 ISP RAS
+#
+# Author:
+#  Pavel Dovgalyuk 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+import os
+import logging
+import time
+
+from avocado import skipUnless
+from avocado.utils import cloudinit
+from avocado.utils import network
+from avocado.utils import vmimage
+from avocado.utils import datadrainer
+from avocado.utils.path import find_command
+from avocado_qemu import LinuxTest
+
+class ReplayLinux(LinuxTest):
+"""
+Boots a Linux system, checking for a successful initialization
+"""
+
+timeout = 1800
+chksum = None
+hdd = 'ide-hd'
+cd = 'ide-cd'
+bus = 'ide'
+
+def setUp(self):
+super(ReplayLinux, self).setUp()
+self.boot_path = self.download_boot()
+self.cloudinit_path = self.prepare_cloudinit()
+
+def vm_add_disk(self, vm, path, id, device):
+bus_string = ''
+if self.bus:
+bus_string = ',bus=%s.%d' % (self.bus, id,)
+vm.add_args('-drive', 'file=%s,snapshot,id=disk%s,if=none' % (path, 
id))
+vm.add_args('-drive',
+'driver=blkreplay,id=disk%s-rr,if=none,image=disk%s' % (id, id))
+vm.add_args('-device',
+'%s,drive=disk%s-rr%s' % (device, id, bus_string))
+
+def launch_and_wait(self, record, args, shift):
+vm = self.get_vm()
+vm.add_args('-smp', '1')
+vm.add_args('-m', '1024')
+vm.add_args('-object', 'filter-replay,id=replay,netdev=hub0port0')
+if args:
+vm.add_args(*args)
+self.vm_add_disk(vm, self.boot_path, 0, self.hdd)
+self.vm_add_disk(vm, self.cloudinit_path, 1, self.cd)
+logger = logging.getLogger('replay')
+if record:
+logger.info('recording the execution...')
+mode = 'record'
+else:
+logger.info('replaying the execution...')
+mode = 'replay'
+replay_path = os.path.join(self.workdir, 'replay.bin')
+vm.add_args('-icount', 'shift=%s,rr=%s,rrfile=%s' %
+(shift, mode, replay_path))
+
+start_time = time.time()
+
+vm.set_console()
+vm.launch()
+console_drainer = datadrainer.LineLogger(vm.console_socket.fileno(),
+logger=self.log.getChild('console'),
+stop_check=(lambda : not vm.is_running()))
+console_drainer.start()
+if record:
+cloudinit.wait_for_phone_home(('0.0.0.0', self.phone_home_port),
+  self.name)
+vm.shutdown()
+logger.info('finished the recording with log size %s bytes'
+% os.path.getsize(replay_path))
+else:
+vm.event_wait('SHUTDOWN', self.timeout)
+vm.shutdown(True)
+logger.info('successfully fihished the replay')
+elapsed = time.time() - start_time
+logger.info('elapsed time %.2f sec' % elapsed)
+return elapsed
+
+def run_rr(self, args=None, shift=7):
+t1 = self.launch_and_wait(True, args, shift)
+t2 = self.launch_and_wait(False, args, shift)
+logger = logging.getLogger('replay')
+logger.info('replay overhead {:.2%}'.format(t2 / t1 - 1))
+
+@skipUnless(os.getenv('AVOCADO_TIMEOUT_EXPECTED'), 'Test might timeout')
+class ReplayLinuxX8664(Rep

[PATCH v2 4/6] tests/acceptance: add replay kernel test for nios2

2021-06-24 Thread Pavel Dovgalyuk
This patch adds record/replay test which boots Linux
kernel on nios2 platform. The test uses kernel binaries
taken from boot_linux_console test.

Signed-off-by: Pavel Dovgalyuk 
Reviewed-by: Willian Rampazzo 
---
 tests/acceptance/replay_kernel.py |   11 +++
 1 file changed, 11 insertions(+)

diff --git a/tests/acceptance/replay_kernel.py 
b/tests/acceptance/replay_kernel.py
index 4c682b3f29..ba2314d2ec 100644
--- a/tests/acceptance/replay_kernel.py
+++ b/tests/acceptance/replay_kernel.py
@@ -330,6 +330,17 @@ def test_or1k_sim(self):
 file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
 self.do_test_advcal_2018(file_path, 'vmlinux')
 
+def test_nios2_10m50(self):
+"""
+:avocado: tags=arch:nios2
+:avocado: tags=machine:10m50-ghrd
+"""
+tar_hash = 'e4251141726c412ac0407c5a6bceefbbff018918'
+tar_url = ('https://www.qemu-advent-calendar.org'
+   '/2018/download/day14.tar.xz')
+file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
+self.do_test_advcal_2018(file_path, 'vmlinux.elf')
+
 def test_ppc_g3beige(self):
 """
 :avocado: tags=arch:ppc




[PATCH v2 1/6] tests/acceptance: add replay kernel test for s390

2021-06-24 Thread Pavel Dovgalyuk
This patch adds record/replay test which boots Linux
kernel on s390x platform. The test uses kernel binaries
taken from boot_linux_console test.

Signed-off-by: Pavel Dovgalyuk 
Reviewed-by: Willian Rampazzo 
---
 tests/acceptance/replay_kernel.py |   16 
 1 file changed, 16 insertions(+)

diff --git a/tests/acceptance/replay_kernel.py 
b/tests/acceptance/replay_kernel.py
index 71facdaa75..cdc22cb6d3 100644
--- a/tests/acceptance/replay_kernel.py
+++ b/tests/acceptance/replay_kernel.py
@@ -208,6 +208,22 @@ def test_arm_cubieboard_initrd(self):
   '-initrd', initrd_path,
   '-no-reboot'))
 
+def test_s390x_s390_ccw_virtio(self):
+"""
+:avocado: tags=arch:s390x
+:avocado: tags=machine:s390-ccw-virtio
+"""
+kernel_url = ('https://archives.fedoraproject.org/pub/archive'
+  
'/fedora-secondary/releases/29/Everything/s390x/os/images'
+  '/kernel.img')
+kernel_hash = 'e8e8439103ef8053418ef062644ffd46a7919313'
+kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
+
+kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=sclp0'
+console_pattern = 'Kernel command line: %s' % kernel_command_line
+self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=9,
+args=('-nodefaults', '-smp', '1'))
+
 def test_ppc64_pseries(self):
 """
 :avocado: tags=arch:ppc64




[PATCH v2 0/6] More record/replay acceptance tests

2021-06-24 Thread Pavel Dovgalyuk
The following series adds new record/replay tests to the acceptance group.

The provided tests perform kernel boot and disk image boot scenarios.
For all of them recording and replaying phases are executed.
Tests were borrowed from existing boot_linux*.py tests.

New tests include kernel boot for s390x, ppc64, alpha, nios2, and openrisc,
and Linux boot with cloudinit image for x86_64.

v2 changes:
 - moved ppc64 test to the right script

---

Pavel Dovgaluk (1):
  tests/acceptance: Linux boot test for record/replay

Pavel Dovgalyuk (5):
  tests/acceptance: add replay kernel test for s390
  tests/acceptance: add replay kernel test for ppc64
  tests/acceptance: add replay kernel test for openrisc
  tests/acceptance: add replay kernel test for nios2
  tests/acceptance: add replay kernel test for alpha


 MAINTAINERS   |   1 +
 tests/acceptance/replay_kernel.py |  52 ++
 tests/acceptance/replay_linux.py  | 116 ++
 3 files changed, 169 insertions(+)
 create mode 100644 tests/acceptance/replay_linux.py

--
Pavel Dovgalyuk



Re: [PATCH 6/6] tests/acceptance: Linux boot test for record/replay

2021-06-24 Thread Pavel Dovgalyuk

On 24.06.2021 21:15, Willian Rampazzo wrote:

On Wed, Jun 23, 2021 at 3:45 PM Willian Rampazzo  wrote:


Hi Pavel,

On Thu, Jun 10, 2021 at 8:25 AM Pavel Dovgalyuk
 wrote:


From: Pavel Dovgalyuk 

This patch adds a test for record/replay, which boots Linux
image from the disk and interacts with the network.
The idea and code of this test is borrowed from boot_linux.py
This test includes only x86_64 platform. Other platforms and
machines will be added later after testing and improving
record/replay to completely support them.

Each test consists of the following phases:
  - downloading the disk image
  - recording the execution
  - replaying the execution

Replay does not validate the output, but waits until QEMU
finishes the execution. This is reasonable, because
QEMU usually hangs when replay goes wrong.



It took me some time to review this patch because I could not identify
what makes it an automated test. I mean, when I look at an automated
test I expect a pass/fail/skip output. I could not identify the
expected output of this test compared to the actual result. If I did
not miss anything, this test will always pass unless there is an
exception that, potentially, could not be related to the record/replay
mechanism.


I was looking at the current record/replay test, replay_kernel.py and
I noticed you followed the same pattern in this test. Although I do
not agree much with a test that does not have a specific
objective/check, I'm fine if this has value for you.


That's right. Test is targeted to checking that replay is successful.
There are no other execution results to check.





Also, as far as I could check, you inherit from the LinuxTest class
but only use the cloudinit methods. Most of the other methods are not
used or overridden. In this case, I think it is worth splitting the
LinuxTest with a new mixin utility class to handle the cloudinit part.
If you need help with that, let me know.


As this is more related to code design, I can split the cloudinit code
later and adjust your code.
In this case,

Reviewed-by: Willian Rampazzo 


Thanks.






Signed-off-by: Pavel Dovgalyuk 
---
  MAINTAINERS  |1
  tests/acceptance/replay_linux.py |  116 ++
  2 files changed, 117 insertions(+)
  create mode 100644 tests/acceptance/replay_linux.py

diff --git a/MAINTAINERS b/MAINTAINERS
index 7d9cd29042..9675a1095b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2863,6 +2863,7 @@ F: include/sysemu/replay.h
  F: docs/replay.txt
  F: stubs/replay.c
  F: tests/acceptance/replay_kernel.py
+F: tests/acceptance/replay_linux.py
  F: tests/acceptance/reverse_debugging.py
  F: qapi/replay.json

diff --git a/tests/acceptance/replay_linux.py b/tests/acceptance/replay_linux.py
new file mode 100644
index 00..15953f9e49
--- /dev/null
+++ b/tests/acceptance/replay_linux.py
@@ -0,0 +1,116 @@
+# Record/replay test that boots a complete Linux system via a cloud image
+#
+# Copyright (c) 2020 ISP RAS
+#
+# Author:
+#  Pavel Dovgalyuk 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+import os
+import logging
+import time
+
+from avocado import skipUnless
+from avocado.utils import cloudinit
+from avocado.utils import network
+from avocado.utils import vmimage
+from avocado.utils import datadrainer
+from avocado.utils.path import find_command
+from avocado_qemu import LinuxTest
+
+class ReplayLinux(LinuxTest):
+"""
+Boots a Linux system, checking for a successful initialization
+"""
+
+timeout = 1800
+chksum = None
+hdd = 'ide-hd'
+cd = 'ide-cd'
+bus = 'ide'
+
+def setUp(self):
+super(ReplayLinux, self).setUp()
+self.boot_path = self.download_boot()
+self.cloudinit_path = self.prepare_cloudinit()
+
+def vm_add_disk(self, vm, path, id, device):
+bus_string = ''
+if self.bus:
+bus_string = ',bus=%s.%d' % (self.bus, id,)
+vm.add_args('-drive', 'file=%s,snapshot,id=disk%s,if=none' % (path, 
id))
+vm.add_args('-drive',
+'driver=blkreplay,id=disk%s-rr,if=none,image=disk%s' % (id, id))
+vm.add_args('-device',
+'%s,drive=disk%s-rr%s' % (device, id, bus_string))
+
+def launch_and_wait(self, record, args, shift):
+vm = self.get_vm()
+vm.add_args('-smp', '1')
+vm.add_args('-m', '1024')
+vm.add_args('-object', 'filter-replay,id=replay,netdev=hub0port0')
+if args:
+vm.add_args(*args)
+self.vm_add_disk(vm, self.boot_path, 0, self.hdd)
+self.vm_add_disk(vm, self.cloudinit_path, 1, self.cd)
+logger = logging.getLogger('replay')
+if record:
+logger.info('recording the execution...')
+mode = 'record'
+else:
+logger.info('replaying the execution...')
+mode = 'replay'
+replay_path = os.path.join(self.workdir

Re: [PATCH v2 0/1] Use correct trap number for *BSD

2021-06-24 Thread Warner Losh
On Thu, Jun 24, 2021 at 10:57 PM Warner Losh  wrote:

> This is a resend of a patch I sent back in March that was missing the
> proper
> includes due to a rebasing mistake.
>
> The issue is that all the BSDs use T_PAGEFLT to signal a page fault on x86,
> while linux uses 0xe. The patch harmonizes the different ways this can be
> spelled, as explained in the patch itself.
>

I forgot to mention that I've setup NetBSD and OpenBSD bhyve instances
to compile and test these changes to make double sure that they will pass
through the qemu CI.


> Warner Losh (1):
>   tcg: Use correct trap number for page faults on *BSD systems
>
>  accel/tcg/user-exec.c | 20 ++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
>
> --
> 2.22.1
>
>


[PATCH] arm/aspeed: rainier: Add i2c eeproms and muxes

2021-06-24 Thread Joel Stanley
These are the devices documented by the Rainier device tree. With this
we can see the guest discovering the multiplexers and probing the eeprom
devices:

 i2c i2c-2: Added multiplexed i2c bus 16
 i2c i2c-2: Added multiplexed i2c bus 17
 i2c i2c-2: Added multiplexed i2c bus 18
 i2c i2c-2: Added multiplexed i2c bus 19
 i2c-mux-gpio i2cmux: 4 port mux on 1e78a180.i2c-bus adapter
 at24 20-0050: 8192 byte 24c64 EEPROM, writable, 1 bytes/write
 i2c i2c-4: Added multiplexed i2c bus 20
 at24 21-0051: 8192 byte 24c64 EEPROM, writable, 1 bytes/write
 i2c i2c-4: Added multiplexed i2c bus 21
 at24 22-0052: 8192 byte 24c64 EEPROM, writable, 1 bytes/write

Signed-off-by: Joel Stanley 
---
 hw/arm/aspeed.c | 56 +
 1 file changed, 56 insertions(+)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 1301e8fdffb2..7ed22294c6eb 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -677,6 +677,10 @@ static void g220a_bmc_i2c_init(AspeedMachineState *bmc)
 static void rainier_bmc_i2c_init(AspeedMachineState *bmc)
 {
 AspeedSoCState *soc = &bmc->soc;
+I2CSlave *i2c_mux;
+
+smbus_eeprom_init_one(aspeed_i2c_get_bus(&soc->i2c, 0), 0x51,
+  g_malloc0(32 * 1024));
 
 /* The rainier expects a TMP275 but a TMP105 is compatible */
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), TYPE_TMP105,
@@ -685,11 +689,25 @@ static void rainier_bmc_i2c_init(AspeedMachineState *bmc)
  0x49);
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), TYPE_TMP105,
  0x4a);
+i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4),
+  "pca9546", 0x70);
+smbus_eeprom_init_one(pca954x_i2c_get_bus(i2c_mux, 0), 0x50,
+  g_malloc0(64 * 1024));
+smbus_eeprom_init_one(pca954x_i2c_get_bus(i2c_mux, 1), 0x51,
+  g_malloc0(64 * 1024));
+smbus_eeprom_init_one(pca954x_i2c_get_bus(i2c_mux, 2), 0x52,
+  g_malloc0(64 * 1024));
 
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 5), TYPE_TMP105,
  0x48);
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 5), TYPE_TMP105,
  0x49);
+i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 5),
+  "pca9546", 0x70);
+smbus_eeprom_init_one(pca954x_i2c_get_bus(i2c_mux, 0), 0x50,
+  g_malloc0(64 * 1024));
+smbus_eeprom_init_one(pca954x_i2c_get_bus(i2c_mux, 1), 0x51,
+  g_malloc0(64 * 1024));
 
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), TYPE_TMP105,
  0x48);
@@ -697,6 +715,16 @@ static void rainier_bmc_i2c_init(AspeedMachineState *bmc)
  0x4a);
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), TYPE_TMP105,
  0x4b);
+i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6),
+  "pca9546", 0x70);
+smbus_eeprom_init_one(pca954x_i2c_get_bus(i2c_mux, 0), 0x50,
+  g_malloc0(64 * 1024));
+smbus_eeprom_init_one(pca954x_i2c_get_bus(i2c_mux, 1), 0x51,
+  g_malloc0(64 * 1024));
+smbus_eeprom_init_one(pca954x_i2c_get_bus(i2c_mux, 2), 0x50,
+  g_malloc0(64 * 1024));
+smbus_eeprom_init_one(pca954x_i2c_get_bus(i2c_mux, 3), 0x51,
+  g_malloc0(64 * 1024));
 
 /* Bus 7: TODO dps310@76 */
 /* Bus 7: TODO max31785@52 */
@@ -704,11 +732,19 @@ static void rainier_bmc_i2c_init(AspeedMachineState *bmc)
 /* Bus 7: TODO si7021-a20@20 */
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), TYPE_TMP105,
  0x48);
+smbus_eeprom_init_one(aspeed_i2c_get_bus(&soc->i2c, 7), 0x50,
+  g_malloc0(64 * 1024));
+smbus_eeprom_init_one(aspeed_i2c_get_bus(&soc->i2c, 7), 0x51,
+  g_malloc0(64 * 1024));
 
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), TYPE_TMP105,
  0x48);
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), TYPE_TMP105,
  0x4a);
+smbus_eeprom_init_one(aspeed_i2c_get_bus(&soc->i2c, 8), 0x50,
+  g_malloc0(64 * 1024));
+smbus_eeprom_init_one(aspeed_i2c_get_bus(&soc->i2c, 8), 0x51,
+  g_malloc0(64 * 1024));
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), "pca9552", 0x61);
 /* Bus 8: ucd90320@11 */
 /* Bus 8: ucd90320@b */
@@ -716,14 +752,34 @@ static void rainier_bmc_i2c_init(AspeedMachineState *bmc)
 
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 9), "tmp423", 0x4c);
 i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 9), "tmp423", 0x4d);
+smbus_eeprom_init_one(aspeed_i2c_get_bus(&soc->i2c, 9), 0x50,
+  

[PATCH v2 1/1] tcg: Use correct trap number for page faults on *BSD systems

2021-06-24 Thread Warner Losh
The trap number for a page fault on BSD systems is T_PAGEFLT not 0xe. 0xe is
used by Linux and represents the intel hardware trap vector. The BSD kernels,
however, translate this to T_PAGEFLT in their Xpage, Xtrap0e, Xtrap14, etc fault
handlers. This is true for i386 and x86_64, though the name of the trap handler
can vary with the flavor of BSD. As far as I can tell, Linux doesn't provide a
define for this value. Invent a new one (PAGE_FAULT_TRAP) and use it instead to
avoid uglier ifdefs.

Signed-off-by: Mark Johnston 
Signed-off-by: Juergen Lock 
[ Rework to avoid ifdefs and expand it to i386 ]
Signed-off-by: Warner Losh 
---
 accel/tcg/user-exec.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index fb2d43e6a9..e2d0165670 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -254,28 +254,35 @@ void *probe_access(CPUArchState *env, target_ulong addr, 
int size,
 
 #if defined(__NetBSD__)
 #include 
+#include 
 
 #define EIP_sig(context) ((context)->uc_mcontext.__gregs[_REG_EIP])
 #define TRAP_sig(context)((context)->uc_mcontext.__gregs[_REG_TRAPNO])
 #define ERROR_sig(context)   ((context)->uc_mcontext.__gregs[_REG_ERR])
 #define MASK_sig(context)((context)->uc_sigmask)
+#define PAGE_FAULT_TRAP  T_PAGEFLT
 #elif defined(__FreeBSD__) || defined(__DragonFly__)
 #include 
+#include 
 
 #define EIP_sig(context)  (*((unsigned long *)&(context)->uc_mcontext.mc_eip))
 #define TRAP_sig(context)((context)->uc_mcontext.mc_trapno)
 #define ERROR_sig(context)   ((context)->uc_mcontext.mc_err)
 #define MASK_sig(context)((context)->uc_sigmask)
+#define PAGE_FAULT_TRAP  T_PAGEFLT
 #elif defined(__OpenBSD__)
+#include 
 #define EIP_sig(context) ((context)->sc_eip)
 #define TRAP_sig(context)((context)->sc_trapno)
 #define ERROR_sig(context)   ((context)->sc_err)
 #define MASK_sig(context)((context)->sc_mask)
+#define PAGE_FAULT_TRAP  T_PAGEFLT
 #else
 #define EIP_sig(context) ((context)->uc_mcontext.gregs[REG_EIP])
 #define TRAP_sig(context)((context)->uc_mcontext.gregs[REG_TRAPNO])
 #define ERROR_sig(context)   ((context)->uc_mcontext.gregs[REG_ERR])
 #define MASK_sig(context)((context)->uc_sigmask)
+#define PAGE_FAULT_TRAP  0xe
 #endif
 
 int cpu_signal_handler(int host_signum, void *pinfo,
@@ -301,34 +308,42 @@ int cpu_signal_handler(int host_signum, void *pinfo,
 pc = EIP_sig(uc);
 trapno = TRAP_sig(uc);
 return handle_cpu_signal(pc, info,
- trapno == 0xe ? (ERROR_sig(uc) >> 1) & 1 : 0,
+ trapno == PAGE_FAULT_TRAP ?
+ (ERROR_sig(uc) >> 1) & 1 : 0,
  &MASK_sig(uc));
 }
 
 #elif defined(__x86_64__)
 
 #ifdef __NetBSD__
+#include 
 #define PC_sig(context)   _UC_MACHINE_PC(context)
 #define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO])
 #define ERROR_sig(context)((context)->uc_mcontext.__gregs[_REG_ERR])
 #define MASK_sig(context) ((context)->uc_sigmask)
+#define PAGE_FAULT_TRAP   T_PAGEFLT
 #elif defined(__OpenBSD__)
+#include 
 #define PC_sig(context)   ((context)->sc_rip)
 #define TRAP_sig(context) ((context)->sc_trapno)
 #define ERROR_sig(context)((context)->sc_err)
 #define MASK_sig(context) ((context)->sc_mask)
+#define PAGE_FAULT_TRAP   T_PAGEFLT
 #elif defined(__FreeBSD__) || defined(__DragonFly__)
 #include 
+#include 
 
 #define PC_sig(context)  (*((unsigned long *)&(context)->uc_mcontext.mc_rip))
 #define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno)
 #define ERROR_sig(context)((context)->uc_mcontext.mc_err)
 #define MASK_sig(context) ((context)->uc_sigmask)
+#define PAGE_FAULT_TRAP   T_PAGEFLT
 #else
 #define PC_sig(context)   ((context)->uc_mcontext.gregs[REG_RIP])
 #define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO])
 #define ERROR_sig(context)((context)->uc_mcontext.gregs[REG_ERR])
 #define MASK_sig(context) ((context)->uc_sigmask)
+#define PAGE_FAULT_TRAP   0xe
 #endif
 
 int cpu_signal_handler(int host_signum, void *pinfo,
@@ -346,7 +361,8 @@ int cpu_signal_handler(int host_signum, void *pinfo,
 
 pc = PC_sig(uc);
 return handle_cpu_signal(pc, info,
- TRAP_sig(uc) == 0xe ? (ERROR_sig(uc) >> 1) & 1 : 
0,
+ TRAP_sig(uc) == PAGE_FAULT_TRAP ?
+ (ERROR_sig(uc) >> 1) & 1 : 0,
  &MASK_sig(uc));
 }
 
-- 
2.22.1




[PATCH v2 0/1] Use correct trap number for *BSD

2021-06-24 Thread Warner Losh
This is a resend of a patch I sent back in March that was missing the proper
includes due to a rebasing mistake.

Warner Losh (1):
  tcg: Use correct trap number for page faults on *BSD systems

 accel/tcg/user-exec.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

-- 
2.22.1




[PATCH v2 0/1] Use correct trap number for *BSD

2021-06-24 Thread Warner Losh
This is a resend of a patch I sent back in March that was missing the proper
includes due to a rebasing mistake.

The issue is that all the BSDs use T_PAGEFLT to signal a page fault on x86,
while linux uses 0xe. The patch harmonizes the different ways this can be
spelled, as explained in the patch itself.

Warner Losh (1):
  tcg: Use correct trap number for page faults on *BSD systems

 accel/tcg/user-exec.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

-- 
2.22.1




Re: [PATCH 0/5] dp8393x: fixes for MacOS toolbox ROM

2021-06-24 Thread Finn Thain
On Thu, 24 Jun 2021, Mark Cave-Ayland wrote:

> Thanks for the link and the detailed testing information. I've been 
> trying to understand why you had to set the MAC address in the ARC 
> firmware so I had a bit of an experiment here.
> 
> The reason that you need to do this is because of the NVRAM 
> configuration in your command line, in particular -global 
> ds1225y.size=8200. What this does is extend the NVRAM over the top of 
> the dp8393x-prom area where QEMU places the NIC MAC address and checksum 
> on startup, so the NVRAM captures the MAC address reads/writes instead. 
> The net effect of this is that the empty NVRAM initially reads all zeros 
> and why an initial setup is required to set the MAC address.
> 
> This can be seen quite clearly in the "info mtree" output:
> 
> 80009000-8000b007 (prio 0, i/o): nvram
> 8000b000-8000bfff (prio 0, rom): dp8393x-prom
> 
> However if you completely drop -global ds1225y.size=8200 from your 
> command line then the NVRAM doesn't overrun into the dp8393x-prom area, 
> and the ARC firmware picks up the MAC address from QEMU correctly:
> 
> 80009000-8000afff (prio 0, i/o): nvram
> 8000b000-8000bfff (prio 0, rom): dp8393x-prom
> 
> I've also looked over the entire SONIC datasheet to see if the PROM 
> format is documented, and according to that there is no non-volatile 
> storage available on the chip itself. 

Yes, that's my understanding also. The relevant National Semiconductor 
Application Notes seem to include a separate PROM. And if you closely 
examine the Linux macsonic.c driver, you'll see that the PowerBook 5x0 
models get a random MAC address because no-one (outside of Apple) knows 
where the real MAC address is stored.

> Testing shows that the checksum algorithm currently used for the dp8393x 
> device generates the same result as that generated by the ARC firmware, 
> which is known to be different than that used by the Q800 machine.
> 
> From this I conclude that the PROM is provided by the board and not the 
> chipset, and therefore each machine should construct its own PROM 
> accordingly. I'll send a v2 patchset shortly with these changes which 
> shall also include the proposed endian patch.
> 

If you potentially have both a ds1225y NVRAM and a dp8393x PROM (for the 
magnum machine) how do you avoid ending up with conflicting state? Would 
the two storage devices have to be mutually exclusive?



RE: [PATCH v4] ui/gtk: New -display gtk option 'full-screen-on-monitor'.

2021-06-24 Thread Khor, Swee Aun
Hi Khairul, 

> +gdk_monitor = gdk_display_get_monitor(window_display,
> +  
> opts->u.gtk.full_screen_on_monitor
> +  - 1);
> +if (gdk_monitor != NULL) {
> +monitor_status = true;
[Romli, Khairul Anuar] Do you think we should use gdk_display_get_monitor 
inside the if check against the NULL value rather than using a variable? Indeed 
that will cause some code readability difficulty but I don't see gdk_monitor is 
being used beyond this check.

SweeAun: I can do that.  

Regards,
SweeAun

-Original Message-
From: Romli, Khairul Anuar  
Sent: Friday, June 25, 2021 12:28 PM
To: Khor, Swee Aun 
Cc: qemu-devel@nongnu.org; kra...@redhat.com; arm...@redhat.com; 
ebl...@redhat.com; Kasireddy, Vivek ; Mazlan, Hazwan 
Arif 
Subject: RE: [PATCH v4] ui/gtk: New -display gtk option 
'full-screen-on-monitor'.

Hi Swee Aun,

I have some comment on the patch.

> -Original Message-
> From: Khor, Swee Aun 
> Sent: Thursday, June 24, 2021 4:43 PM
> To: qemu-devel@nongnu.org
> Cc: Khor, Swee Aun ; kra...@redhat.com; 
> arm...@redhat.com; ebl...@redhat.com; Romli, Khairul Anuar 
> ; Kasireddy, Vivek 
> ; Mazlan, Hazwan Arif 
> ; Khor
> Subject: [PATCH v4] ui/gtk: New -display gtk option 'full-screen-on-monitor'.
> 
> This lets user select monitor number to display QEMU in full screen 
> with - display gtk,full-screen-on-monitor=.
> 
> v2:
> - Added documentation for new member.
> - Renamed member name from monitor-num to monitor.
> 
> v3:
> - Cleaned up commit message subject and signed-off format.
> - Renamed member name from monitor to full-screen-on-monitor to make 
> clear this option automatically enables full screen.
> - Added more detail documentation to specify full-screen-on-monitor 
> option index started from 1.
> - Added code to check windows has been launched successfully at 
> specified monitor.
> 
> v4:
> - Used PRId64 format specifier for int64_t variable in warn_report().
> 
> Signed-off-by: Khor, Swee Aun 
> ---
>  qapi/ui.json| 10 +++---
>  qemu-options.hx |  2 +-
>  ui/gtk.c| 35 +++
>  3 files changed, 43 insertions(+), 4 deletions(-)
> 
> diff --git a/qapi/ui.json b/qapi/ui.json index 1052ca9c38..d775c29534 
> 100644
> --- a/qapi/ui.json
> +++ b/qapi/ui.json
> @@ -1035,13 +1035,17 @@
>  #   assuming the guest will resize the display to match
>  #   the window size then.  Otherwise it defaults to "off".
>  #   Since 3.1
> -#
> +# @full-screen-on-monitor: Monitor number to display QEMU in full screen.
> +#  Monitor number started from index 1. If total 
> number
> +#  of monitors is 3, possible values for this option 
> are
> +#  1, 2 or 3.
>  # Since: 2.12
>  #
>  ##
>  { 'struct'  : 'DisplayGTK',
> -  'data': { '*grab-on-hover' : 'bool',
> -'*zoom-to-fit'   : 'bool'  } }
> +  'data': { '*grab-on-hover'  : 'bool',
> +'*zoom-to-fit': 'bool',
> +'*full-screen-on-monitor' : 'int' } }
> 
>  ##
>  # @DisplayEGLHeadless:
> diff --git a/qemu-options.hx b/qemu-options.hx index
> 14258784b3..29836db663 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -1787,7 +1787,7 @@ DEF("display", HAS_ARG, QEMU_OPTION_display,
>  "[,window_close=on|off][,gl=on|core|es|off]\n"
>  #endif
>  #if defined(CONFIG_GTK)
> -"-display gtk[,grab_on_hover=on|off][,gl=on|off]|\n"
> +"-display gtk[,grab-on-hover=on|off][,gl=on|off][,full-screen-on-
> monitor=]\n"
>  #endif
>  #if defined(CONFIG_VNC)
>  "-display vnc=[,]\n"
> diff --git a/ui/gtk.c b/ui/gtk.c
> index 98046f577b..255f25cabd 100644
> --- a/ui/gtk.c
> +++ b/ui/gtk.c
> @@ -2189,6 +2189,10 @@ static void gtk_display_init(DisplayState *ds, 
> DisplayOptions *opts)
>  GdkDisplay *window_display;
>  GtkIconTheme *theme;
>  char *dir;
> +int monitor_n;
> +GdkScreen *gdk_screen;
> +GdkMonitor *gdk_monitor;
> +bool monitor_status = false;
> 
>  if (!gtkinit) {
>  fprintf(stderr, "gtk initialization failed\n"); @@ -2268,6 
> +2272,37 @@ static void gtk_display_init(DisplayState *ds, DisplayOptions 
> *opts)
>  gtk_menu_item_activate(GTK_MENU_ITEM(s->grab_on_hover_item));
>  }
>  gd_clipboard_init(s);
> +
> +if (opts->u.gtk.has_full_screen_on_monitor) {
> +monitor_n = gdk_display_get_n_monitors(window_display);
> +
> +if (opts->u.gtk.full_screen_on_monitor <= monitor_n &&
> +opts->u.gtk.full_screen_on_monitor > 0) {
> +gdk_screen = gdk_display_get_default_screen(window_display);
> +gtk_window_fullscreen_on_monitor(GTK_WINDOW(s->window),
> gdk_screen,
> + 
> opts->u.gtk.full_screen_on_monitor
> +

Qemu on Haiku

2021-06-24 Thread Richard Zak
Hello there! I noticed the message which appears when building qemu on
Haiku. I'd hate for Haiku to lose qemu, so I would like to help!

What is needed in terms of a build system for continuous integration? I'm
not familiar with CI systems, other than simply knowing what they do.

-- 
Regards,

Richard J. Zak
Professional Genius
PGP Key: https://keybase.io/rjzak/key.asc


[PULL 6/6] net/net.c: Add handler for passthrough filter command

2021-06-24 Thread Zhang Chen
Use the connection protocol,src port,dst port,src ip,dst ip as the key
to passthrough certain network traffic in object with network packet
processing function.

Signed-off-by: Zhang Chen 
---
 net/net.c | 199 +-
 1 file changed, 197 insertions(+), 2 deletions(-)

diff --git a/net/net.c b/net/net.c
index 00f2be7a58..9ede98d166 100644
--- a/net/net.c
+++ b/net/net.c
@@ -55,6 +55,8 @@
 #include "net/colo-compare.h"
 #include "net/filter.h"
 #include "qapi/string-output-visitor.h"
+#include "net/colo-compare.h"
+#include "qom/object_interfaces.h"
 
 /* Net bridge is currently not supported for W32. */
 #if !defined(_WIN32)
@@ -1195,14 +1197,207 @@ void qmp_netdev_del(const char *id, Error **errp)
 }
 }
 
+static int check_addr(InetSocketAddressBase *addr)
+{
+if (!addr || (addr->host && !qemu_isdigit(addr->host[0]))) {
+return -1;
+}
+
+if (atoi(addr->port) > 65536 || atoi(addr->port) < 0) {
+return -1;
+}
+
+return 0;
+}
+
+/* The initial version only supports colo-compare */
+static CompareState *passthrough_filter_check(IPFlowSpec *spec, Error **errp)
+{
+Object *container;
+Object *obj;
+CompareState *s;
+
+if (!spec->object_name) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "object-name",
+   "Need input object name");
+return NULL;
+}
+
+container = object_get_objects_root();
+obj = object_resolve_path_component(container, spec->object_name);
+if (!obj) {
+error_setg(errp, "object '%s' not found", spec->object_name);
+return NULL;
+}
+
+s = COLO_COMPARE(obj);
+
+if (!getprotobyname(spec->protocol)) {
+error_setg(errp, "Passthrough filter get wrong protocol");
+return NULL;
+}
+
+if (spec->source) {
+if (check_addr(spec->source)) {
+error_setg(errp, "Passthrough filter get wrong source");
+return NULL;
+}
+}
+
+if (spec->destination) {
+if (check_addr(spec->destination)) {
+error_setg(errp, "Passthrough filter get wrong destination");
+return NULL;
+}
+}
+
+return s;
+}
+
+/* The initial version only supports colo-compare */
+static COLOPassthroughEntry *passthrough_filter_find(CompareState *s,
+ COLOPassthroughEntry *ent)
+{
+COLOPassthroughEntry *next = NULL, *origin = NULL;
+
+if (!QLIST_EMPTY(&s->passthroughlist)) {
+QLIST_FOREACH_SAFE(origin, &s->passthroughlist, node, next) {
+if ((ent->l4_protocol.p_proto == origin->l4_protocol.p_proto) &&
+(ent->src_port == origin->src_port) &&
+(ent->dst_port == origin->dst_port) &&
+(ent->src_ip.s_addr == origin->src_ip.s_addr) &&
+(ent->dst_ip.s_addr == origin->dst_ip.s_addr)) {
+return origin;
+}
+}
+}
+
+return NULL;
+}
+
+/* The initial version only supports colo-compare */
+static void passthrough_filter_add(CompareState *s,
+   IPFlowSpec *spec,
+   Error **errp)
+{
+COLOPassthroughEntry *pass = NULL;
+
+pass = g_new0(COLOPassthroughEntry, 1);
+
+if (spec->protocol) {
+memcpy(&pass->l4_protocol, getprotobyname(spec->protocol),
+   sizeof(struct protoent));
+}
+
+if (spec->source) {
+if (!inet_aton(spec->source->host, &pass->src_ip)) {
+pass->src_ip.s_addr = 0;
+}
+
+pass->src_port = atoi(spec->source->port);
+}
+
+if (spec->destination) {
+if (!inet_aton(spec->destination->host, &pass->dst_ip)) {
+pass->dst_ip.s_addr = 0;
+}
+
+pass->dst_port = atoi(spec->destination->port);
+}
+
+qemu_mutex_lock(&s->passthroughlist_mutex);
+if (passthrough_filter_find(s, pass)) {
+error_setg(errp, "The pass through connection already exists");
+g_free(pass);
+qemu_mutex_unlock(&s->passthroughlist_mutex);
+return;
+}
+
+QLIST_INSERT_HEAD(&s->passthroughlist, pass, node);
+qemu_mutex_unlock(&s->passthroughlist_mutex);
+}
+
+/* The initial version only supports colo-compare */
+static void passthrough_filter_del(CompareState *s,
+   IPFlowSpec *spec,
+   Error **errp)
+{
+COLOPassthroughEntry *pass = NULL, *result = NULL;
+
+pass = g_new0(COLOPassthroughEntry, 1);
+
+if (spec->protocol) {
+memcpy(&pass->l4_protocol, getprotobyname(spec->protocol),
+   sizeof(struct protoent));
+}
+
+if (spec->source) {
+if (!inet_aton(spec->source->host, &pass->src_ip)) {
+pass->src_ip.s_addr = 0;
+}
+
+pass->src_port = atoi(spec->source->port);
+}
+
+if (spec->destination) {
+if (!inet_aton(spec->destination->host

[PULL 5/6] net/colo-compare: Add passthrough list to CompareState

2021-06-24 Thread Zhang Chen
Add passthrough list for each CompareState.

Signed-off-by: Zhang Chen 
---
 net/colo-compare.c | 28 
 net/colo-compare.h | 12 
 2 files changed, 40 insertions(+)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index dcd24bb113..64e72c82f1 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -161,6 +161,7 @@ static int packet_enqueue(CompareState *s, int mode, 
Connection **con)
 ConnectionKey key;
 Packet *pkt = NULL;
 Connection *conn;
+COLOPassthroughEntry *pass, *next;
 int ret;
 
 if (mode == PRIMARY_IN) {
@@ -180,6 +181,31 @@ static int packet_enqueue(CompareState *s, int mode, 
Connection **con)
 }
 fill_connection_key(pkt, &key);
 
+/* Check COLO passthrough specifications */
+qemu_mutex_lock(&s->passthroughlist_mutex);
+if (!QLIST_EMPTY(&s->passthroughlist)) {
+QLIST_FOREACH_SAFE(pass, &s->passthroughlist, node, next) {
+if (key.ip_proto == pass->l4_protocol.p_proto) {
+if (pass->src_port == 0 || pass->src_port == key.dst_port) {
+if (pass->src_ip.s_addr == 0 ||
+pass->src_ip.s_addr == key.src.s_addr) {
+if (pass->dst_port == 0 ||
+pass->dst_port == key.src_port) {
+if (pass->dst_ip.s_addr == 0 ||
+pass->dst_ip.s_addr == key.dst.s_addr) {
+packet_destroy(pkt, NULL);
+pkt = NULL;
+qemu_mutex_unlock(&s->passthroughlist_mutex);
+return -1;
+}
+}
+}
+}
+}
+}
+}
+qemu_mutex_unlock(&s->passthroughlist_mutex);
+
 conn = connection_get(s->connection_track_table,
   &key,
   &s->conn_list);
@@ -1232,6 +1258,7 @@ static void colo_compare_complete(UserCreatable *uc, 
Error **errp)
 }
 
 g_queue_init(&s->conn_list);
+QLIST_INIT(&s->passthroughlist);
 
 s->connection_track_table = g_hash_table_new_full(connection_key_hash,
   connection_key_equal,
@@ -1246,6 +1273,7 @@ static void colo_compare_complete(UserCreatable *uc, 
Error **errp)
 qemu_cond_init(&event_complete_cond);
 colo_compare_active = true;
 }
+qemu_mutex_init(&s->passthroughlist_mutex);
 QTAILQ_INSERT_TAIL(&net_compares, s, next);
 qemu_mutex_unlock(&colo_compare_mutex);
 
diff --git a/net/colo-compare.h b/net/colo-compare.h
index 031b627a2f..995f28b833 100644
--- a/net/colo-compare.h
+++ b/net/colo-compare.h
@@ -23,6 +23,7 @@
 #include "migration/migration.h"
 #include "sysemu/iothread.h"
 #include "colo.h"
+#include 
 
 #define TYPE_COLO_COMPARE "colo-compare"
 typedef struct CompareState CompareState;
@@ -39,6 +40,15 @@ typedef struct COLOSendCo {
 int ret;
 } COLOSendCo;
 
+typedef struct COLOPassthroughEntry {
+struct protoent l4_protocol;
+int src_port;
+int dst_port;
+struct in_addr src_ip;
+struct in_addr dst_ip;
+QLIST_ENTRY(COLOPassthroughEntry) node;
+} COLOPassthroughEntry;
+
 /*
  *  + CompareState ++
  *  |   |
@@ -95,6 +105,8 @@ struct CompareState {
 
 QEMUBH *event_bh;
 enum colo_event event;
+QLIST_HEAD(, COLOPassthroughEntry) passthroughlist;
+QemuMutex passthroughlist_mutex;
 
 QTAILQ_ENTRY(CompareState) next;
 };
-- 
2.25.1




[PULL 3/6] hmp-commands: Add new HMP command for filter passthrough

2021-06-24 Thread Zhang Chen
Add hmp_passthrough_filter_add and hmp_passthrough_filter_del so that users
can maintain an object's network passthrough list from the human monitor.

Signed-off-by: Zhang Chen 
---
 hmp-commands.hx   | 26 +++
 include/monitor/hmp.h |  2 ++
 monitor/hmp-cmds.c| 76 +++
 3 files changed, 104 insertions(+)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 8e45bce2cd..426a7d6cda 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1292,6 +1292,32 @@ SRST
   Remove host network device.
 ERST
 
+{
+.name   = "passthrough_filter_add",
+.args_type  = "protocol:s?,object-name:s,src:s?,dst:s?",
+.params = "[protocol] object-name [src] [dst]",
+.help   = "Add network passthrough rule to object passthrough 
list",
+.cmd= hmp_passthrough_filter_add,
+},
+
+SRST
+``passthrough_filter_add``
+  Add network stream to object passthrough list.
+ERST
+
+{
+.name   = "passthrough_filter_del",
+.args_type  = "protocol:s?,object-name:s,src:s?,dst:s?",
+.params = "[protocol] object-name [src] [dst]",
+.help   = "Delete network passthrough rule from object passthrough 
list",
+.cmd= hmp_passthrough_filter_del,
+},
+
+SRST
+``passthrough_filter_del``
+  Delete network stream from object passthrough list.
+ERST
+
 {
 .name   = "object_add",
 .args_type  = "object:S",
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 3baa1058e2..ba6987e552 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -77,6 +77,8 @@ void hmp_device_del(Monitor *mon, const QDict *qdict);
 void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict);
 void hmp_netdev_add(Monitor *mon, const QDict *qdict);
 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
+void hmp_passthrough_filter_add(Monitor *mon, const QDict *qdict);
+void hmp_passthrough_filter_del(Monitor *mon, const QDict *qdict);
 void hmp_getfd(Monitor *mon, const QDict *qdict);
 void hmp_closefd(Monitor *mon, const QDict *qdict);
 void hmp_sendkey(Monitor *mon, const QDict *qdict);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 0942027208..26ff316c93 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1638,6 +1638,82 @@ void hmp_netdev_del(Monitor *mon, const QDict *qdict)
 hmp_handle_error(mon, err);
 }
 
+void hmp_passthrough_filter_add(Monitor *mon, const QDict *qdict)
+{
+IPFlowSpec *spec = g_new0(IPFlowSpec, 1);
+char *src, *dst;
+Error *err = NULL;
+
+spec->protocol = g_strdup(qdict_get_try_str(qdict, "protocol"));
+spec->object_name = g_strdup(qdict_get_try_str(qdict, "object-name"));
+src = g_strdup(qdict_get_try_str(qdict, "src"));
+dst = g_strdup(qdict_get_try_str(qdict, "dst"));
+
+if (src) {
+spec->source = g_new0(InetSocketAddressBase, 1);
+
+if (inet_parse_base(spec->source, src, NULL)) {
+monitor_printf(mon, "Incorrect passthrough src address\n");
+goto out;
+}
+}
+
+if (dst) {
+spec->destination = g_new0(InetSocketAddressBase, 1);
+
+if (inet_parse_base(spec->destination, dst, NULL)) {
+monitor_printf(mon, "Incorrect passthrough dst address\n");
+goto out;
+}
+}
+
+qmp_passthrough_filter_add(spec, &err);
+
+out:
+g_free(src);
+g_free(dst);
+
+hmp_handle_error(mon, err);
+}
+
+void hmp_passthrough_filter_del(Monitor *mon, const QDict *qdict)
+{
+IPFlowSpec *spec = g_new0(IPFlowSpec, 1);
+char *src, *dst;
+Error *err = NULL;
+
+spec->protocol = g_strdup(qdict_get_try_str(qdict, "protocol"));
+spec->object_name = g_strdup(qdict_get_try_str(qdict, "object-name"));
+src = g_strdup(qdict_get_try_str(qdict, "src"));
+dst = g_strdup(qdict_get_try_str(qdict, "dst"));
+
+if (src) {
+spec->source = g_new0(InetSocketAddressBase, 1);
+
+if (inet_parse_base(spec->source, src, NULL)) {
+monitor_printf(mon, "Incorrect passthrough src address\n");
+goto out;
+}
+}
+
+if (dst) {
+spec->destination = g_new0(InetSocketAddressBase, 1);
+
+if (inet_parse_base(spec->destination, dst, NULL)) {
+monitor_printf(mon, "Incorrect passthrough dst address\n");
+goto out;
+}
+}
+
+qmp_passthrough_filter_del(spec, &err);
+
+out:
+g_free(src);
+g_free(dst);
+
+hmp_handle_error(mon, err);
+}
+
 void hmp_object_add(Monitor *mon, const QDict *qdict)
 {
 const char *options = qdict_get_str(qdict, "object");
-- 
2.25.1




[PULL 4/6] net/colo-compare: Move data structure and define to .h file.

2021-06-24 Thread Zhang Chen
Rename the structures with a COLO prefix and move them to the .h file,
so that other modules can reuse the COLO code.

Signed-off-by: Zhang Chen 
---
 net/colo-compare.c | 132 -
 net/colo-compare.h |  86 +
 2 files changed, 109 insertions(+), 109 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index b100e7b51f..dcd24bb113 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -17,29 +17,18 @@
 #include "qemu/error-report.h"
 #include "trace.h"
 #include "qapi/error.h"
-#include "net/net.h"
 #include "net/eth.h"
 #include "qom/object_interfaces.h"
 #include "qemu/iov.h"
 #include "qom/object.h"
 #include "net/queue.h"
-#include "chardev/char-fe.h"
 #include "qemu/sockets.h"
-#include "colo.h"
-#include "sysemu/iothread.h"
 #include "net/colo-compare.h"
-#include "migration/colo.h"
-#include "migration/migration.h"
 #include "util.h"
 
 #include "block/aio-wait.h"
 #include "qemu/coroutine.h"
 
-#define TYPE_COLO_COMPARE "colo-compare"
-typedef struct CompareState CompareState;
-DECLARE_INSTANCE_CHECKER(CompareState, COLO_COMPARE,
- TYPE_COLO_COMPARE)
-
 static QTAILQ_HEAD(, CompareState) net_compares =
QTAILQ_HEAD_INITIALIZER(net_compares);
 
@@ -47,13 +36,13 @@ static NotifierList colo_compare_notifiers =
 NOTIFIER_LIST_INITIALIZER(colo_compare_notifiers);
 
 #define COMPARE_READ_LEN_MAX NET_BUFSIZE
-#define MAX_QUEUE_SIZE 1024
+#define MAX_COLO_QUEUE_SIZE 1024
 
 #define COLO_COMPARE_FREE_PRIMARY 0x01
 #define COLO_COMPARE_FREE_SECONDARY   0x02
 
-#define REGULAR_PACKET_CHECK_MS 1000
-#define DEFAULT_TIME_OUT_MS 3000
+#define COLO_REGULAR_PACKET_CHECK_MS 1000
+#define COLO_DEFAULT_TIME_OUT_MS 3000
 
 /* #define DEBUG_COLO_PACKETS */
 
@@ -64,87 +53,6 @@ static QemuCond event_complete_cond;
 static int event_unhandled_count;
 static uint32_t max_queue_size;
 
-/*
- *  + CompareState ++
- *  |   |
- *  +---+   +---+ +---+
- *  |   conn list   + - >  conn + --- >  conn + -- > ..
- *  +---+   +---+ +---+
- *  |   | |   | |  |
- *  +---+ +---v+  +---v++---v+ +---v+
- *|primary |  |secondary|primary | |secondary
- *|packet  |  |packet  +|packet  | |packet  +
- *++  ++++ ++
- *|   | |  |
- *+---v+  +---v++---v+ +---v+
- *|primary |  |secondary|primary | |secondary
- *|packet  |  |packet  +|packet  | |packet  +
- *++  ++++ ++
- *|   | |  |
- *+---v+  +---v++---v+ +---v+
- *|primary |  |secondary|primary | |secondary
- *|packet  |  |packet  +|packet  | |packet  +
- *++  ++++ ++
- */
-
-typedef struct SendCo {
-Coroutine *co;
-struct CompareState *s;
-CharBackend *chr;
-GQueue send_list;
-bool notify_remote_frame;
-bool done;
-int ret;
-} SendCo;
-
-typedef struct SendEntry {
-uint32_t size;
-uint32_t vnet_hdr_len;
-uint8_t *buf;
-} SendEntry;
-
-struct CompareState {
-Object parent;
-
-char *pri_indev;
-char *sec_indev;
-char *outdev;
-char *notify_dev;
-CharBackend chr_pri_in;
-CharBackend chr_sec_in;
-CharBackend chr_out;
-CharBackend chr_notify_dev;
-SocketReadState pri_rs;
-SocketReadState sec_rs;
-SocketReadState notify_rs;
-SendCo out_sendco;
-SendCo notify_sendco;
-bool vnet_hdr;
-uint64_t compare_timeout;
-uint32_t expired_scan_cycle;
-
-/*
- * Record the connection that through the NIC
- * Element type: Connection
- */
-GQueue conn_list;
-/* Record the connection without repetition */
-GHashTable *connection_track_table;
-
-IOThread *iothread;
-GMainContext *worker_context;
-QEMUTimer *packet_check_timer;
-
-QEMUBH *event_bh;
-enum colo_event event;
-
-QTAILQ_ENTRY(CompareState) next;
-};
-
-typedef struct CompareClass {
-ObjectClass parent_class;
-} CompareClass;
-
 enum {
 PRIMARY_IN = 0,
 SECONDARY_IN,
@@ -155,6 +63,12 @@ static const char *colo_mode[] = {
 [SECONDARY_IN] = "secondary",
 };
 
+typedef struct COLOSendEntry {
+uint32_t size;
+uint32_t vnet_hdr_len;
+uint8_t *buf;
+} COLOSendEntry;
+
 static int compare_chr_send(CompareState *s,
 uint8_t *buf,
 uint32_t size,
@@ -724,19 +638,19 @@ static void colo_compare_connection(void *opaque, void 
*user_data)
 
 

[PULL 2/6] util/qemu-sockets.c: Add inet_parse_base to handle InetSocketAddressBase

2021-06-24 Thread Zhang Chen
No need to carry the flag all the time in many scenarios.

Signed-off-by: Zhang Chen 
---
 include/qemu/sockets.h |  1 +
 util/qemu-sockets.c| 14 ++
 2 files changed, 15 insertions(+)

diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
index 0c34bf2398..3a0f8fa8f2 100644
--- a/include/qemu/sockets.h
+++ b/include/qemu/sockets.h
@@ -32,6 +32,7 @@ int socket_set_fast_reuse(int fd);
 int inet_ai_family_from_address(InetSocketAddress *addr,
 Error **errp);
 int inet_parse(InetSocketAddress *addr, const char *str, Error **errp);
+int inet_parse_base(InetSocketAddressBase *addr, const char *str, Error 
**errp);
 int inet_connect(const char *str, Error **errp);
 int inet_connect_saddr(InetSocketAddress *saddr, Error **errp);
 
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index 080a240b74..cd7fa0b884 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -713,6 +713,20 @@ int inet_parse(InetSocketAddress *addr, const char *str, 
Error **errp)
 return 0;
 }
 
+int inet_parse_base(InetSocketAddressBase *base, const char *str, Error **errp)
+{
+InetSocketAddress *addr;
+int ret = 0;
+
+addr = g_new0(InetSocketAddress, 1);
+ret = inet_parse(addr, str, errp);
+
+base->host = addr->host;
+base->port = addr->port;
+
+g_free(addr);
+return ret;
+}
 
 /**
  * Create a blocking socket and connect it to an address.
-- 
2.25.1




[PULL 0/6] COLO-Proxy patches for 2021-06-25

2021-06-24 Thread Zhang Chen
Hi Jason, Please help to queue COLO-proxy patches to net branch.

Thanks
Chen

The following changes since commit b22726abdfa54592d6ad88f65b0297c0e8b363e2:

  Merge remote-tracking branch 
'remotes/vivier2/tags/linux-user-for-6.1-pull-request' into staging (2021-06-22 
16:07:53 +0100)

are available in the Git repository at:

  https://github.com/zhangckid/qemu.git master-colo-21jun25-pull-request

for you to fetch changes up to 5375645efde8892b05a8b7c7a088b63a7d1fd5aa:

  net/net.c: Add handler for passthrough filter command (2021-06-23 17:22:40 
+0800)



This series adds a passthrough support framework for objects with a network
packet processing function. The first supported object is colo-compare.



Zhang Chen (6):
  qapi/net: Add IPFlowSpec and QMP command for filter passthrough
  util/qemu-sockets.c: Add inet_parse_base to handle
InetSocketAddressBase
  hmp-commands: Add new HMP command for filter passthrough
  net/colo-compare: Move data structure and define to .h file.
  net/colo-compare: Add passthrough list to CompareState
  net/net.c: Add handler for passthrough filter command

 hmp-commands.hx|  26 ++
 include/monitor/hmp.h  |   2 +
 include/qemu/sockets.h |   1 +
 monitor/hmp-cmds.c |  76 +++
 net/colo-compare.c | 160 ++--
 net/colo-compare.h |  98 
 net/net.c  | 205 +
 qapi/net.json  |  78 
 util/qemu-sockets.c|  14 +++
 9 files changed, 551 insertions(+), 109 deletions(-)

-- 
2.25.1




[PULL 1/6] qapi/net: Add IPFlowSpec and QMP command for filter passthrough

2021-06-24 Thread Zhang Chen
Real user scenarios do not need to monitor all traffic.
Add passthrough-filter-add and passthrough-filter-del to maintain
a network passthrough list in an object with a network packet processing
function. Add the IPFlowSpec struct for all QMP commands.
Most of the fields of IPFlowSpec are optional, except object-name.

Signed-off-by: Zhang Chen 
---
 net/net.c | 10 +++
 qapi/net.json | 78 +++
 2 files changed, 88 insertions(+)

diff --git a/net/net.c b/net/net.c
index 76bbb7c31b..00f2be7a58 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1195,6 +1195,16 @@ void qmp_netdev_del(const char *id, Error **errp)
 }
 }
 
+void qmp_passthrough_filter_add(IPFlowSpec *spec, Error **errp)
+{
+/* TODO implement setup passthrough rule */
+}
+
+void qmp_passthrough_filter_del(IPFlowSpec *spec, Error **errp)
+{
+/* TODO implement delete passthrough rule */
+}
+
 static void netfilter_print_info(Monitor *mon, NetFilterState *nf)
 {
 char *str;
diff --git a/qapi/net.json b/qapi/net.json
index 7fab2e7cd8..bfe38faab5 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -7,6 +7,7 @@
 ##
 
 { 'include': 'common.json' }
+{ 'include': 'sockets.json' }
 
 ##
 # @set_link:
@@ -696,3 +697,80 @@
 ##
 { 'event': 'FAILOVER_NEGOTIATED',
   'data': {'device-id': 'str'} }
+
+##
+# @IPFlowSpec:
+#
+# IP flow specification.
+#
+# @protocol: Transport layer protocol like TCP/UDP, etc. The protocol is the
+#string instead of enum, because it can be passed to 
getprotobyname(3)
+#and avoid duplication with /etc/protocols.
+#
+# @object-name: The @object-name means a qemu object with network packet
+#   processing function, for example colo-compare, filter-redirector,
+#   filter-mirror, etc. A VM can run with multiple network packet
+#   processing function objects. They can control different network
+#   data paths from netdev or chardev. So it needs the object-name
+#   to set the effective module.
+#
+# @source: Source address and port.
+#
+# @destination: Destination address and port.
+#
+# Since: 6.1
+##
+{ 'struct': 'IPFlowSpec',
+  'data': { '*protocol': 'str', 'object-name': 'str',
+'*source': 'InetSocketAddressBase',
+'*destination': 'InetSocketAddressBase' } }
+
+##
+# @passthrough-filter-add:
+#
+# Add passthrough entry IPFlowSpec to a qemu object with network packet
+# processing function, for example filter-mirror, COLO-compare, etc.
+# The object-name is necessary. The protocol and source/destination IP and
+# source/destination ports are optional. If only part of the
+# information is given, it will match all traffic.
+#
+# Returns: Nothing on success
+#
+# Since: 6.1
+#
+# Example:
+#
+# -> { "execute": "passthrough-filter-add",
+#  "arguments": { "protocol": "tcp", "object-name": "object0",
+#  "source": {"host": "192.168.1.1", "port": "1234"},
+#  "destination": {"host": "192.168.1.2", "port": "4321"} } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'passthrough-filter-add', 'boxed': true,
+ 'data': 'IPFlowSpec' }
+
+##
+# @passthrough-filter-del:
+#
+# Delete passthrough entry IPFlowSpec from a qemu object with network packet
+# processing function, for example filter-mirror, COLO-compare, etc.
+# The object-name is necessary. The protocol and source/destination IP and
+# source/destination ports are optional. If only part of the
+# information is given, only the exact same rule will be deleted.
+#
+# Returns: Nothing on success
+#
+# Since: 6.1
+#
+# Example:
+#
+# -> { "execute": "passthrough-filter-del",
+#  "arguments": { "protocol": "tcp", "object-name": "object0",
+#  "source": {"host": "192.168.1.1", "port": "1234"},
+#  "destination": {"host": "192.168.1.2", "port": "4321"} } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'passthrough-filter-del', 'boxed': true,
+ 'data': 'IPFlowSpec' }
-- 
2.25.1




Re: [PATCH qemu] hw/net/vmxnet3: Remove g_assert_not_reached() when VMXNET3_REG_ICR is written

2021-06-24 Thread Jason Wang



在 2021/6/25 上午10:19, Qiang Liu 写道:

Hi,
On Wed, Jun 23, 2021 at 11:23 AM Jason Wang  wrote:


在 2021/6/23 上午10:26, Qiang Liu 写道:

From: cyruscyliu 

A malicious guest user can write VMXNET3_REG_ICR to crash QEMU. This
patch removes the g_assert_not_reached() there and lets the access pass.

Fixes: 786fd2b0f87 ("VMXNET3 device implementation")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/309
Buglink: https://bugs.launchpad.net/qemu/+bug/1913923

Signed-off-by: Qiang Liu 


Do we need to warn about the unimplemented register?

If we remove the case branch, it will go to the default branch which
can warn users if
VMXNET_DEBUG_CB_ENABLED is defined, so there is no need to warn this
unimplemented register. Am I right?
```
vmxnet3_io_bar1_write(...
 default:
 VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
   addr, val, size);
 break;
```



Right.

I've queued this patch.

Thanks



Thanks



---
   hw/net/vmxnet3.c | 7 ---
   1 file changed, 7 deletions(-)

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index eff299f629..a388918479 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1786,13 +1786,6 @@ vmxnet3_io_bar1_write(void *opaque,
   vmxnet3_set_variable_mac(s, val, s->temp_mac);
   break;

-/* Interrupt Cause Register */
-case VMXNET3_REG_ICR:
-VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
-  val, size);
-g_assert_not_reached();
-break;
-
   /* Event Cause Register */
   case VMXNET3_REG_ECR:
   VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
--
2.30.2






[PATCH] nvdimm: add 'target-node' option

2021-06-24 Thread Jingqi Liu
Linux kernel version 5.1 brings in support for the volatile-use of
persistent memory as a hotplugged memory region (KMEM DAX).
When this feature is enabled, persistent memory can be seen as a
separate memory-only NUMA node(s). This newly-added memory can be
selected by its unique NUMA node.

Add 'target-node' option for 'nvdimm' device to indicate this NUMA
node. It can be extended to a new node after all existing NUMA nodes.

The 'node' option of 'pc-dimm' device is to add the DIMM to an
existing NUMA node. The 'node' should be in the available NUMA nodes.
For KMEM DAX mode, persistent memory can be in a new separate
memory-only NUMA node. The new node is created dynamically.
So users use 'target-node' to control whether persistent memory
is added to an existing NUMA node or a new NUMA node.

An example of configuration is as follows.

Using the following QEMU command:
 -object 
memory-backend-file,id=nvmem1,share=on,mem-path=/dev/dax0.0,size=3G,align=2M
 -device nvdimm,id=nvdimm1,memdev=nvmem1,label-size=128K,target-node=2

To list DAX devices:
 # daxctl list -u
 {
   "chardev":"dax0.0",
   "size":"3.00 GiB (3.22 GB)",
   "target_node":2,
   "mode":"devdax"
 }

To create a namespace in Device-DAX mode as a standard memory:
 $ ndctl create-namespace --mode=devdax --map=mem
To reconfigure DAX device from devdax mode to a system-ram mode:
 $ daxctl reconfigure-device dax0.0 --mode=system-ram

There are two existing NUMA nodes in Guest. After these operations,
persistent memory is configured as a separate Node 2 and
can be used as a volatile memory. This NUMA node is dynamically
created according to 'target-node'.

Signed-off-by: Jingqi Liu 
---
 docs/nvdimm.txt | 93 +
 hw/acpi/nvdimm.c| 18 ++--
 hw/i386/acpi-build.c| 12 +-
 hw/mem/nvdimm.c | 23 +-
 include/hw/mem/nvdimm.h | 15 ++-
 util/nvdimm-utils.c | 22 ++
 6 files changed, 175 insertions(+), 8 deletions(-)

diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
index 0aae682be3..083d954bb4 100644
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@@ -107,6 +107,99 @@ Note:
may result guest data corruption (e.g. breakage of guest file
system).
 
+Target node
+---
+
+Linux kernel version 5.1 brings in support for the volatile-use of
+persistent memory as a hotplugged memory region (KMEM DAX).
+When this feature is enabled, persistent memory can be seen as a
+separate memory-only NUMA node(s). This newly-added memory can be
+selected by its unique NUMA node.
+Add 'target-node' option for nvdimm device to indicate this NUMA node.
+It can be extended after all existing NUMA nodes.
+
+An example of configuration is presented below.
+
+Using the following QEMU command:
+ -object 
memory-backend-file,id=nvmem1,share=on,mem-path=/dev/dax0.0,size=3G,align=2M
+ -device nvdimm,id=nvdimm1,memdev=nvmem1,label-size=128K,target-node=1
+
+The below operations are in Guest.
+
+To list available NUMA nodes using numactl:
+ # numactl -H
+ available: 1 nodes (0)
+ node 0 cpus: 0 1 2 3 4 5 6 7
+ node 0 size: 5933 MB
+ node 0 free: 5457 MB
+ node distances:
+ node   0
+   0:  10
+
+To create a namespace in Device-DAX mode as a standard memory from
+all the available capacity of NVDIMM:
+
+ # ndctl create-namespace --mode=devdax --map=mem
+ {
+   "dev":"namespace0.0",
+   "mode":"devdax",
+   "map":"mem",
+   "size":"3.00 GiB (3.22 GB)",
+   "uuid":"4e4d8293-dd3b-4e43-8ad9-7f3d2a8d1680",
+   "daxregion":{
+ "id":0,
+ "size":"3.00 GiB (3.22 GB)",
+ "align":2097152,
+ "devices":[
+   {
+ "chardev":"dax0.0",
+ "size":"3.00 GiB (3.22 GB)",
+ "target_node":1,
+ "mode":"devdax"
+   }
+ ]
+   },
+   "align":2097152
+ }
+
+To list DAX devices:
+ # daxctl list -u
+ {
+   "chardev":"dax0.0",
+   "size":"3.00 GiB (3.22 GB)",
+   "target_node":1,
+   "mode":"devdax"
+ }
+
+To reconfigure DAX device from devdax mode to a system-ram mode:
+ # daxctl reconfigure-device dax0.0 --mode=system-ram
+ [
+   {
+ "chardev":"dax0.0",
+ "size":3217031168,
+ "target_node":1,
+ "mode":"system-ram",
+ "movable":false
+   }
+ ]
+
+After this operation, persistent memory is configured as a separate NUMA node
+and can be used as a volatile memory.
+The new NUMA node is Node 1:
+ # numactl -H
+ available: 2 nodes (0-1)
+ node 0 cpus: 0 1 2 3 4 5 6 7
+ node 0 size: 5933 MB
+ node 0 free: 5339 MB
+ node 1 cpus:
+ node 1 size: 2816 MB
+ node 1 free: 2815 MB
+ node distances:
+ node   0   1
+   0:  10  20
+   1:  20  10
+
+
 Hotplug
 ---
 
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index e3d5fe1939..376ad6fd58 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -228,11 +228,13 @@ nvdimm_build_structure_spa(GArray *structures, 
DeviceState *dev)
  NULL);
 uint64_t size = object_property_get_uint(OBJECT(dev), PC_DIMM_SIZE_PROP,
  NULL);
-

Re: [PATCH qemu] hw/net/vmxnet3: Remove g_assert_not_reached() when VMXNET3_REG_ICR is written

2021-06-24 Thread Qiang Liu
Hi,
On Wed, Jun 23, 2021 at 11:23 AM Jason Wang  wrote:
>
>
> 在 2021/6/23 上午10:26, Qiang Liu 写道:
> > From: cyruscyliu 
> >
> > A malicious guest user can write VMXNET3_REG_ICR to crash QEMU. This
> > patch removes the g_assert_not_reached() there and lets the access pass.
> >
> > Fixes: 786fd2b0f87 ("VMXNET3 device implementation")
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/309
> > Buglink: https://bugs.launchpad.net/qemu/+bug/1913923
> >
> > Signed-off-by: Qiang Liu 
>
>
> Do we need to warn about the unimplemented register?
If we remove the case branch, it will go to the default branch which
can warn users if
VMXNET_DEBUG_CB_ENABLED is defined, so there is no need to warn this
unimplemented register. Am I right?
```
vmxnet3_io_bar1_write(...
default:
VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
  addr, val, size);
break;
```

> Thanks
>
>
> > ---
> >   hw/net/vmxnet3.c | 7 ---
> >   1 file changed, 7 deletions(-)
> >
> > diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
> > index eff299f629..a388918479 100644
> > --- a/hw/net/vmxnet3.c
> > +++ b/hw/net/vmxnet3.c
> > @@ -1786,13 +1786,6 @@ vmxnet3_io_bar1_write(void *opaque,
> >   vmxnet3_set_variable_mac(s, val, s->temp_mac);
> >   break;
> >
> > -/* Interrupt Cause Register */
> > -case VMXNET3_REG_ICR:
> > -VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
> > -  val, size);
> > -g_assert_not_reached();
> > -break;
> > -
> >   /* Event Cause Register */
> >   case VMXNET3_REG_ECR:
> >   VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
> > --
> > 2.30.2
> >
>



[PATCH] ipmi/sim: fix watchdog_expired data type error in IPMIBmcSim struct

2021-06-24 Thread Jinhua Cao
1) watchdog_expired is declared as bool, whose value can only be 0 or 1,
but each bit of watchdog_expired denotes a different Timer Use.

2) Use the command `ipmitool mc watchdog get` to query the
ipmi-watchdog status in the guest.
...
[root@localhost ~]# ipmitool mc watchdog get
Watchdog Timer Use: SMS/OS (0x44)
Watchdog Timer Is:  Started/Running
Watchdog Timer Actions: Hard Reset (0x01)
Pre-timeout interval:   0 seconds
Timer Expiration Flags: 0x00
Initial Countdown:  60 sec
Present Countdown:  57 sec
...
Because watchdog_expired is a bool, the Timer Expiration Flags are always
0x00 or 0x01, but the Timer Expiration Flags indicate the Timer Use
after timeout. So change the data type of watchdog_expired from bool to
uint8_t to fix this problem.

Signed-off-by: Jinhua Cao 
---
 hw/ipmi/ipmi_bmc_sim.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/ipmi/ipmi_bmc_sim.c b/hw/ipmi/ipmi_bmc_sim.c
index 55fb81fa5a..905e091094 100644
--- a/hw/ipmi/ipmi_bmc_sim.c
+++ b/hw/ipmi/ipmi_bmc_sim.c
@@ -189,7 +189,7 @@ struct IPMIBmcSim {
 uint8_t  watchdog_use;
 uint8_t  watchdog_action;
 uint8_t  watchdog_pretimeout; /* In seconds */
-bool watchdog_expired;
+uint8_t  watchdog_expired;
 uint16_t watchdog_timeout; /* in 100's of milliseconds */
 
 bool watchdog_running;
@@ -2110,7 +2110,7 @@ static const VMStateDescription vmstate_ipmi_sim = {
 VMSTATE_UINT8(watchdog_use, IPMIBmcSim),
 VMSTATE_UINT8(watchdog_action, IPMIBmcSim),
 VMSTATE_UINT8(watchdog_pretimeout, IPMIBmcSim),
-VMSTATE_BOOL(watchdog_expired, IPMIBmcSim),
+VMSTATE_UINT8(watchdog_expired, IPMIBmcSim),
 VMSTATE_UINT16(watchdog_timeout, IPMIBmcSim),
 VMSTATE_BOOL(watchdog_running, IPMIBmcSim),
 VMSTATE_BOOL(watchdog_preaction_ran, IPMIBmcSim),
-- 
2.27.0




Re: [PULL 09/12] configure, meson: convert libusb detection to meson

2021-06-24 Thread Yonggang Luo
On Wed, Jun 23, 2021 at 8:27 PM Paolo Bonzini  wrote:
>
> Reviewed-by: Daniel P. Berrangé 
> Signed-off-by: Paolo Bonzini 
> ---
>  configure  | 27 ---
>  hw/usb/meson.build |  2 +-
>  meson.build| 11 +++
>  meson_options.txt  |  2 ++
>  4 files changed, 14 insertions(+), 28 deletions(-)
>
> diff --git a/configure b/configure
> index 237e99c3d0..e54d06b99e 100755
> --- a/configure
> +++ b/configure
> @@ -374,7 +374,7 @@ spice_protocol="auto"
>  rbd="auto"
>  smartcard="$default_feature"
>  u2f="auto"
> -libusb="$default_feature"
> +libusb="auto"
>  usb_redir="$default_feature"
>  opengl="$default_feature"
>  cpuid_h="no"
> @@ -1285,9 +1285,9 @@ for opt do
>;;
>--enable-u2f) u2f="enabled"
>;;
> -  --disable-libusb) libusb="no"
> +  --disable-libusb) libusb="disabled"
>;;
> -  --enable-libusb) libusb="yes"
> +  --enable-libusb) libusb="enabled"
>;;
>--disable-usb-redir) usb_redir="no"
>;;
> @@ -3994,20 +3994,6 @@ if test "$smartcard" != "no"; then
>  fi
>  fi
>
> -# check for libusb
> -if test "$libusb" != "no" ; then
> -if $pkg_config --atleast-version=1.0.13 libusb-1.0; then
> -libusb="yes"
> -libusb_cflags=$($pkg_config --cflags libusb-1.0)
> -libusb_libs=$($pkg_config --libs libusb-1.0)
> -else
> -if test "$libusb" = "yes"; then
> -feature_not_found "libusb" "Install libusb devel >= 1.0.13"
> -fi
> -libusb="no"
> -fi
> -fi
> -
>  # check for usbredirparser for usb network redirection support
>  if test "$usb_redir" != "no" ; then
>  if $pkg_config --atleast-version=0.6 libusbredirparser-0.5; then
> @@ -5631,12 +5617,6 @@ if test "$smartcard" = "yes" ; then
>echo "SMARTCARD_LIBS=$libcacard_libs" >> $config_host_mak
>  fi
>
> -if test "$libusb" = "yes" ; then
> -  echo "CONFIG_USB_LIBUSB=y" >> $config_host_mak
> -  echo "LIBUSB_CFLAGS=$libusb_cflags" >> $config_host_mak
> -  echo "LIBUSB_LIBS=$libusb_libs" >> $config_host_mak
> -fi
> -
>  if test "$usb_redir" = "yes" ; then
>echo "CONFIG_USB_REDIR=y" >> $config_host_mak
>echo "USB_REDIR_CFLAGS=$usb_redir_cflags" >> $config_host_mak
> @@ -6215,6 +6195,7 @@ if test "$skip_meson" = no; then
>  -Dkvm=$kvm -Dhax=$hax -Dwhpx=$whpx -Dhvf=$hvf -Dnvmm=$nvmm \
>  -Dxen=$xen -Dxen_pci_passthrough=$xen_pci_passthrough -Dtcg=$tcg
\
>  -Dcocoa=$cocoa -Dgtk=$gtk -Dmpath=$mpath -Dsdl=$sdl
-Dsdl_image=$sdl_image \
> +-Dlibusb=$libusb \
>  -Dvnc=$vnc -Dvnc_sasl=$vnc_sasl -Dvnc_jpeg=$vnc_jpeg
-Dvnc_png=$vnc_png \
>  -Dgettext=$gettext -Dxkbcommon=$xkbcommon -Du2f=$u2f
-Dvirtiofsd=$virtiofsd \
>  -Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt -Dbrlapi=$brlapi \
> diff --git a/hw/usb/meson.build b/hw/usb/meson.build
> index f357270d0b..bd3f8735b9 100644
> --- a/hw/usb/meson.build
> +++ b/hw/usb/meson.build
> @@ -72,7 +72,7 @@ if config_host.has_key('CONFIG_USB_REDIR')
>  endif
>
>  # usb pass-through
> -softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_USB_LIBUSB', libusb],
> +softmmu_ss.add(when: ['CONFIG_USB', libusb],
> if_true: files('host-libusb.c'),
> if_false: files('host-stub.c'))
>  softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('host-stub.c'))
> diff --git a/meson.build b/meson.build
> index c9266bd3cc..58d3a3bdc9 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -991,10 +991,12 @@ if 'CONFIG_USB_REDIR' in config_host
>  link_args:
config_host['USB_REDIR_LIBS'].split())
>  endif
>  libusb = not_found
> -if 'CONFIG_USB_LIBUSB' in config_host
> -  libusb = declare_dependency(compile_args:
config_host['LIBUSB_CFLAGS'].split(),
> -  link_args:
config_host['LIBUSB_LIBS'].split())
> +if not get_option('libusb').auto() or have_system
> +  libusb = dependency('libusb-1.0', required: get_option('libusb'),
> +  version: '>=1.0.13', method: 'pkg-config',
> +  kwargs: static_kwargs)
>  endif

Hi, I am not sure if it's right, but I think the detection may need to be
converted to this:

```
if not get_option('libusb').disabled()
  libusb = dependency('libusb-1.0', required: get_option('libusb').auto(),
  version: '>=1.0.13', method: 'pkg-config',
  kwargs: static_kwargs)
endif
```

> +
>  libpmem = not_found
>  if 'CONFIG_LIBPMEM' in config_host
>libpmem = declare_dependency(compile_args:
config_host['LIBPMEM_CFLAGS'].split(),
> @@ -1210,6 +1212,7 @@ config_host_data.set('CONFIG_SDL', sdl.found())
>  config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
>  config_host_data.set('CONFIG_SECCOMP', seccomp.found())
>  config_host_data.set('CONFIG_SNAPPY', snappy.found())
> +config_host_data.set('CONFIG_USB_LIBUSB', libusb.found())
>  config_host_data.set('CONFIG_VHOST_USER_BLK_SERVER',
have_vhost_user_blk_server)
>  config_host_data.set('CONFIG_VNC', vnc.found())
>  config_host_data.set('CONFIG_VN

RE: [v3] migration: fix the memory overwriting risk in add_to_iovec

2021-06-24 Thread linfeng (M)
* Dr. David Alan Gilbert(mailto:dgilb...@redhat.com) wrote:
> * Lin Feng (linfen...@huawei.com) wrote:
> > From: Feng Lin 
> >
> > When testing migration, a Segmentation fault qemu core is generated.
> > 0  error_free (err=0x1)
> > 1  0x7f8b862df647 in qemu_fclose (f=f@entry=0x55e06c247640)
> > 2  0x7f8b8516d59a in migrate_fd_cleanup (s=s@entry=0x55e06c0e1ef0)
> > 3  0x7f8b8516d66c in migrate_fd_cleanup_bh (opaque=0x55e06c0e1ef0)
> > 4  0x7f8b8626a47f in aio_bh_poll (ctx=ctx@entry=0x55e06b5a16d0)
> > 5  0x7f8b8626e71f in aio_dispatch (ctx=0x55e06b5a16d0)
> > 6  0x7f8b8626a33d in aio_ctx_dispatch (source=, 
> > callback=,
> user_data=)
> > 7  0x7f8b866bdba4 in g_main_context_dispatch ()
> > 8  0x7f8b8626cde9 in glib_pollfds_poll ()
> > 9  0x7f8b8626ce62 in os_host_main_loop_wait (timeout=)
> > 10 0x7f8b8626cffd in main_loop_wait (nonblocking=nonblocking@entry=0)
> > 11 0x7f8b862ef01f in main_loop ()
> > Using gdb print the struct QEMUFile f = {
> >   ...,
> >   iovcnt = 65, last_error = 21984,
> >   last_error_obj = 0x1, shutdown = true
> > }
> > > Well, iovcnt has overflowed, because MAX_IOV_SIZE is 64.
> > struct QEMUFile {
> > ...;
> > struct iovec iov[MAX_IOV_SIZE];
> > unsigned int iovcnt;
> > int last_error;
> > Error *last_error_obj;
> > bool shutdown;
> > };
> > > iovcnt and last_error are overwritten by add_to_iovec().
> > > Right now, add_to_iovec() increases iovcnt before checking the limit.
> > And it seems that add_to_iovec() assumes that iovcnt will set to zero
> > in qemu_fflush(). But qemu_fflush() will directly return when f->shutdown
> > is true.
> >
> > The situation may occur when libvirtd restart during migration, after
> > f->shutdown is set, before calling qemu_file_set_error() in
> > qemu_file_shutdown().
> >
> > > So the safest way is to check iovcnt before increasing it.
> >
> > Signed-off-by: Feng Lin 
> > ---
> >  migration/qemu-file.c | 13 -
> >  1 file changed, 8 insertions(+), 5 deletions(-)
> >
> > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > index d6e03dbc0e..f6486cf7bc 100644
> > --- a/migration/qemu-file.c
> > +++ b/migration/qemu-file.c
> > @@ -416,6 +416,9 @@ static int add_to_iovec(QEMUFile *f, const uint8_t 
> > *buf, size_t size,
> >  {
> >  f->iov[f->iovcnt - 1].iov_len += size;
> >  } else {
> > +if (f->iovcnt >= MAX_IOV_SIZE) {
> > +goto fflush;
> > +}
> 
> Why call qemu_fflush in this case?
> If I understand what you're saying, then we only get to here if a
> previous qemu_fflush has failed, so this should fail as well?
Yes, that's what I mean.

> 
> How about, something like:
> if (f->iovcnt >= MAX_IOV_SIZE) {
> /* Should only happen if a previous fflush failed */
> assert(f->shutdown || !qemu_file_is_writeable(f));
> return 1;
> }
> 
> ?
At first, I was only thinking that avoiding the overwrite requires qemu_fflush
to reset iovcnt, and did not consider
the possibility of packet loss caused by other exceptions. It makes more sense 
to make an assertion
here. Thank you for your suggestions.
> 
> Dave
> 
> >  if (may_free) {
> >  set_bit(f->iovcnt, f->may_free);
> >  }
> > @@ -423,12 +426,12 @@ static int add_to_iovec(QEMUFile *f, const uint8_t 
> > *buf, size_t size,
> >  f->iov[f->iovcnt++].iov_len = size;
> >  }
> >
> > -if (f->iovcnt >= MAX_IOV_SIZE) {
> > -qemu_fflush(f);
> > -return 1;
> > +if (f->iovcnt < MAX_IOV_SIZE) {
> > +return 0;
> >  }
> > -
> > -return 0;
> > +fflush:
> > +qemu_fflush(f);
> > +return 1;
> >  }
> >
> >  static void add_buf_to_iovec(QEMUFile *f, size_t len)
> > --
> > 2.23.0
> >
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PATCH v2] mc146818rtc: Make PF independent of PIE

2021-06-24 Thread Paolo Bonzini

On 23/06/21 16:24, Jason Thorpe wrote:

Second, the firmware could set a nonzero period, and this would cause
continuous interruptions of the guest after the firmware stops, due to
s->periodic_timer firing.  This is "optimized" by the bug that you are
fixing.  To keep the optimization you could:

- do the timer_mod in periodic_timer_update only if !PF || (PIE && 
lost_tick_policy==SLEW)

- in cmos_ioport_read, if !timer_pending(s->periodic_timer) call

periodic_timer_update(s, qemu_clock_get_ns(rtc_clock),
  s->period, true);

to update s->next_periodic_time for the next tick and ensure PF will be set.


I might be missing some subtlety here, but by my reading of
periodic_timer_update(), either one of those changes would result in a
delay of the next latching of PF by however many ns the CPU was late in
reading PF since the last time it was latched.  Please correct me if I’m
wrong about this!


No, it shouldn't.  I may be wrong, but the process is the following:

- the current rtc_clock value is stored in cur_clock

- because period_change is true, the delay between writing PF and 
reading C is stored in lost_clock


- then the delay is compensated by next_irq_clock = cur_clock + period - 
lost_clock


The best way to confirm this would be by writing a testcase (there's 
already an mc146818 suite in tests/qtest).


Paolo


There exists software out there in the wild that depends on PF latching at 
regular intervals regardless of when the CPU reads it, i.e.:

PF  PF  PF  PF  PF  PF
 CC  C  C  C

-- thorpej







Re: [PULL 00/12] Misc, mostly meson patches for 2021-06-23

2021-06-24 Thread Paolo Bonzini

On 24/06/21 21:09, Peter Maydell wrote:

This generates a new warning on one of my boxes:

[...]
Has header "sasl/sasl.h" : YES (cached)
Library sasl2 found: YES
Has header "security/pam_appl.h" : YES
Library pam found: YES
../meson.build:926: WARNING: could not link libpam, disabling


This is probably too old a libpam, or something like that.  What distro 
is it, and could you please attach the resulting meson-logs/meson-log.txt?


Thanks,

Paolo




Re: [PATCH 1/5] hw/isa/vt82c686: Replace magic numbers by definitions

2021-06-24 Thread BALATON Zoltan

On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:

Replace magic values of the Power Management / SMBus function (#4)
by definitions from the datasheet. The result is less compact, but
we can follow what the code does without having to resort to the
datasheet.


I'm not sure this is an improvement. With the values it's clear what is 
done but I can't follow how these magic constants are defined or what they 
do so no idea if this is still correct. I think if you want to review a 
device model then you should be familiar with the device or consult the 
data sheet. Otherwise you can't spot problems in the definition of these 
constants either. I'm not a fan of hiding things behind cryptic macros 
when you could just write it in a straightforward way that could be 
understood more clearly.


Regards,
BALATON Zoltan


Signed-off-by: Philippe Mathieu-Daudé 
---
hw/isa/vt82c686.c | 50 +++
1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index f57f3e70679..4ddcf2d398c 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -14,6 +14,7 @@
 */

#include "qemu/osdep.h"
+#include "hw/registerfields.h"
#include "hw/isa/vt82c686.h"
#include "hw/pci/pci.h"
#include "hw/qdev-properties.h"
@@ -38,6 +39,16 @@
#define TYPE_VIA_PM "via-pm"
OBJECT_DECLARE_SIMPLE_TYPE(ViaPMState, VIA_PM)

+REG8(PM_GEN_CFG0,   0x40)
+REG8(PM_GEN_CFG1,   0x41)
+FIELD(PM_GEN_CFG1, ACPI_IO_ENABLE,  7, 1)
+REG32(PM_IO_BASE,   0x48)
+FIELD(PM_IO_BASE, ADDR, 7, 9)
+REG32(SMBUS_IO_BASE,0x90)
+FIELD(SMBUS_IO_BASE, ADDR,  4, 12)
+REG8(SMBUS_HOST_CONFIG, 0xd2)
+FIELD(SMBUS_HOST_CONFIG, ENABLE,0, 1)
+
struct ViaPMState {
PCIDevice dev;
MemoryRegion io;
@@ -48,21 +59,24 @@ struct ViaPMState {

static void pm_io_space_update(ViaPMState *s)
{
-uint32_t pmbase = pci_get_long(s->dev.config + 0x48) & 0xff80UL;
+uint32_t pmbase = pci_get_long(s->dev.config + A_PM_IO_BASE);

memory_region_transaction_begin();
-memory_region_set_address(&s->io, pmbase);
-memory_region_set_enabled(&s->io, s->dev.config[0x41] & BIT(7));
+memory_region_set_address(&s->io, pmbase & R_PM_IO_BASE_ADDR_MASK);
+memory_region_set_enabled(&s->io, FIELD_EX32(s->dev.config[A_PM_GEN_CFG1],
+  PM_GEN_CFG1, ACPI_IO_ENABLE));
memory_region_transaction_commit();
}

static void smb_io_space_update(ViaPMState *s)
{
-uint32_t smbase = pci_get_long(s->dev.config + 0x90) & 0xfff0UL;
+uint32_t smbase = pci_get_long(s->dev.config + A_SMBUS_IO_BASE);

memory_region_transaction_begin();
-memory_region_set_address(&s->smb.io, smbase);
-memory_region_set_enabled(&s->smb.io, s->dev.config[0xd2] & BIT(0));
+memory_region_set_address(&s->smb.io, smbase & R_SMBUS_IO_BASE_ADDR_MASK);
+memory_region_set_enabled(&s->smb.io,
+  FIELD_EX32(s->dev.config[A_SMBUS_HOST_CONFIG],
+ SMBUS_HOST_CONFIG, ENABLE));
memory_region_transaction_commit();
}

@@ -98,19 +112,21 @@ static void pm_write_config(PCIDevice *d, uint32_t addr, 
uint32_t val, int len)

trace_via_pm_write(addr, val, len);
pci_default_write_config(d, addr, val, len);
-if (ranges_overlap(addr, len, 0x48, 4)) {
-uint32_t v = pci_get_long(s->dev.config + 0x48);
-pci_set_long(s->dev.config + 0x48, (v & 0xff80UL) | 1);
+if (ranges_overlap(addr, len, A_PM_IO_BASE, 4)) {
+uint32_t v = pci_get_long(s->dev.config + A_PM_IO_BASE);
+pci_set_long(s->dev.config + A_PM_IO_BASE,
+ (v & R_PM_IO_BASE_ADDR_MASK) | 1);
}
-if (range_covers_byte(addr, len, 0x41)) {
+if (range_covers_byte(addr, len, A_PM_GEN_CFG1)) {
pm_io_space_update(s);
}
-if (ranges_overlap(addr, len, 0x90, 4)) {
-uint32_t v = pci_get_long(s->dev.config + 0x90);
-pci_set_long(s->dev.config + 0x90, (v & 0xfff0UL) | 1);
+if (ranges_overlap(addr, len, A_SMBUS_IO_BASE, 4)) {
+uint32_t v = pci_get_long(s->dev.config + A_SMBUS_IO_BASE);
+pci_set_long(s->dev.config + A_SMBUS_IO_BASE,
+ (v & R_SMBUS_IO_BASE_ADDR_MASK) | 1);
}
-if (range_covers_byte(addr, len, 0xd2)) {
-s->dev.config[0xd2] &= 0xf;
+if (range_covers_byte(addr, len, A_SMBUS_HOST_CONFIG)) {
+s->dev.config[A_SMBUS_HOST_CONFIG] &= 0xf;
smb_io_space_update(s);
}
}
@@ -176,9 +192,9 @@ static void via_pm_reset(DeviceState *d)
memset(s->dev.config + PCI_CONFIG_HEADER_SIZE, 0,
   PCI_CONFIG_SPACE_SIZE - PCI_CONFIG_HEADER_SIZE);
/* Power Management IO base */
-pci_set_long(s->dev.config + 0x48, 1);
+pci_set_long(s->dev.config + A_PM_IO_BASE, 1);
/* SMBus IO base */
-pci_set_long(s->dev.config + 0x90, 1);
+pci_set_long(s->dev.config + A_SMBUS_IO_BASE, 1);

acpi_pm1_evt

Re: [PATCH 3/5] hw/pci-host/bonito: Allow PCI config accesses smaller than 32-bit

2021-06-24 Thread BALATON Zoltan

On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:

When running the official PMON firmware for the Fuloong 2E, we see
8-bit and 16-bit accesses to PCI config space:

 $ qemu-system-mips64el -M fuloong2e -bios pmon_2e.bin \
   -trace -trace bonito\* -trace pci_cfg\*

 pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1
 bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x4d2, size: 2
 pci_cfg_write vt82c686b-pm 05:4 @0xd2 <- 0x1
 pci_cfg_write vt82c686b-pm 05:4 @0x4 <- 0x1
 pci_cfg_write vt82c686b-isa 05:0 @0x4 <- 0x7
 bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x81, size: 1
 pci_cfg_read vt82c686b-isa 05:0 @0x81 -> 0x0
 bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x81, size: 1
 pci_cfg_write vt82c686b-isa 05:0 @0x81 <- 0x80
 bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x83, size: 1
 pci_cfg_write vt82c686b-isa 05:0 @0x83 <- 0x89
 bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x85, size: 1
 pci_cfg_write vt82c686b-isa 05:0 @0x85 <- 0x3
 bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x5a, size: 1
 pci_cfg_write vt82c686b-isa 05:0 @0x5a <- 0x7
 bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x85, size: 1
 pci_cfg_write vt82c686b-isa 05:0 @0x85 <- 0x1

Also this is what the Linux kernel does since it supports the Bonito
north bridge:
https://elixir.bootlin.com/linux/v2.6.15/source/arch/mips/pci/ops-bonito64.c#L85

So it seems safe to assume the datasheet is incomplete or outdated
regarding the address constraints.

This problem was exposed by commit 911629e6d3773a8adeab48b
("vt82c686: Fix SMBus IO base and configuration registers").

Reported-by: BALATON Zoltan 
Suggested-by: Jiaxun Yang 
Signed-off-by: Philippe Mathieu-Daudé 
---
hw/pci-host/bonito.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c
index 751fdcec689..3c10608c9a2 100644
--- a/hw/pci-host/bonito.c
+++ b/hw/pci-host/bonito.c
@@ -187,7 +187,7 @@ FIELD(BONGENCFG, PCIQUEUE,  12, 1)
#define BONITO_PCICONF_FUN_MASK0x700/* [10:8] */
#define BONITO_PCICONF_FUN_OFFSET  8
#define BONITO_PCICONF_REG_MASK_DS (~3) /* Per datasheet */
-#define BONITO_PCICONF_REG_MASK0xFC
+#define BONITO_PCICONF_REG_MASK_HW 0xff /* As seen on hardware */


I think we didn't really see it on hardware, just inferred this from what 
the firmware does. That's a slight difference but may be worth noting so 
people later don't think this was really tested with real hardware. Maybe 
"As seen with PMON"? Also if this is a Loongson thing, as was thought in 
the thread in December, then maybe the #define could be named for that instead 
of _HW, so if somebody wants to reuse this model later as Bonito then they know 
that it implements the Loongson version.


Regards,
BALATON Zoltan


#define BONITO_PCICONF_REG_OFFSET  0


@@ -466,7 +466,7 @@ static uint32_t bonito_sbridge_pciaddr(void *opaque, hwaddr 
addr)
 BONITO_PCICONF_IDSEL_OFFSET;
devno = ctz32(idsel);
funno = (cfgaddr & BONITO_PCICONF_FUN_MASK) >> BONITO_PCICONF_FUN_OFFSET;
-regno = (cfgaddr & BONITO_PCICONF_REG_MASK) >> BONITO_PCICONF_REG_OFFSET;
+regno = (cfgaddr & BONITO_PCICONF_REG_MASK_HW) >> 
BONITO_PCICONF_REG_OFFSET;

if (idsel == 0) {
error_report("error in bonito pci config address 0x" TARGET_FMT_plx


[PATCH 3/3] tests: acpi: pc: update expected DSDT blobs

2021-06-24 Thread Igor Mammedov
@@ -930,20 +930,20 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC", 
0x0001)
 Device (S00)
 {
 Name (_ADR, Zero)  // _ADR: Address
-Name (_SUN, Zero)  // _SUN: Slot User Number
+Name (ASUN, Zero)
 Method (_DSM, 4, Serialized)  // _DSM: Device-Specific Method
 {
-Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN))
+Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, ASUN))
 }
 }

 Device (S10)
 {
 Name (_ADR, 0x0002)  // _ADR: Address
-Name (_SUN, 0x02)  // _SUN: Slot User Number
+Name (ASUN, 0x02)
 Method (_DSM, 4, Serialized)  // _DSM: Device-Specific Method
 {
-Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN))
+Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, ASUN))
 }

 Method (_S1D, 0, NotSerialized)  // _S1D: S1 Device State

with a hunk per bridge:

@@ -965,10 +965,10 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC", 
0x0001)
 Device (S18)
 {
 Name (_ADR, 0x0003)  // _ADR: Address
-Name (_SUN, 0x03)  // _SUN: Slot User Number
+Name (ASUN, 0x03)
 Method (_DSM, 4, Serialized)  // _DSM: Device-Specific Method
 {
-Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN))
+Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, ASUN))
 }
 }

Signed-off-by: Igor Mammedov 
---
 tests/qtest/bios-tables-test-allowed-diff.h |  10 --
 tests/data/acpi/pc/DSDT | Bin 6002 -> 6002 bytes
 tests/data/acpi/pc/DSDT.acpihmat| Bin 7327 -> 7327 bytes
 tests/data/acpi/pc/DSDT.bridge  | Bin 8668 -> 8668 bytes
 tests/data/acpi/pc/DSDT.cphp| Bin 6466 -> 6466 bytes
 tests/data/acpi/pc/DSDT.dimmpxm | Bin 7656 -> 7656 bytes
 tests/data/acpi/pc/DSDT.hpbridge| Bin 5969 -> 5969 bytes
 tests/data/acpi/pc/DSDT.ipmikcs | Bin 6074 -> 6074 bytes
 tests/data/acpi/pc/DSDT.memhp   | Bin 7361 -> 7361 bytes
 tests/data/acpi/pc/DSDT.nohpet  | Bin 5860 -> 5860 bytes
 tests/data/acpi/pc/DSDT.numamem | Bin 6008 -> 6008 bytes
 11 files changed, 10 deletions(-)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index 6c83a3ef76..dfb8523c8b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1,11 +1 @@
 /* List of comma-separated changed AML files to ignore */
-"tests/data/acpi/pc/DSDT",
-"tests/data/acpi/pc/DSDT.bridge",
-"tests/data/acpi/pc/DSDT.ipmikcs",
-"tests/data/acpi/pc/DSDT.cphp",
-"tests/data/acpi/pc/DSDT.memhp",
-"tests/data/acpi/pc/DSDT.numamem",
-"tests/data/acpi/pc/DSDT.nohpet",
-"tests/data/acpi/pc/DSDT.dimmpxm",
-"tests/data/acpi/pc/DSDT.acpihmat",
-"tests/data/acpi/pc/DSDT.hpbridge",
diff --git a/tests/data/acpi/pc/DSDT b/tests/data/acpi/pc/DSDT
index 
b9dd9b38e4ef720636ba19ccbdf262de8a6439d5..cc1223773e9c459a8d2f20666c051a74338d40b7
 100644
GIT binary patch
delta 61
zcmeyQ_eqb-CD&USu&WP9BD%?o
fCD_m)o+IATC5VTCfr)_wB+SKxRm0{lLaR9eXLTFp

delta 62
zcmbPlIp31YCDgG8yduO#5=kK@h~tjF>rvS
QxR@rdlak#0LuffC015RFTL1t6

diff --git a/tests/data/acpi/pc/DSDT.bridge b/tests/data/acpi/pc/DSDT.bridge
index 
a9b4d5659457f6de30b993962bce673c9413d81d..8c3a69946efd501e7eff0a73af309b553f13
 100644
GIT binary patch
delta 73
zcmccPe8-u~CD3CG}2zvw0}mS97Jc#e2SmmnSn1||j$
R$H}$)l8la3>@*3
PYxyM^<2O&|U(E>s%3KcC

diff --git a/tests/data/acpi/pc/DSDT.dimmpxm b/tests/data/acpi/pc/DSDT.dimmpxm
index 
e00a447f92b27f9a91be802eb11fe89dc0457e20..b56b2e089017f933f8a3089c4fd2389fb8ef1e40
 100644
GIT binary patch
delta 100
zcmaE1{lc2dCDwlr;t!7i

diff --git a/tests/data/acpi/pc/DSDT.hpbridge b/tests/data/acpi/pc/DSDT.hpbridge
index 
5d8ba195055f2eda74223323baeb88390ea36739..bb0593eeb8730d51a6f0fe51a00a00df9c83c419
 100644
GIT binary patch
delta 78
zcmcbpcTtbaCD@v8

delta 74
zcmcbpcTtbaCD|UB3{<`;7~t?$&P$dLS8Jvh6eE*@s2J*JPZs>3>@*3
b*YZj-#!o)ZYrqCl#>EU&C%0Le&x{iQ&x;Zm

diff --git a/tests/data/acpi/pc/DSDT.ipmikcs b/tests/data/acpi/pc/DSDT.ipmikcs
index 
01e53bd436698db6f6adfff584ec56cb99074a5f..2e618e49d357ae1d0ac20d822f71d676ea90f2fc
 100644
GIT binary patch
delta 53
zcmdm`ze}IXCDYt>y#(
Dp7{>n

delta 49
zcmdm`ze}IXCD
T3>=P={|HGkI&KydUd;&rQ4bK$

delta 65
zcmX?TdC-!}CDwF!J&Q(lNp6YCOe3*33;&u8yduO#5=kK@h~tj
UF>u6B{v#v_lw#Q|DZGLc04$Uc4*&oF

diff --git a/tests/data/acpi/pc/DSDT.nohpet b/tests/data/acpi/pc/DSDT.nohpet
index 
d4f0050533f970128774f825274177096a46c3b8..623f06a900d12500d2197d101f76f6875e92ed64
 100644
GIT binary patch
delta 61
zcmaE&`$U(^CD>0$#B;Y{|Qt696|m5IX<>

delta 59
zcmaE&`$U(^CDnU(Dn?CJxOh;H&?
f2{tr{=ZJ

[PATCH 1/2] tests: acpi: prepare for changing DSDT tables

2021-06-24 Thread Igor Mammedov
Signed-off-by: Igor Mammedov 
---
 tests/qtest/bios-tables-test-allowed-diff.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..6c83a3ef76 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,11 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/pc/DSDT",
+"tests/data/acpi/pc/DSDT.bridge",
+"tests/data/acpi/pc/DSDT.ipmikcs",
+"tests/data/acpi/pc/DSDT.cphp",
+"tests/data/acpi/pc/DSDT.memhp",
+"tests/data/acpi/pc/DSDT.numamem",
+"tests/data/acpi/pc/DSDT.nohpet",
+"tests/data/acpi/pc/DSDT.dimmpxm",
+"tests/data/acpi/pc/DSDT.acpihmat",
+"tests/data/acpi/pc/DSDT.hpbridge",
-- 
2.27.0




Re: [PATCH 5/5] tests/acceptance: Test PMON on the Fuloong 2E machine

2021-06-24 Thread BALATON Zoltan

On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:

Test the PMON firmware. As the firmware is not redistributable,
it has to be downloaded manually first. Then it can be used by
providing its path via the PMON2E_BIN_PATH environment variable:

 $ PMON2E_BIN_PATH=~/images/fuloong2e/pmon_2e.bin \
   AVOCADO_ALLOW_UNTRUSTED_CODE=1 \
   avocado --show=app,console run tests/acceptance/machine_mips_fuloong2e.py
 Fetching asset from 
tests/acceptance/machine_mips_fuloong2e.py:MipsFuloong2e.test_linux_kernel_isa_serial
  (1/3) 
tests/acceptance/machine_mips_fuloong2e.py:MipsFuloong2e.test_pmon_serial_console:
 console: PMON2000 MIPS Initializing. Standby...
 console: ERRORPC= CONFIG=00030932
 console: PRID=6302
 console: Init SDRAM Done!
 console: Sizing caches...
 console: Init caches...
 console: godson2 caches found
 console: Init caches done, cfg = 00030932
 console: Copy PMON to execute location...
 console: copy text section done.
 console: Copy PMON to execute location done.
 Uncompressing BiosOK,Booting Bios
 PASS (0.25 s)
  (2/3) 
tests/acceptance/machine_mips_fuloong2e.py:MipsFuloong2e.test_pmon_framebuffer_console:
 [...]
 Uncompressing BiosOK,Booting Bios
 console: FREQ
 console: FREI
 console: DONE
 console: TTYI
 console: TTYD
 console: ENVI
 console: MAPV
 console: Mfg  0, Id 60
 console: STDV
 console: SBDD
 console: PPCIH
 console: PCIS
 console: PCIR
 console: PCIW
 console: NETI
 console: RTCL
 console: PCID
 console: VGAI
 console: Default MODE_ID 2
 console: starting radeon init...
 console: radeon init done
 console: FRBI
 console: cfb_console init,fb=b400
 console: Video: Drawing the logo ...
 console: CONSOLE_SIZE 450560HSTI
 PASS (4.10 s)
  (3/3) 
tests/acceptance/machine_mips_fuloong2e.py:MipsFuloong2e.test_linux_kernel_isa_serial:
 console: Linux version 2.6.27.7lemote (root@debian) (gcc version 4.1.3 
20080623 (prerelease) (Debian 4.1.2-23)) #6 Fri Dec 12 00:11:25 CST 2008
 console: busclock=3300, cpuclock=-2145008360,memsize=256,highmemsize=0
 console: console [early0] enabled
 console: CPU revision is: 6302 (ICT Loongson-2)
 PASS (0.19 s)
 RESULTS: PASS 3 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0 | 
CANCEL 0
 JOB TIME   : 5.10 s

Signed-off-by: Philippe Mathieu-Daudé 
---
tests/acceptance/machine_mips_fuloong2e.py | 62 ++
1 file changed, 62 insertions(+)

diff --git a/tests/acceptance/machine_mips_fuloong2e.py 
b/tests/acceptance/machine_mips_fuloong2e.py
index 0ac285e2af1..4854ba98560 100644
--- a/tests/acceptance/machine_mips_fuloong2e.py
+++ b/tests/acceptance/machine_mips_fuloong2e.py
@@ -8,15 +8,77 @@
# SPDX-License-Identifier: GPL-2.0-or-later

import os
+import time

from avocado import skipUnless
from avocado_qemu import Test
from avocado_qemu import wait_for_console_pattern

+from tesseract_utils import tesseract_available, tesseract_ocr
+
class MipsFuloong2e(Test):

timeout = 60

+@skipUnless(os.getenv('AVOCADO_ALLOW_UNTRUSTED_CODE'), 'untrusted code')
+@skipUnless(os.getenv('PMON2E_BIN_PATH'), 'PMON2E_BIN_PATH not available')
+def test_pmon_serial_console(self):
+"""
+:avocado: tags=arch:mips64el
+:avocado: tags=machine:fuloong2e
+:avocado: tags=endian:little
+:avocado: tags=device:bonito64
+:avocado: tags=device:via686b
+"""
+pmon_hash = 'c812e1695d7b2320036f3ef494976969' # v1.1.2
+pmon_path = self.fetch_asset('file://' + os.getenv('PMON2E_BIN_PATH'),
+ asset_hash=pmon_hash, algorithm='md5')
+
+self.vm.set_console()
+self.vm.add_args('-bios', pmon_path)
+self.vm.launch()
+wait_for_console_pattern(self, 'PMON2000 MIPS Initializing. 
Standby...')
+wait_for_console_pattern(self, 'Booting Bios')
+
+@skipUnless(os.getenv('AVOCADO_ALLOW_UNTRUSTED_CODE'), 'untrusted code')
+# Tesseract 4 adds a new OCR engine based on LSTM neural networks. The
+# new version is faster and more accurate than version 3. The drawback is
+# that it is still alpha-level software.
+@skipUnless(tesseract_available(4), 'tesseract v4 OCR tool not available')
+@skipUnless(os.getenv('PMON2E_BIN_PATH'), 'PMON2E_BIN_PATH not available')
+def test_pmon_framebuffer_console(self):
+"""
+:avocado: tags=arch:mips64el
+:avocado: tags=machine:fuloong2e
+:avocado: tags=endian:little
+:avocado: tags=device:bonito64
+:avocado: tags=device:ati-vga
+"""
+screenshot_path = os.path.join(self.workdir, 'dump.ppm')
+
+pmon_hash = 'c812e1695d7b2320036f3ef494976969' # v1.1.2
+pmon_path = self.fetch_asset('file://' + os.getenv('PMON2E_BIN_PATH'),
+ asset_hash=pmon_hash, algorithm='md5')
+
+self.vm.set_console()
+self.vm.add_args('-bios', pmon_path,
+ '-vga', 'std',
+  

[PATCH 0/2] pc: acpi: revert back to 5.2 PCI slot enumeration

2021-06-24 Thread Igor Mammedov
Commit b7f23f62e40 (pci: acpi: add _DSM method to PCI devices),
regressed network interface naming for Linux guests in some cases.
This reverts PCI slot enumeration to its state before 6.0.
For details see patch 2/2.

Thanks Stefan for troubleshooting!

Reported-by: john.suc...@ekinops.com  
CC: stefa...@redhat.com
CC: qemu-sta...@nongnu.org
CC: m...@redhat.com
CC: marcel.apfelb...@gmail.com
CC: berra...@redhat.com

Igor Mammedov (2):
  tests: acpi: prepare for changing DSDT tables
  acpi: pc: revert back to v5.2 PCI slot enumeration

 tests/qtest/bios-tables-test-allowed-diff.h | 10 ++
 hw/i386/acpi-build.c|  9 +++--
 2 files changed, 17 insertions(+), 2 deletions(-)

-- 
2.27.0




[PATCH 2/2] acpi: pc: revert back to v5.2 PCI slot enumeration

2021-06-24 Thread Igor Mammedov
Commit [1] moved _SUN variable from only hot-pluggable to
all devices. This made linux kernel enumerate extra slots
that weren't present before. If an extra slot happens to
be enumerated first and there is a device in the same slot
but on another bridge, linux kernel will add -N suffix to
slot name of the latter, thus changing NIC name compared to
QEMU 5.2. This in some cases confuses systemd, if it is
using SLOT NIC naming scheme and interface name becomes
not the same as it was under QEMU-5.2.

Reproducer QEMU CLI:
  -M pc-i440fx-5.2 -nodefaults \
  -device pci-bridge,chassis_nr=1,id=pci.1,bus=pci.0,addr=0x3 \
  -device virtio-net-pci,id=nic1,bus=pci.1,addr=0x1 \
  -device virtio-net-pci,id=nic2,bus=pci.1,addr=0x2 \
  -device virtio-net-pci,id=nic3,bus=pci.1,addr=0x3

with RHEL8 guest produces following results:
  v5.2:
 kernel: virtio_net virtio0 ens1: renamed from eth0
 kernel: virtio_net virtio2 ens3: renamed from eth2
 kernel: virtio_net virtio1 enp1s2: renamed from eth1
  (slot 2 is assigned to empty bus 0 slot and virtio1
   is assigned to 2-2 slot, and renaming falls back,
   for some reason, to path based naming scheme)

  v6.0:
 kernel: virtio_net virtio0 ens1: renamed from eth0
 kernel: virtio_net virtio2 ens3: renamed from eth2
 systemd-udevd[299]: Error changing net interface name 'eth1' to 'ens3': 
File exists
 systemd-udevd[299]: could not rename interface '3' from 'eth1' to 'ens3': 
File exists
  (with commit [1] kernel assigns virtio2 to 3-2 slot
   since bridge advertises _SUN=0x3 and kernel assigns
   slot 3 to bridge. Still it manages to rename virtio2
   correctly to ens3, however systemd gets confused with virtio1
   where slot allocation exactly the same (2-2) as in 5.2 case
   and tries to rename it to ens3 which is rightfully taken by
   virtio2)

I'm not sure what breaks in systemd interface renaming (it probably
should be investigated), but on QEMU side we can safely revert
_SUN to 5.2 behavior (i.e. avoid cold-plugged bridges and non
hot-pluggable device classes), without breaking acpi-index (which uses
slot numbers but doesn't have to use _SUN; it could use an arbitrary
variable name that has the same slot value).
It will help existing VMs to keep networking with non trivial
configs in working order since systemd will do its interface
renaming magic as it used to do.

1)
Fixes: b7f23f62e40 (pci: acpi: add _DSM method to PCI devices)
Signed-off-by: Igor Mammedov 
---
shameless plug: one more reason to use new acpi-index for interface
naming, so naming won't depend on enumeration rules kernel or systemd
use (so far it's available only for 'pc' machine, but with Julia's
acpi pci hotplug reviewed, there is a hope for q35 also supporting it
since 6.1).
---
 hw/i386/acpi-build.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 796ffc6f5c..357437ff1d 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -435,11 +435,15 @@ static void build_append_pci_bus_devices(Aml 
*parent_scope, PCIBus *bus,
 aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16)));
 
 if (bsel) {
-aml_append(dev, aml_name_decl("_SUN", aml_int(slot)));
+/*
+ * Can't declare _SUN here for every device as it changes 'slot'
+ * enumeration order in linux kernel, so use another variable for 
it
+ */
+aml_append(dev, aml_name_decl("ASUN", aml_int(slot)));
 method = aml_method("_DSM", 4, AML_SERIALIZED);
 aml_append(method, aml_return(
 aml_call6("PDSM", aml_arg(0), aml_arg(1), aml_arg(2),
-  aml_arg(3), aml_name("BSEL"), aml_name("_SUN"))
+  aml_arg(3), aml_name("BSEL"), aml_name("ASUN"))
 ));
 aml_append(dev, method);
 }
@@ -466,6 +470,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, 
PCIBus *bus,
 aml_append(method, aml_return(aml_int(s3d)));
 aml_append(dev, method);
 } else if (hotplug_enabled_dev) {
+aml_append(dev, aml_name_decl("_SUN", aml_int(slot)));
 /* add _EJ0 to make slot hotpluggable  */
 method = aml_method("_EJ0", 1, AML_NOTSERIALIZED);
 aml_append(method,
-- 
2.27.0




Re: [PATCH v4 01/34] modules: add modinfo macros

2021-06-24 Thread Eduardo Habkost
On Thu, Jun 24, 2021 at 12:38:03PM +0200, Gerd Hoffmann wrote:
> Add macros for module info annotations.
> 
> Instead of having that module meta-data stored in lists in util/module.c
> place directly in the module source code.
> 
[...]
> +/* module implements QOM type  */
> +#define module_obj(name) modinfo(obj, name)

Can we make OBJECT_DEFINE_TYPE*() use this macro automatically?

-- 
Eduardo




[PATCH 2/5] hw/pci-host/bonito: Trace PCI config accesses smaller than 32-bit

2021-06-24 Thread Philippe Mathieu-Daudé
Per the datasheet section "5.7.5. Accessing PCI configuration space"
the address must be 32-bit aligned. Trace any accesses that are not
aligned to 32-bit.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/pci-host/bonito.c | 8 
 hw/pci-host/trace-events | 3 +++
 2 files changed, 11 insertions(+)

diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c
index afb3d1f81d5..751fdcec689 100644
--- a/hw/pci-host/bonito.c
+++ b/hw/pci-host/bonito.c
@@ -52,6 +52,7 @@
 #include "hw/misc/unimp.h"
 #include "hw/registerfields.h"
 #include "qom/object.h"
+#include "trace.h"
 
 /* #define DEBUG_BONITO */
 
@@ -185,6 +186,7 @@ FIELD(BONGENCFG, PCIQUEUE,  12, 1)
 #define BONITO_PCICONF_IDSEL_OFFSET11
 #define BONITO_PCICONF_FUN_MASK0x700/* [10:8] */
 #define BONITO_PCICONF_FUN_OFFSET  8
+#define BONITO_PCICONF_REG_MASK_DS (~3) /* Per datasheet */
 #define BONITO_PCICONF_REG_MASK0xFC
 #define BONITO_PCICONF_REG_OFFSET  0
 
@@ -495,6 +497,9 @@ static void bonito_spciconf_write(void *opaque, hwaddr 
addr, uint64_t val,
 if (pciaddr == 0x) {
 return;
 }
+if (addr & ~BONITO_PCICONF_REG_MASK_DS) {
+trace_bonito_spciconf_small_access(addr, size);
+}
 
 /* set the pci address in s->config_reg */
 phb->config_reg = (pciaddr) | (1u << 31);
@@ -521,6 +526,9 @@ static uint64_t bonito_spciconf_read(void *opaque, hwaddr 
addr, unsigned size)
 if (pciaddr == 0x) {
 return MAKE_64BIT_MASK(0, size * 8);
 }
+if (addr & ~BONITO_PCICONF_REG_MASK_DS) {
+trace_bonito_spciconf_small_access(addr, size);
+}
 
 /* set the pci address in s->config_reg */
 phb->config_reg = (pciaddr) | (1u << 31);
diff --git a/hw/pci-host/trace-events b/hw/pci-host/trace-events
index f4b3a50cb0b..630e9fcc5e7 100644
--- a/hw/pci-host/trace-events
+++ b/hw/pci-host/trace-events
@@ -1,5 +1,8 @@
 # See docs/devel/tracing.rst for syntax documentation.
 
+# bonito.c
+bonito_spciconf_small_access(uint64_t addr, unsigned size) "PCI config address 
is smaller then 32-bit, addr: 0x%"PRIx64", size: %u"
+
 # grackle.c
 grackle_set_irq(int irq_num, int level) "set_irq num %d level %d"
 
-- 
2.31.1




[PATCH 3/5] hw/pci-host/bonito: Allow PCI config accesses smaller than 32-bit

2021-06-24 Thread Philippe Mathieu-Daudé
When running the official PMON firmware for the Fuloong 2E, we see
8-bit and 16-bit accesses to PCI config space:

  $ qemu-system-mips64el -M fuloong2e -bios pmon_2e.bin \
-trace -trace bonito\* -trace pci_cfg\*

  pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1
  bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x4d2, size: 2
  pci_cfg_write vt82c686b-pm 05:4 @0xd2 <- 0x1
  pci_cfg_write vt82c686b-pm 05:4 @0x4 <- 0x1
  pci_cfg_write vt82c686b-isa 05:0 @0x4 <- 0x7
  bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x81, size: 1
  pci_cfg_read vt82c686b-isa 05:0 @0x81 -> 0x0
  bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x81, size: 1
  pci_cfg_write vt82c686b-isa 05:0 @0x81 <- 0x80
  bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x83, size: 1
  pci_cfg_write vt82c686b-isa 05:0 @0x83 <- 0x89
  bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x85, size: 1
  pci_cfg_write vt82c686b-isa 05:0 @0x85 <- 0x3
  bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x5a, size: 1
  pci_cfg_write vt82c686b-isa 05:0 @0x5a <- 0x7
  bonito_spciconf_small_access PCI config address is smaller then 32-bit, addr: 
0x85, size: 1
  pci_cfg_write vt82c686b-isa 05:0 @0x85 <- 0x1

Also this is what the Linux kernel does since it supports the Bonito
north bridge:
https://elixir.bootlin.com/linux/v2.6.15/source/arch/mips/pci/ops-bonito64.c#L85

So it seems safe to assume the datasheet is incomplete or outdated
regarding the address constraints.

This problem was exposed by commit 911629e6d3773a8adeab48b
("vt82c686: Fix SMBus IO base and configuration registers").

Reported-by: BALATON Zoltan 
Suggested-by: Jiaxun Yang 
Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/pci-host/bonito.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c
index 751fdcec689..3c10608c9a2 100644
--- a/hw/pci-host/bonito.c
+++ b/hw/pci-host/bonito.c
@@ -187,7 +187,7 @@ FIELD(BONGENCFG, PCIQUEUE,  12, 1)
 #define BONITO_PCICONF_FUN_MASK0x700/* [10:8] */
 #define BONITO_PCICONF_FUN_OFFSET  8
 #define BONITO_PCICONF_REG_MASK_DS (~3) /* Per datasheet */
-#define BONITO_PCICONF_REG_MASK0xFC
+#define BONITO_PCICONF_REG_MASK_HW 0xff /* As seen on hardware */
 #define BONITO_PCICONF_REG_OFFSET  0
 
 
@@ -466,7 +466,7 @@ static uint32_t bonito_sbridge_pciaddr(void *opaque, hwaddr 
addr)
  BONITO_PCICONF_IDSEL_OFFSET;
 devno = ctz32(idsel);
 funno = (cfgaddr & BONITO_PCICONF_FUN_MASK) >> BONITO_PCICONF_FUN_OFFSET;
-regno = (cfgaddr & BONITO_PCICONF_REG_MASK) >> BONITO_PCICONF_REG_OFFSET;
+regno = (cfgaddr & BONITO_PCICONF_REG_MASK_HW) >> 
BONITO_PCICONF_REG_OFFSET;
 
 if (idsel == 0) {
 error_report("error in bonito pci config address 0x" TARGET_FMT_plx
-- 
2.31.1




[PATCH 1/5] hw/isa/vt82c686: Replace magic numbers by definitions

2021-06-24 Thread Philippe Mathieu-Daudé
Replace magic values of the Power Management / SMBus function (#4)
by definitions from the datasheet. The result is less compact, but
we can follow what the code does without having to resort to the
datasheet.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/isa/vt82c686.c | 50 +++
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index f57f3e70679..4ddcf2d398c 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -14,6 +14,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "hw/registerfields.h"
 #include "hw/isa/vt82c686.h"
 #include "hw/pci/pci.h"
 #include "hw/qdev-properties.h"
@@ -38,6 +39,16 @@
 #define TYPE_VIA_PM "via-pm"
 OBJECT_DECLARE_SIMPLE_TYPE(ViaPMState, VIA_PM)
 
+REG8(PM_GEN_CFG0,   0x40)
+REG8(PM_GEN_CFG1,   0x41)
+FIELD(PM_GEN_CFG1, ACPI_IO_ENABLE,  7, 1)
+REG32(PM_IO_BASE,   0x48)
+FIELD(PM_IO_BASE, ADDR, 7, 9)
+REG32(SMBUS_IO_BASE,0x90)
+FIELD(SMBUS_IO_BASE, ADDR,  4, 12)
+REG8(SMBUS_HOST_CONFIG, 0xd2)
+FIELD(SMBUS_HOST_CONFIG, ENABLE,0, 1)
+
 struct ViaPMState {
 PCIDevice dev;
 MemoryRegion io;
@@ -48,21 +59,24 @@ struct ViaPMState {
 
 static void pm_io_space_update(ViaPMState *s)
 {
-uint32_t pmbase = pci_get_long(s->dev.config + 0x48) & 0xff80UL;
+uint32_t pmbase = pci_get_long(s->dev.config + A_PM_IO_BASE);
 
 memory_region_transaction_begin();
-memory_region_set_address(&s->io, pmbase);
-memory_region_set_enabled(&s->io, s->dev.config[0x41] & BIT(7));
+memory_region_set_address(&s->io, pmbase & R_PM_IO_BASE_ADDR_MASK);
+memory_region_set_enabled(&s->io, FIELD_EX32(s->dev.config[A_PM_GEN_CFG1],
+  PM_GEN_CFG1, ACPI_IO_ENABLE));
 memory_region_transaction_commit();
 }
 
 static void smb_io_space_update(ViaPMState *s)
 {
-uint32_t smbase = pci_get_long(s->dev.config + 0x90) & 0xfff0UL;
+uint32_t smbase = pci_get_long(s->dev.config + A_SMBUS_IO_BASE);
 
 memory_region_transaction_begin();
-memory_region_set_address(&s->smb.io, smbase);
-memory_region_set_enabled(&s->smb.io, s->dev.config[0xd2] & BIT(0));
+memory_region_set_address(&s->smb.io, smbase & R_SMBUS_IO_BASE_ADDR_MASK);
+memory_region_set_enabled(&s->smb.io,
+  FIELD_EX32(s->dev.config[A_SMBUS_HOST_CONFIG],
+ SMBUS_HOST_CONFIG, ENABLE));
 memory_region_transaction_commit();
 }
 
@@ -98,19 +112,21 @@ static void pm_write_config(PCIDevice *d, uint32_t addr, 
uint32_t val, int len)
 
 trace_via_pm_write(addr, val, len);
 pci_default_write_config(d, addr, val, len);
-if (ranges_overlap(addr, len, 0x48, 4)) {
-uint32_t v = pci_get_long(s->dev.config + 0x48);
-pci_set_long(s->dev.config + 0x48, (v & 0xff80UL) | 1);
+if (ranges_overlap(addr, len, A_PM_IO_BASE, 4)) {
+uint32_t v = pci_get_long(s->dev.config + A_PM_IO_BASE);
+pci_set_long(s->dev.config + A_PM_IO_BASE,
+ (v & R_PM_IO_BASE_ADDR_MASK) | 1);
 }
-if (range_covers_byte(addr, len, 0x41)) {
+if (range_covers_byte(addr, len, A_PM_GEN_CFG1)) {
 pm_io_space_update(s);
 }
-if (ranges_overlap(addr, len, 0x90, 4)) {
-uint32_t v = pci_get_long(s->dev.config + 0x90);
-pci_set_long(s->dev.config + 0x90, (v & 0xfff0UL) | 1);
+if (ranges_overlap(addr, len, A_SMBUS_IO_BASE, 4)) {
+uint32_t v = pci_get_long(s->dev.config + A_SMBUS_IO_BASE);
+pci_set_long(s->dev.config + A_SMBUS_IO_BASE,
+ (v & R_SMBUS_IO_BASE_ADDR_MASK) | 1);
 }
-if (range_covers_byte(addr, len, 0xd2)) {
-s->dev.config[0xd2] &= 0xf;
+if (range_covers_byte(addr, len, A_SMBUS_HOST_CONFIG)) {
+s->dev.config[A_SMBUS_HOST_CONFIG] &= 0xf;
 smb_io_space_update(s);
 }
 }
@@ -176,9 +192,9 @@ static void via_pm_reset(DeviceState *d)
 memset(s->dev.config + PCI_CONFIG_HEADER_SIZE, 0,
PCI_CONFIG_SPACE_SIZE - PCI_CONFIG_HEADER_SIZE);
 /* Power Management IO base */
-pci_set_long(s->dev.config + 0x48, 1);
+pci_set_long(s->dev.config + A_PM_IO_BASE, 1);
 /* SMBus IO base */
-pci_set_long(s->dev.config + 0x90, 1);
+pci_set_long(s->dev.config + A_SMBUS_IO_BASE, 1);
 
 acpi_pm1_evt_reset(&s->ar);
 acpi_pm1_cnt_reset(&s->ar);
-- 
2.31.1




[PATCH 4/5] tests/acceptance: Test Linux on the Fuloong 2E machine

2021-06-24 Thread Philippe Mathieu-Daudé
Test the kernel from Lemote rescue image:
http://dev.lemote.com/files/resource/download/rescue/rescue-yl
Once downloaded, set the RESCUE_YL_PATH environment variable
to point to the downloaded image and test as:

  $ RESCUE_YL_PATH=~/images/fuloong2e/rescue-yl \
AVOCADO_ALLOW_UNTRUSTED_CODE=1 \
avocado --show=app,console run tests/acceptance/machine_mips_fuloong2e.py
  Fetching asset from 
tests/acceptance/machine_mips_fuloong2e.py:MipsFuloong2e.test_linux_kernel_isa_serial
   (1/1) 
tests/acceptance/machine_mips_fuloong2e.py:MipsFuloong2e.test_linux_kernel_isa_serial:
  console: Linux version 2.6.27.7lemote (root@debian) (gcc version 4.1.3 
20080623 (prerelease) (Debian 4.1.2-23)) #6 Fri Dec 12 00:11:25 CST 2008
  console: busclock=3300, cpuclock=-2145008360,memsize=256,highmemsize=0
  console: console [early0] enabled
  console: CPU revision is: 6302 (ICT Loongson-2)
  PASS (0.16 s)
  JOB TIME   : 0.51 s

Signed-off-by: Philippe Mathieu-Daudé 
---
 MAINTAINERS|  1 +
 tests/acceptance/machine_mips_fuloong2e.py | 42 ++
 2 files changed, 43 insertions(+)
 create mode 100644 tests/acceptance/machine_mips_fuloong2e.py

diff --git a/MAINTAINERS b/MAINTAINERS
index 1a041eaf864..1c515b4ba14 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1176,6 +1176,7 @@ F: hw/isa/vt82c686.c
 F: hw/pci-host/bonito.c
 F: hw/usb/vt82c686-uhci-pci.c
 F: include/hw/isa/vt82c686.h
+F: tests/acceptance/machine_mips_fuloong2e.py
 
 Loongson-3 virtual platforms
 M: Huacai Chen 
diff --git a/tests/acceptance/machine_mips_fuloong2e.py 
b/tests/acceptance/machine_mips_fuloong2e.py
new file mode 100644
index 000..0ac285e2af1
--- /dev/null
+++ b/tests/acceptance/machine_mips_fuloong2e.py
@@ -0,0 +1,42 @@
+# Functional tests for the Lemote Fuloong-2E machine.
+#
+# Copyright (c) 2019 Philippe Mathieu-Daudé 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import os
+
+from avocado import skipUnless
+from avocado_qemu import Test
+from avocado_qemu import wait_for_console_pattern
+
+class MipsFuloong2e(Test):
+
+timeout = 60
+
+@skipUnless(os.getenv('AVOCADO_ALLOW_UNTRUSTED_CODE'), 'untrusted code')
+@skipUnless(os.getenv('RESCUE_YL_PATH'), 'RESCUE_YL_PATH not available')
+def test_linux_kernel_isa_serial(self):
+"""
+:avocado: tags=arch:mips64el
+:avocado: tags=machine:fuloong2e
+:avocado: tags=endian:little
+:avocado: tags=device:bonito64
+:avocado: tags=device:via686b
+"""
+# Recovery system for the Yeeloong laptop
+# (enough to test the fuloong2e southbridge, accessing its ISA bus)
+# http://dev.lemote.com/files/resource/download/rescue/rescue-yl
+kernel_hash = 'ec4d1bd89a8439c41033ca63db60160cc6d6f09a'
+kernel_path = self.fetch_asset('file://' + os.getenv('RESCUE_YL_PATH'),
+   asset_hash=kernel_hash)
+
+self.vm.set_console()
+self.vm.add_args('-kernel', kernel_path)
+self.vm.launch()
+wait_for_console_pattern(self, 'Linux version 2.6.27.7lemote')
+cpu_revision = 'CPU revision is: 6302 (ICT Loongson-2)'
+wait_for_console_pattern(self, cpu_revision)
-- 
2.31.1




[PATCH 5/5] tests/acceptance: Test PMON on the Fuloong 2E machine

2021-06-24 Thread Philippe Mathieu-Daudé
Test the PMON firmware. As the firmware is not redistributable,
it has to be downloaded manually first. Then it can be used by
providing its path via the PMON2E_BIN_PATH environment variable:

  $ PMON2E_BIN_PATH=~/images/fuloong2e/pmon_2e.bin \
AVOCADO_ALLOW_UNTRUSTED_CODE=1 \
avocado --show=app,console run tests/acceptance/machine_mips_fuloong2e.py
  Fetching asset from 
tests/acceptance/machine_mips_fuloong2e.py:MipsFuloong2e.test_linux_kernel_isa_serial
   (1/3) 
tests/acceptance/machine_mips_fuloong2e.py:MipsFuloong2e.test_pmon_serial_console:
  console: PMON2000 MIPS Initializing. Standby...
  console: ERRORPC= CONFIG=00030932
  console: PRID=6302
  console: Init SDRAM Done!
  console: Sizing caches...
  console: Init caches...
  console: godson2 caches found
  console: Init caches done, cfg = 00030932
  console: Copy PMON to execute location...
  console: copy text section done.
  console: Copy PMON to execute location done.
  Uncompressing BiosOK,Booting Bios
  PASS (0.25 s)
   (2/3) 
tests/acceptance/machine_mips_fuloong2e.py:MipsFuloong2e.test_pmon_framebuffer_console:
  [...]
  Uncompressing BiosOK,Booting Bios
  console: FREQ
  console: FREI
  console: DONE
  console: TTYI
  console: TTYD
  console: ENVI
  console: MAPV
  console: Mfg  0, Id 60
  console: STDV
  console: SBDD
  console: PPCIH
  console: PCIS
  console: PCIR
  console: PCIW
  console: NETI
  console: RTCL
  console: PCID
  console: VGAI
  console: Default MODE_ID 2
  console: starting radeon init...
  console: radeon init done
  console: FRBI
  console: cfb_console init,fb=b400
  console: Video: Drawing the logo ...
  console: CONSOLE_SIZE 450560HSTI
  PASS (4.10 s)
   (3/3) 
tests/acceptance/machine_mips_fuloong2e.py:MipsFuloong2e.test_linux_kernel_isa_serial:
  console: Linux version 2.6.27.7lemote (root@debian) (gcc version 4.1.3 
20080623 (prerelease) (Debian 4.1.2-23)) #6 Fri Dec 12 00:11:25 CST 2008
  console: busclock=3300, cpuclock=-2145008360,memsize=256,highmemsize=0
  console: console [early0] enabled
  console: CPU revision is: 6302 (ICT Loongson-2)
  PASS (0.19 s)
  RESULTS: PASS 3 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0 | 
CANCEL 0
  JOB TIME   : 5.10 s

Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/acceptance/machine_mips_fuloong2e.py | 62 ++
 1 file changed, 62 insertions(+)

diff --git a/tests/acceptance/machine_mips_fuloong2e.py 
b/tests/acceptance/machine_mips_fuloong2e.py
index 0ac285e2af1..4854ba98560 100644
--- a/tests/acceptance/machine_mips_fuloong2e.py
+++ b/tests/acceptance/machine_mips_fuloong2e.py
@@ -8,15 +8,77 @@
 # SPDX-License-Identifier: GPL-2.0-or-later
 
 import os
+import time
 
 from avocado import skipUnless
 from avocado_qemu import Test
 from avocado_qemu import wait_for_console_pattern
 
+from tesseract_utils import tesseract_available, tesseract_ocr
+
 class MipsFuloong2e(Test):
 
 timeout = 60
 
+@skipUnless(os.getenv('AVOCADO_ALLOW_UNTRUSTED_CODE'), 'untrusted code')
+@skipUnless(os.getenv('PMON2E_BIN_PATH'), 'PMON2E_BIN_PATH not available')
+def test_pmon_serial_console(self):
+"""
+:avocado: tags=arch:mips64el
+:avocado: tags=machine:fuloong2e
+:avocado: tags=endian:little
+:avocado: tags=device:bonito64
+:avocado: tags=device:via686b
+"""
+pmon_hash = 'c812e1695d7b2320036f3ef494976969' # v1.1.2
+pmon_path = self.fetch_asset('file://' + os.getenv('PMON2E_BIN_PATH'),
+ asset_hash=pmon_hash, algorithm='md5')
+
+self.vm.set_console()
+self.vm.add_args('-bios', pmon_path)
+self.vm.launch()
+wait_for_console_pattern(self, 'PMON2000 MIPS Initializing. 
Standby...')
+wait_for_console_pattern(self, 'Booting Bios')
+
+@skipUnless(os.getenv('AVOCADO_ALLOW_UNTRUSTED_CODE'), 'untrusted code')
+# Tesseract 4 adds a new OCR engine based on LSTM neural networks. The
+# new version is faster and more accurate than version 3. The drawback is
+# that it is still alpha-level software.
+@skipUnless(tesseract_available(4), 'tesseract v4 OCR tool not available')
+@skipUnless(os.getenv('PMON2E_BIN_PATH'), 'PMON2E_BIN_PATH not available')
+def test_pmon_framebuffer_console(self):
+"""
+:avocado: tags=arch:mips64el
+:avocado: tags=machine:fuloong2e
+:avocado: tags=endian:little
+:avocado: tags=device:bonito64
+:avocado: tags=device:ati-vga
+"""
+screenshot_path = os.path.join(self.workdir, 'dump.ppm')
+
+pmon_hash = 'c812e1695d7b2320036f3ef494976969' # v1.1.2
+pmon_path = self.fetch_asset('file://' + os.getenv('PMON2E_BIN_PATH'),
+ asset_hash=pmon_hash, algorithm='md5')
+
+self.vm.set_console()
+self.vm.add_args('-bios', pmon_path,
+ '-vga', 'std',
+  

[PATCH 0/5] hw/mips: Fix the Fuloong 2E machine with PMON bios

2021-06-24 Thread Philippe Mathieu-Daudé
Commit 911629e6d37 ("vt82c686: Fix SMBus IO base and configuration
registers") exposed a "bug" in the Bonito north bridge. Fix it
and add tests.

Thanks to Zoltan for support while debugging :)

Philippe Mathieu-Daudé (5):
  hw/isa/vt82c686: Replace magic numbers by definitions
  hw/pci-host/bonito: Trace PCI config accesses smaller than 32-bit
  hw/pci-host/bonito: Allow PCI config accesses smaller than 32-bit
  tests/acceptance: Test Linux on the Fuloong 2E machine
  tests/acceptance: Test PMON on the Fuloong 2E machine

 hw/isa/vt82c686.c  |  50 ++
 hw/pci-host/bonito.c   |  12 ++-
 MAINTAINERS|   1 +
 hw/pci-host/trace-events   |   3 +
 tests/acceptance/machine_mips_fuloong2e.py | 104 +
 5 files changed, 151 insertions(+), 19 deletions(-)
 create mode 100644 tests/acceptance/machine_mips_fuloong2e.py

-- 
2.31.1




[PATCH v3 1/3] target/ppc: fix address translation bug for radix mmus

2021-06-24 Thread Bruno Larsen (billionai)
This commit attempts to fix the first bug mentioned by Richard Henderson in
https://lists.nongnu.org/archive/html/qemu-devel/2021-05/msg06247.html

To summarize the bug here, when radix-style mmus are translating an
address, they might need to call a second level of translation, with
hypervisor privileges. However, the way it was being done up until
this point meant that the second level translation had the same
privileges as the first level. This would only happen when a TCG guest
was emulating KVM, which is why it hasn't been discovered yet.

This patch attempts to correct that by making radix64_*_xlate functions
receive the mmu_idx, and passing one with the correct permission for the
second level translation.

The mmuidx macros added by this patch are only correct for non-bookE
mmus, because BookE style set the IS and DS bits inverted and there
might be other subtle differences. However, there doesn't seem to be
BookE cpus that have radix-style mmus, so we left a comment there to
document the issue, in case a machine does have that and was missed.

As part of this cleanup, we now need to send the correct mmu_idx
when calling get_phys_page_debug, otherwise we might not be able to see the
memory that the CPU could.

Suggested-by: Richard Henderson 
Signed-off-by: Bruno Larsen (billionai) 
Reviewed-by: Richard Henderson 
---
 target/ppc/mmu-book3s-v3.h | 13 +
 target/ppc/mmu-radix64.c   | 37 +
 target/ppc/mmu-radix64.h   |  2 +-
 target/ppc/mmu_helper.c|  8 +---
 4 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
index a1326df969..c89d0bccfd 100644
--- a/target/ppc/mmu-book3s-v3.h
+++ b/target/ppc/mmu-book3s-v3.h
@@ -47,6 +47,19 @@ struct prtb_entry {
 uint64_t prtbe0, prtbe1;
 };
 
+/*
+ * These correspond to the mmu_idx values computed in
+ * hreg_compute_hflags_value. See the tables therein
+ *
+ * They are here because some bits are inverted for BookE MMUs
+ * not necessarily because they only work for BookS. However,
+ * we only needed to change BookS MMUs, we left the functions
+ * here to avoid other possible bugs for untested MMUs
+ */
+static inline bool mmuidx_pr(int idx) { return !(idx & 1); }
+static inline bool mmuidx_real(int idx) { return idx & 2; }
+static inline bool mmuidx_hv(int idx) { return idx & 4; }
+
 #ifdef TARGET_PPC64
 
 static inline bool ppc64_use_proc_tbl(PowerPCCPU *cpu)
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index cbd404bfa4..5b0e62e676 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -155,7 +155,7 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, 
MMUAccessType access_type,
 
 static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type,
uint64_t pte, int *fault_cause, int *prot,
-   bool partition_scoped)
+   int mmu_idx, bool partition_scoped)
 {
 CPUPPCState *env = &cpu->env;
 int need_prot;
@@ -173,7 +173,8 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, 
MMUAccessType access_type,
 /* Determine permissions allowed by Encoded Access Authority */
 if (!partition_scoped && (pte & R_PTE_EAA_PRIV) && msr_pr) {
 *prot = 0;
-} else if (msr_pr || (pte & R_PTE_EAA_PRIV) || partition_scoped) {
+} else if (mmuidx_pr(mmu_idx) || (pte & R_PTE_EAA_PRIV) ||
+   partition_scoped) {
 *prot = ppc_radix64_get_prot_eaa(pte);
 } else { /* !msr_pr && !(pte & R_PTE_EAA_PRIV) && !partition_scoped */
 *prot = ppc_radix64_get_prot_eaa(pte);
@@ -299,7 +300,7 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU 
*cpu,
   ppc_v3_pate_t pate,
   hwaddr *h_raddr, int *h_prot,
   int *h_page_size, bool pde_addr,
-  bool guest_visible)
+  int mmu_idx, bool guest_visible)
 {
 int fault_cause = 0;
 hwaddr pte_addr;
@@ -310,7 +311,8 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU 
*cpu,
 if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB,
   pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size,
   &pte, &fault_cause, &pte_addr) ||
-ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, h_prot, 
true)) {
+ppc_radix64_check_prot(cpu, access_type, pte,
+   &fault_cause, h_prot, mmu_idx, true)) {
 if (pde_addr) { /* address being translated was that of a guest pde */
 fault_cause |= DSISR_PRTABLE_FAULT;
 }
@@ -332,7 +334,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
 vaddr eaddr, uint64_t pi

[PATCH v3 3/3] target/ppc: changed ppc_hash64_xlate to use mmu_idx

2021-06-24 Thread Bruno Larsen (billionai)
Changed hash64 address translation to use the supplied mmu_idx instead
of using the one stored in the msr, for parity purposes (other book3s
MMUs already use it).

Signed-off-by: Bruno Larsen (billionai) 
---
 target/ppc/mmu-hash64.c | 43 -
 target/ppc/mmu-hash64.h |  2 +-
 target/ppc/mmu_helper.c |  2 +-
 3 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index c1b98a97e9..191da21a5d 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -366,10 +366,9 @@ static inline int ppc_hash64_pte_noexec_guard(PowerPCCPU 
*cpu,
 }
 
 /* Check Basic Storage Protection */
-static int ppc_hash64_pte_prot(PowerPCCPU *cpu,
+static int ppc_hash64_pte_prot(PowerPCCPU *cpu, int mmu_idx,
ppc_slb_t *slb, ppc_hash_pte64_t pte)
 {
-CPUPPCState *env = &cpu->env;
 unsigned pp, key;
 /*
  * Some pp bit combinations have undefined behaviour, so default
@@ -377,7 +376,7 @@ static int ppc_hash64_pte_prot(PowerPCCPU *cpu,
  */
 int prot = 0;
 
-key = !!(msr_pr ? (slb->vsid & SLB_VSID_KP)
+key = !!(mmuidx_pr(mmu_idx) ? (slb->vsid & SLB_VSID_KP)
  : (slb->vsid & SLB_VSID_KS));
 pp = (pte.pte1 & HPTE64_R_PP) | ((pte.pte1 & HPTE64_R_PP0) >> 61);
 
@@ -744,17 +743,17 @@ static bool ppc_hash64_use_vrma(CPUPPCState *env)
 }
 }
 
-static void ppc_hash64_set_isi(CPUState *cs, uint64_t error_code)
+static void ppc_hash64_set_isi(CPUState *cs, int mmu_idx, uint64_t error_code)
 {
 CPUPPCState *env = &POWERPC_CPU(cs)->env;
 bool vpm;
 
-if (msr_ir) {
+if (!mmuidx_real(mmu_idx)) {
 vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM1);
 } else {
 vpm = ppc_hash64_use_vrma(env);
 }
-if (vpm && !msr_hv) {
+if (vpm && !mmuidx_hv(mmu_idx)) {
 cs->exception_index = POWERPC_EXCP_HISI;
 } else {
 cs->exception_index = POWERPC_EXCP_ISI;
@@ -762,17 +761,17 @@ static void ppc_hash64_set_isi(CPUState *cs, uint64_t 
error_code)
 env->error_code = error_code;
 }
 
-static void ppc_hash64_set_dsi(CPUState *cs, uint64_t dar, uint64_t dsisr)
+static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t dar, 
uint64_t dsisr)
 {
 CPUPPCState *env = &POWERPC_CPU(cs)->env;
 bool vpm;
 
-if (msr_dr) {
+if (!mmuidx_real(mmu_idx)) {
 vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM1);
 } else {
 vpm = ppc_hash64_use_vrma(env);
 }
-if (vpm && !msr_hv) {
+if (vpm && !mmuidx_hv(mmu_idx)) {
 cs->exception_index = POWERPC_EXCP_HDSI;
 env->spr[SPR_HDAR] = dar;
 env->spr[SPR_HDSISR] = dsisr;
@@ -874,7 +873,7 @@ static int build_vrma_slbe(PowerPCCPU *cpu, ppc_slb_t *slb)
 }
 
 bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
-  hwaddr *raddrp, int *psizep, int *protp,
+  hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
   bool guest_visible)
 {
 CPUState *cs = CPU(cpu);
@@ -897,7 +896,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
  */
 
 /* 1. Handle real mode accesses */
-if (access_type == MMU_INST_FETCH ? !msr_ir : !msr_dr) {
+if (mmuidx_real(mmu_idx)) {
 /*
  * Translation is supposedly "off", but in real mode the top 4
  * effective address bits are (mostly) ignored
@@ -909,7 +908,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
  * In virtual hypervisor mode, there's nothing to do:
  *   EA == GPA == qemu guest address
  */
-} else if (msr_hv || !env->has_hv_mode) {
+} else if (mmuidx_hv(mmu_idx) || !env->has_hv_mode) {
 /* In HV mode, add HRMOR if top EA bit is clear */
 if (!(eaddr >> 63)) {
 raddr |= env->spr[SPR_HRMOR];
@@ -937,13 +936,13 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 }
 switch (access_type) {
 case MMU_INST_FETCH:
-ppc_hash64_set_isi(cs, SRR1_PROTFAULT);
+ppc_hash64_set_isi(cs, mmu_idx, SRR1_PROTFAULT);
 break;
 case MMU_DATA_LOAD:
-ppc_hash64_set_dsi(cs, eaddr, DSISR_PROTFAULT);
+ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_PROTFAULT);
 break;
 case MMU_DATA_STORE:
-ppc_hash64_set_dsi(cs, eaddr,
+ppc_hash64_set_dsi(cs, mmu_idx, eaddr,
DSISR_PROTFAULT | DSISR_ISSTORE);
 break;
 default:
@@ -996,7 +995,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 /* 3. Check for segment level no-execute violation */
 if (access_type == MMU_INST_FETC

[PATCH v3 2/3] target/ppc: change ppc_hash32_xlate to use mmu_idx

2021-06-24 Thread Bruno Larsen (billionai)
Changed hash32 address translation to use the supplied mmu_idx, instead
of using what was stored in the msr, for parity purposes (radix64
already uses that).

Signed-off-by: Bruno Larsen (billionai) 
---
 target/ppc/mmu-hash32.c | 18 +-
 target/ppc/mmu-hash32.h |  2 +-
 target/ppc/mmu_helper.c |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index 6a07c345e4..9a477a181f 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -25,6 +25,7 @@
 #include "kvm_ppc.h"
 #include "internal.h"
 #include "mmu-hash32.h"
+#include "mmu-book3s-v3.h"
 #include "exec/log.h"
 
 /* #define DEBUG_BAT */
@@ -86,13 +87,12 @@ static int ppc_hash32_pp_prot(int key, int pp, int nx)
 return prot;
 }
 
-static int ppc_hash32_pte_prot(PowerPCCPU *cpu,
+static int ppc_hash32_pte_prot(PowerPCCPU *cpu, int mmu_idx,
target_ulong sr, ppc_hash_pte32_t pte)
 {
-CPUPPCState *env = &cpu->env;
 unsigned pp, key;
 
-key = !!(msr_pr ? (sr & SR32_KP) : (sr & SR32_KS));
+key = !!(mmuidx_pr(mmu_idx) ? (sr & SR32_KP) : (sr & SR32_KS));
 pp = pte.pte1 & HPTE32_R_PP;
 
 return ppc_hash32_pp_prot(key, pp, !!(sr & SR32_NX));
@@ -221,12 +221,12 @@ static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, 
target_ulong ea,
 static bool ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
 target_ulong eaddr,
 MMUAccessType access_type,
-hwaddr *raddr, int *prot,
+hwaddr *raddr, int *prot, int mmu_idx,
 bool guest_visible)
 {
 CPUState *cs = CPU(cpu);
 CPUPPCState *env = &cpu->env;
-int key = !!(msr_pr ? (sr & SR32_KP) : (sr & SR32_KS));
+int key = !!(mmuidx_pr(mmu_idx) ? (sr & SR32_KP) : (sr & SR32_KS));
 
 qemu_log_mask(CPU_LOG_MMU, "direct store...\n");
 
@@ -425,7 +425,7 @@ static hwaddr ppc_hash32_pte_raddr(target_ulong sr, 
ppc_hash_pte32_t pte,
 }
 
 bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
-  hwaddr *raddrp, int *psizep, int *protp,
+  hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
   bool guest_visible)
 {
 CPUState *cs = CPU(cpu);
@@ -441,7 +441,7 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 *psizep = TARGET_PAGE_BITS;
 
 /* 1. Handle real mode accesses */
-if (access_type == MMU_INST_FETCH ? !msr_ir : !msr_dr) {
+if (mmuidx_real(mmu_idx)) {
 /* Translation is off */
 *raddrp = eaddr;
 *protp = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
@@ -483,7 +483,7 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 /* 4. Handle direct store segments */
 if (sr & SR32_T) {
 return ppc_hash32_direct_store(cpu, sr, eaddr, access_type,
-   raddrp, protp, guest_visible);
+   raddrp, protp, mmu_idx, guest_visible);
 }
 
 /* 5. Check for segment level no-execute violation */
@@ -520,7 +520,7 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 
 /* 7. Check access permissions */
 
-prot = ppc_hash32_pte_prot(cpu, sr, pte);
+prot = ppc_hash32_pte_prot(cpu, mmu_idx, sr, pte);
 
 if (need_prot & ~prot) {
 /* Access right violation */
diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
index 8694eccabd..807d9bc6e8 100644
--- a/target/ppc/mmu-hash32.h
+++ b/target/ppc/mmu-hash32.h
@@ -5,7 +5,7 @@
 
 hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash);
 bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
-  hwaddr *raddrp, int *psizep, int *protp,
+  hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
   bool guest_visible);
 
 /*
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index 9dcdf88597..a3381e1aa0 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -2922,7 +2922,7 @@ static bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 case POWERPC_MMU_32B:
 case POWERPC_MMU_601:
 return ppc_hash32_xlate(cpu, eaddr, access_type,
-raddrp, psizep, protp, guest_visible);
+raddrp, psizep, protp, mmu_idx, guest_visible);
 
 default:
 return ppc_jumbo_xlate(cpu, eaddr, access_type, raddrp,
-- 
2.17.1




[PATCH v3 0/3] Clean up MMU translation

2021-06-24 Thread Bruno Larsen (billionai)
This is the final part of the MMU fixes that were needed to support
disable-tcg, focusing on a possible bug on the second level address
translation of radix64 MMUs, and some changes to hash32 and hash64 to
work the same as radix64.

Changes for v3:
 * removed patches that were already applied
 * fixed comments on last patch
 * added 2 new patches

Changes for v2:
 * rebase on ppc-for-6.1
 * added the bugfix

Bruno Larsen (billionai) (3):
  target/ppc: fix address translation bug for radix mmus
  target/ppc: change ppc_hash32_xlate to use mmu_idx
  target/ppc: changed ppc_hash64_xlate to use mmu_idx

 target/ppc/mmu-book3s-v3.h | 13 
 target/ppc/mmu-hash32.c| 18 
 target/ppc/mmu-hash32.h|  2 +-
 target/ppc/mmu-hash64.c| 43 +++---
 target/ppc/mmu-hash64.h|  2 +-
 target/ppc/mmu-radix64.c   | 37 ++--
 target/ppc/mmu-radix64.h   |  2 +-
 target/ppc/mmu_helper.c| 12 ++-
 8 files changed, 74 insertions(+), 55 deletions(-)

-- 
2.17.1




Re: [PATCH v2 2/2] docs/devel: tvg-plugins: add execlog plugin description

2021-06-24 Thread Alex Bennée


Alexandre IOOSS  writes:

> [[PGP Signed Part:Undecided]]
> On 6/22/21 12:37 PM, Alex Bennée wrote:
>> Alexandre IOOSS  writes:
>> 
>>> [[PGP Signed Part:Undecided]]
>>> On 6/22/21 10:48 AM, Alex Bennée wrote:
 Alexandre Iooss  writes:
> [...]
> +
> +The execlog tool traces executed instructions with memory access. It can 
> be used
> +for debugging and security analysis purposes.
 We should probably mention that this will generate a lot of output.
 Running the admittedly memory heavy softmmu memory test:
 ./aarch64-softmmu/qemu-system-aarch64 -D test.out -d plugin \
   -plugin contrib/plugins/libexeclog.so  \
   -cpu max -serial mon:stdio -M virt \
   -display none -semihosting-config chardev=serial0 \
   -kernel ./tests/tcg/aarch64-softmmu/memory
 generates a 8.6Gb text file. I suspect once this is merged you might
 want to look at options to target the instrumentation at areas of
 specific interest or abbreviate information.
>>>
>>> Yes! In my downstream version I am triggering the beginning and the
>>> end of trace acquisition by matching two virtual addresses of GPIO
>>> device access. This works in my case because I'm also using the same
>>> GPIO for triggering an oscilloscope, but maybe we would like to
>>> upstream something more generic.
>>>
>>> I'm still thinking about this (maybe for a later patch) but I believe
>>> it would be nice to have the following:
>>>   - If no argument is given to the plugin, log everything.
>>>   - Allow the user to specify either a memory address, an instruction
>>> virtual address or an opcode that would start the acquisition.
>>>   - Same to stop the acquisition.
>> Sounds reasonable to me.
>> 
>>> This would look like this to start/stop acquisition using GPIO PA8 on
>>> STM32VLDISCOVERY:
>>>
>>>./arm-softmmu/qemu-system-arm -M stm32vldiscovery \
>>>  -kernel ./firmware.elf -d plugin \
>>>  -plugin libexeclog.so,arg=mem:1073809424,arg=mem:1073809424
>> I quite like the formats you can use for -dfilter, for example:
>>0x1000+0x100,0x2100-0x100,0x3000..0x3100
>> it might even be worth exposing qemu_set_dfilter_ranges as a helper
>> function to plugins to avoid copy and paste.
>
> We could expose "-dfilter", but maybe it is better to reserve it to
> filter the output of the plugin rather than triggering the tracing?

I meant the parsing code for dfilter style expressions, not the dfilter
itself ;-)

> I could implement a format similar to dfilter to configure triggering.
> This would enable someone to start logging on any access to a memory
> range.
>
>> So what would your above command trigger? A write to 1073809424
>> would
>> start the trace and the next write to the same address would stop it?
>> 
>
> Yes exactly. In this case the first access set the GPIO high, and the
> second access set it low.
>
> I don't believe the plugin can access the value stored in memory (i.e.
> differentiating between setting a GPIO output high or low). I don't
> find this problematic in my case, but maybe it could be for someone
> else.

Not currently but in principle it wouldn't be too hard to do. It would
just be extra data to copy into a TCG Arg. We would probably want to
make it optional though.

>
> From the discussion I see the following possible patches:
> 1. Add an argument to trigger the beginning with one address (memory
> or instruction).
> 2. Add an argument to trigger the end with one address (memory or
> instruction).
> 3. Add the support for ranges (in "dfilter" style).
> 4. (maybe) Add the support to trigger on an opcode.
> 5. Add support for "-dfilter" to filter the logging output.
>
> Thanks,
> -- Alexandre
>
> [[End of PGP Signed Part]]


-- 
Alex Bennée



Re: Difficulties to access guest memory in TCG plugins

2021-06-24 Thread Alex Bennée


Philippe Mathieu-Daudé  writes:

> Cc'ing the maintainer: Alex.
>
> On 6/23/21 5:08 PM, Kevin Mambu wrote:
>> Hi, everyone,
>> I need to put in place a plugin which is able to modify memory for a
>> specific project, and until now I managed to do so using
>> cpu_memory_rw_debug().

Out of interest what is your use-case for this? The project has very
deliberately avoided adding such an API for writing memory for plugins
to avoid people attempting to bypass the GPL by doing things in plugins.

>> However, the function happens to fail at times, and when looking at the
>> source code of cpu_memory_rw_debug() on elixir, the issue seems to be
>> either the memory being locked by another part of QEMU code or some kind
>> of page fault.
>> My prior researches tend to put the blame on the RCU lock mechanism, and
>> I was wondering if there was another, fail-safe, function I could use to
>> access guest memory from my plug-in ?

I'm not surprised. If you look at tlb_plugin_lookup you'll see the
existing introspection code has to jump through some hoops to avoid race
conditions that can cause updates to the internal TLB status. If we
wanted to support reading values from memory that's probably doable -
but for writing it would be hard to do in an idempotent way.

>> 
>> Thank you in advance,
>> 
>> *Kévin Mambu*
>> Sent with ProtonMail  Secure Email.
>> 


-- 
Alex Bennée



Re: Extracting PC information from QEMU/KVM during single-step

2021-06-24 Thread Steven Raasch
Peter -

Thanks for the clarification, that makes perfect sense.

In this case, using GDB is clearly the best way to go about the tracing.

I appreciate your time!

-S

On Thu, Jun 24, 2021 at 1:51 PM Peter Maydell 
wrote:

> On Thu, 24 Jun 2021 at 18:09, Steven Raasch  wrote:
> > NOTE: I do not yet understand how gdb interacts with the virtual
> machine. I have experience with GDB, but only at a linux app-debug level. I
> don't grok how gdb on a linux host works with QEMU running a windows guest.
> > My *assumption* is that the VM continues to run while an app is being
> debugged with GDB can be stopped, stepped, etc. If this is the case, I
> would expect that the VM's sense of time will continue to move forward
> while the app is paused. This would be an issue for my time-sensitive app.
>
> No, that's not how it works. A gdb connected to QEMU's gdbstub is a
> bit like a hardware JTAG debugger connected to a real CPU, if that
> helps. When gdb gets control the entire VM is stopped; stepping
> steps one instruction of whatever the VM is doing. gdb and the
> gdbstub have no understanding of individual processes running
> inside the guest OS -- single stepping will happily step through
> the app, into interrupt handlers, across the OS context switching
> and into other processes, etc.
>
> -- PMM
>


Re: [PULL 30/43] vt82c686: Fix SMBus IO base and configuration registers

2021-06-24 Thread Philippe Mathieu-Daudé
On 6/24/21 8:29 PM, BALATON Zoltan wrote:
> On Thu, 24 Jun 2021, BALATON Zoltan wrote:
>> On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:
>>> On 6/24/21 7:00 PM, BALATON Zoltan wrote:
 On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:
> On 6/24/21 6:16 PM, Philippe Mathieu-Daudé wrote:
>> On 6/24/21 6:01 PM, Philippe Mathieu-Daudé wrote:
>>> On 6/24/21 5:46 PM, Philippe Mathieu-Daudé wrote:
 Hi Zoltan,

 On 2/21/21 3:34 PM, Philippe Mathieu-Daudé wrote:
> From: BALATON Zoltan 
>
> The base address of the SMBus io ports and its enabled status
> is set
> by registers in the PCI config space but this was not correctly
> emulated. Instead the SMBus registers were mapped on realize to
> the
> base address set by a property to the address expected by
> fuloong2e
> firmware.
>
> Fix the base and config register handling to more closely model
> hardware which allows to remove the property and allows the
> guest to
> control this mapping. Do all this in reset instead of realize
> so it's
> correctly updated on reset.

 This commit broke running PMON on Fuloong2E:
 https://www.mail-archive.com/qemu-devel@nongnu.org/msg752605.html
 console: PMON2000 MIPS Initializing. Standby...
 console: ERRORPC= CONFIG=00030932
 console: PRID=6302
 console: DIMM read
 console: 00ff
 console: 00ff
 console: 00ff
 console: 00ff
 console: 00ff
 console: 00ff
 console: 00ff
 console: 00ff
 console: 00ff
 console: 00ff
 ...

 From here the console loops displaying this value...
>>>
>>> Tracing:
>>>
>> pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1
>
> Offset 93-90 – SMBus I/O Base
> ... RW
> 15-4 I/O Base (16-byte I/O space) default = 00h
> pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1
>
>> pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1
>
> Offset D2 – SMBus Host Configuration . RW
> SMBus Host Controller Enable
> 0 Disable SMB controller functions . default
> 1 Enable SMB controller functions
> pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1
>
> Hmm, the datasheet indeed documents 0xd2... why is the guest accessing
> 0xd0 to enable the function? It seems this is the problem, since if
> I replace d2 -> d0 PMON boots. See below [*].
>>>
 Expected:

 console: PMON2000 MIPS Initializing. Standby...
 console: ERRORPC= CONFIG=00030932
 console: PRID=6302
 console: DIMM read
 console: 0080
 console: read memory type
 console: read number of rows
 ...
>>>
>  static void pm_write_config(PCIDevice *d, uint32_t addr, uint32_t
> val, int len)
>  {
> +    VT686PMState *s = VT82C686B_PM(d);
> +
>  trace_via_pm_write(addr, val, len);
>  pci_default_write_config(d, addr, val, len);
> +    if (ranges_overlap(addr, len, 0x90, 4)) {
> +    uint32_t v = pci_get_long(s->dev.config + 0x90);
> +    pci_set_long(s->dev.config + 0x90, (v & 0xfff0UL) | 1);
> +    }
> +    if (range_covers_byte(addr, len, 0xd2)) {
> +    s->dev.config[0xd2] &= 0xf;
> +    smb_io_space_update(s);
>
> [*] So with the guest writing at 0xd0, this block is skipped and the
> I/O region is never enabled.

 Could it be it does word or dword i/o to access multiple addresses at
 once. Wasn't there a recent change to memory regions that could break
 this? Is adjusting valid access sizes to the mem region ops needed now
 to have the memory region handle this?
>>>
>>> Do you mean it was buggy earlier, so to accept a guest write at 0xd0
>>> the code had to handle the 0xd2 address? 0xd2 is the address in the
>>> datasheet, so I doubt.
>>
>> No, I meant that instead of writing a byte to 0xd2 the guest might
>> write a dword to 0xd0 which also overlaps 0xd2 and would change that
>> but it does not reach the device for some reason. But in your trace
>> there was:
>>
 mr_ops_write mr 0x5583912b2e00 (south-bridge-pci-config) addr
 0x1fe80490 value 0xeee1 size 4
 mr_ops_write mr 0x5583912b2e00 (south-bridge-pci-config) addr
 0x1fe804d2 value 0x1 size 2
>>>
>>> These are:
>>> pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1
>>> pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1
>>
>> Where size is 2 so it would not reach 0xd2 but the address part above
>> is 0x1fe804d2 which somehow comes out as 0xd0 in the PCI trace so
>> looks like something strips the low bits within PCI code and the guest
>> does 

Re: [PULL 00/12] Misc, mostly meson patches for 2021-06-23

2021-06-24 Thread Peter Maydell
On Wed, 23 Jun 2021 at 13:16, Paolo Bonzini  wrote:
>
> The following changes since commit b22726abdfa54592d6ad88f65b0297c0e8b363e2:
>
>   Merge remote-tracking branch 
> 'remotes/vivier2/tags/linux-user-for-6.1-pull-request' into staging 
> (2021-06-22 16:07:53 +0100)
>
> are available in the Git repository at:
>
>   https://gitlab.com/bonzini/qemu.git tags/for-upstream
>
> for you to fetch changes up to 0f38448da7ab61a23fc35f57276a7272d6e4d984:
>
>   KVM: Fix dirty ring mmap incorrect size due to renaming accident 
> (2021-06-23 12:20:59 +0200)
>
> 
> * Some Meson test conversions
> * KVM dirty page ring buffer fix
> * KVM TSC scaling support
>

This generates a new warning on one of my boxes:

[...]
Has header "sasl/sasl.h" : YES (cached)
Library sasl2 found: YES
Has header "security/pam_appl.h" : YES
Library pam found: YES
../meson.build:926: WARNING: could not link libpam, disabling

-- PMM



Re: [v3] migration: fix the memory overwriting risk in add_to_iovec

2021-06-24 Thread Dr. David Alan Gilbert
* Lin Feng (linfen...@huawei.com) wrote:
> From: Feng Lin 
> 
> When testing migration, a Segmentation fault qemu core is generated.
> 0  error_free (err=0x1)
> 1  0x7f8b862df647 in qemu_fclose (f=f@entry=0x55e06c247640)
> 2  0x7f8b8516d59a in migrate_fd_cleanup (s=s@entry=0x55e06c0e1ef0)
> 3  0x7f8b8516d66c in migrate_fd_cleanup_bh (opaque=0x55e06c0e1ef0)
> 4  0x7f8b8626a47f in aio_bh_poll (ctx=ctx@entry=0x55e06b5a16d0)
> 5  0x7f8b8626e71f in aio_dispatch (ctx=0x55e06b5a16d0)
> 6  0x7f8b8626a33d in aio_ctx_dispatch (source=, 
> callback=, user_data=)
> 7  0x7f8b866bdba4 in g_main_context_dispatch ()
> 8  0x7f8b8626cde9 in glib_pollfds_poll ()
> 9  0x7f8b8626ce62 in os_host_main_loop_wait (timeout=)
> 10 0x7f8b8626cffd in main_loop_wait (nonblocking=nonblocking@entry=0)
> 11 0x7f8b862ef01f in main_loop ()
> Using gdb print the struct QEMUFile f = {
>   ...,
>   iovcnt = 65, last_error = 21984,
>   last_error_obj = 0x1, shutdown = true
> }
> Here iovcnt has overflowed, because MAX_IOV_SIZE is 64.
> struct QEMUFile {
> ...;
> struct iovec iov[MAX_IOV_SIZE];
> unsigned int iovcnt;
> int last_error;
> Error *last_error_obj;
> bool shutdown;
> };
> iovcnt and last_error are overwritten by add_to_iovec().
> Right now, add_to_iovec() increases iovcnt before checking the limit.
> And it seems that add_to_iovec() assumes that iovcnt will set to zero
> in qemu_fflush(). But qemu_fflush() will directly return when f->shutdown
> is true.
> 
> The situation may occur when libvirtd restarts during migration, after
> f->shutdown is set, before calling qemu_file_set_error() in
> qemu_file_shutdown().
> 
> So the safest way is to check iovcnt before increasing it.
> 
> Signed-off-by: Feng Lin 
> ---
>  migration/qemu-file.c | 13 -
>  1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index d6e03dbc0e..f6486cf7bc 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -416,6 +416,9 @@ static int add_to_iovec(QEMUFile *f, const uint8_t *buf, 
> size_t size,
>  {
>  f->iov[f->iovcnt - 1].iov_len += size;
>  } else {
> +if (f->iovcnt >= MAX_IOV_SIZE) {
> +goto fflush;
> +}

Why call qemu_fflush in this case?
If I understand what you're saying, then we only get to here if a
previous qemu_fflush has failed, so this should fail as well?

How about, something like:
if (f->iovcnt >= MAX_IOV_SIZE) {
/* Should only happen if a previous fflush failed */
assert(f->shutdown || !qemu_file_is_writeable(f));
return 1;
}

?

Dave

>  if (may_free) {
>  set_bit(f->iovcnt, f->may_free);
>  }
> @@ -423,12 +426,12 @@ static int add_to_iovec(QEMUFile *f, const uint8_t 
> *buf, size_t size,
>  f->iov[f->iovcnt++].iov_len = size;
>  }
>  
> -if (f->iovcnt >= MAX_IOV_SIZE) {
> -qemu_fflush(f);
> -return 1;
> +if (f->iovcnt < MAX_IOV_SIZE) {
> +return 0;
>  }
> -
> -return 0;
> +fflush:
> +qemu_fflush(f);
> +return 1;
>  }
>  
>  static void add_buf_to_iovec(QEMUFile *f, size_t len)
> -- 
> 2.23.0
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




[PATCH v4 3/5] ui: Create sync objects and fences only for blobs

2021-06-24 Thread Vivek Kasireddy
Create sync objects and fences only for dmabufs that are blobs. Once a
fence is created (after glFlush) and is signalled,
graphic_hw_gl_flushed() will be called and virtio-gpu cmd processing
will be resumed.

Cc: Gerd Hoffmann 
Signed-off-by: Vivek Kasireddy 
---
 hw/display/virtio-gpu-udmabuf.c |  1 +
 include/ui/console.h|  1 +
 include/ui/egl-helpers.h|  1 +
 include/ui/gtk.h|  1 +
 ui/gtk-egl.c| 20 
 ui/gtk-gl-area.c| 20 
 ui/gtk.c| 13 +
 7 files changed, 57 insertions(+)

diff --git a/hw/display/virtio-gpu-udmabuf.c b/hw/display/virtio-gpu-udmabuf.c
index 3c01a415e7..c6f7f58784 100644
--- a/hw/display/virtio-gpu-udmabuf.c
+++ b/hw/display/virtio-gpu-udmabuf.c
@@ -185,6 +185,7 @@ static VGPUDMABuf
 dmabuf->buf.stride = fb->stride;
 dmabuf->buf.fourcc = qemu_pixman_to_drm_format(fb->format);
 dmabuf->buf.fd = res->dmabuf_fd;
+dmabuf->buf.allow_fences = true;
 
 dmabuf->scanout_id = scanout_id;
 QTAILQ_INSERT_HEAD(&g->dmabuf.bufs, dmabuf, next);
diff --git a/include/ui/console.h b/include/ui/console.h
index 49978fdae3..93a0378e1d 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -170,6 +170,7 @@ typedef struct QemuDmaBuf {
 bool  y0_top;
 void  *sync;
 int   fence_fd;
+bool  allow_fences;
 } QemuDmaBuf;
 
 typedef struct DisplayState DisplayState;
diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h
index 2c3ba92b53..2fb6e0dd6b 100644
--- a/include/ui/egl-helpers.h
+++ b/include/ui/egl-helpers.h
@@ -19,6 +19,7 @@ typedef struct egl_fb {
 GLuint texture;
 GLuint framebuffer;
 bool delete_texture;
+QemuDmaBuf *dmabuf;
 } egl_fb;
 
 void egl_fb_destroy(egl_fb *fb);
diff --git a/include/ui/gtk.h b/include/ui/gtk.h
index e6cbf0507c..3e6a48b978 100644
--- a/include/ui/gtk.h
+++ b/include/ui/gtk.h
@@ -152,6 +152,7 @@ extern bool gtk_use_gl_area;
 /* ui/gtk.c */
 void gd_update_windowsize(VirtualConsole *vc);
 int gd_monitor_update_interval(GtkWidget *widget);
+void gd_hw_gl_flushed(void *vc);
 
 /* ui/gtk-egl.c */
 void gd_egl_init(VirtualConsole *vc);
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index b671181272..2c68696d9f 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -12,6 +12,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
 
 #include "trace.h"
 
@@ -63,6 +64,7 @@ void gd_egl_draw(VirtualConsole *vc)
 {
 GdkWindow *window;
 int ww, wh;
+QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf;
 
 if (!vc->gfx.gls) {
 return;
@@ -94,6 +96,14 @@ void gd_egl_draw(VirtualConsole *vc)
 }
 
 glFlush();
+if (dmabuf) {
+egl_dmabuf_create_fence(dmabuf);
+if (dmabuf->fence_fd > 0) {
+qemu_set_fd_handler(dmabuf->fence_fd, gd_hw_gl_flushed, NULL, vc);
+return;
+}
+graphic_hw_gl_block(vc->gfx.dcl.con, false);
+}
 graphic_hw_gl_flushed(vc->gfx.dcl.con);
 }
 
@@ -209,6 +219,8 @@ void gd_egl_scanout_dmabuf(DisplayChangeListener *dcl,
QemuDmaBuf *dmabuf)
 {
 #ifdef CONFIG_GBM
+VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+
 egl_dmabuf_import_texture(dmabuf);
 if (!dmabuf->texture) {
 return;
@@ -217,6 +229,10 @@ void gd_egl_scanout_dmabuf(DisplayChangeListener *dcl,
 gd_egl_scanout_texture(dcl, dmabuf->texture,
false, dmabuf->width, dmabuf->height,
0, 0, dmabuf->width, dmabuf->height);
+
+if (dmabuf->allow_fences) {
+vc->gfx.guest_fb.dmabuf = dmabuf;
+}
 #endif
 }
 
@@ -281,6 +297,10 @@ void gd_egl_scanout_flush(DisplayChangeListener *dcl,
 egl_fb_blit(&vc->gfx.win_fb, &vc->gfx.guest_fb, !vc->gfx.y0_top);
 }
 
+if (vc->gfx.guest_fb.dmabuf) {
+egl_dmabuf_create_sync(vc->gfx.guest_fb.dmabuf);
+}
+
 eglSwapBuffers(qemu_egl_display, vc->gfx.esurface);
 }
 
diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c
index dd5783fec7..1654941dc9 100644
--- a/ui/gtk-gl-area.c
+++ b/ui/gtk-gl-area.c
@@ -8,6 +8,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
 
 #include "trace.h"
 
@@ -38,6 +39,7 @@ static void gtk_gl_area_set_scanout_mode(VirtualConsole *vc, 
bool scanout)
 void gd_gl_area_draw(VirtualConsole *vc)
 {
 int ww, wh, y1, y2;
+QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf;
 
 if (!vc->gfx.gls) {
 return;
@@ -71,7 +73,18 @@ void gd_gl_area_draw(VirtualConsole *vc)
 surface_gl_render_texture(vc->gfx.gls, vc->gfx.ds);
 }
 
+if (dmabuf) {
+egl_dmabuf_create_sync(dmabuf);
+}
 glFlush();
+if (dmabuf) {
+egl_dmabuf_create_fence(dmabuf);
+if (dmabuf->fence_fd > 0) {
+qemu_set_fd_handler(dmabuf->fence_fd, gd_hw_gl_flushed, NULL, vc);
+return;
+}
+graphic_hw_gl_block(vc->gfx.dcl.con, false);
+}

[PATCH v4 4/5] ui/gtk-egl: Wait for the draw signal for dmabuf blobs

2021-06-24 Thread Vivek Kasireddy
Instead of immediately drawing and submitting, queue and wait
for the draw signal if the dmabuf submitted is a blob.

Cc: Gerd Hoffmann 
Reviewed-by: Gerd Hoffmann 
Signed-off-by: Vivek Kasireddy 
---
 include/ui/gtk.h |  2 ++
 ui/gtk-egl.c | 15 +++
 ui/gtk.c |  2 +-
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/include/ui/gtk.h b/include/ui/gtk.h
index 3e6a48b978..a25f5bfecc 100644
--- a/include/ui/gtk.h
+++ b/include/ui/gtk.h
@@ -179,6 +179,8 @@ void gd_egl_cursor_dmabuf(DisplayChangeListener *dcl,
   uint32_t hot_x, uint32_t hot_y);
 void gd_egl_cursor_position(DisplayChangeListener *dcl,
 uint32_t pos_x, uint32_t pos_y);
+void gd_egl_flush(DisplayChangeListener *dcl,
+  uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 void gd_egl_scanout_flush(DisplayChangeListener *dcl,
   uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 void gtk_egl_init(DisplayGLMode mode);
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index 2c68696d9f..737e7b90d4 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -304,6 +304,21 @@ void gd_egl_scanout_flush(DisplayChangeListener *dcl,
 eglSwapBuffers(qemu_egl_display, vc->gfx.esurface);
 }
 
+void gd_egl_flush(DisplayChangeListener *dcl,
+  uint32_t x, uint32_t y, uint32_t w, uint32_t h)
+{
+VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+GtkWidget *area = vc->gfx.drawing_area;
+
+if (vc->gfx.guest_fb.dmabuf) {
+graphic_hw_gl_block(vc->gfx.dcl.con, true);
+gtk_widget_queue_draw_area(area, x, y, w, h);
+return;
+}
+
+gd_egl_scanout_flush(&vc->gfx.dcl, x, y, w, h);
+}
+
 void gtk_egl_init(DisplayGLMode mode)
 {
 GdkDisplay *gdk_display = gdk_display_get_default();
diff --git a/ui/gtk.c b/ui/gtk.c
index ee3a084c21..ce885d2ca3 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -637,7 +637,7 @@ static const DisplayChangeListenerOps dcl_egl_ops = {
 .dpy_gl_scanout_dmabuf   = gd_egl_scanout_dmabuf,
 .dpy_gl_cursor_dmabuf= gd_egl_cursor_dmabuf,
 .dpy_gl_cursor_position  = gd_egl_cursor_position,
-.dpy_gl_update   = gd_egl_scanout_flush,
+.dpy_gl_update   = gd_egl_flush,
 .dpy_gl_release_dmabuf   = gd_gl_release_dmabuf,
 .dpy_has_dmabuf  = gd_has_dmabuf,
 };
-- 
2.30.2




[PATCH v4 0/5] virtio-gpu: Add a default synchronization mechanism for blobs

2021-06-24 Thread Vivek Kasireddy
When the Guest and Host are using Blob resources, there is a chance
that they may use the underlying storage associated with a Blob at
the same time leading to glitches such as flickering or tearing.
To prevent these from happening, the Host needs to ensure that it
waits until its Blit is completed by the Host GPU before letting
the Guest reuse the Blob.

This should be the default behavior regardless of the type of Guest
that is using Blob resources but would be particularly useful for 
Guests that are using frontbuffer rendering such as Linux with X
or Windows 10, etc.

The way it works is the Guest includes a fence as part of 
resource_flush and waits for it to be signalled. The Host will
queue a repaint request and signal the fence after it completes
waiting on the sync object associated with the Blit.

v2:
- Added more description in the cover letter
- Removed the wait from resource_flush and included it in
  a gl_flushed() callback

v3:
- Instead of explicitly waiting on the sync object and stalling the
  thread, add the relevant fence fd to Qemu's main loop and wait
  for it to be signalled. (suggested by Gerd Hoffmann)

v4:
- Replace the field 'blob' with 'allow_fences' in QemuDmabuf struct.
  (Gerd)

Cc: Gerd Hoffmann 
Cc: Dongwon Kim 
Cc: Tina Zhang 

Vivek Kasireddy (5):
  ui/gtk: Create a common release_dmabuf helper
  ui/egl: Add egl helpers to help with synchronization
  ui: Create sync objects and fences only for blobs
  ui/gtk-egl: Wait for the draw signal for dmabuf blobs
  virtio-gpu: Add gl_flushed callback

 hw/display/virtio-gpu-udmabuf.c |  1 +
 hw/display/virtio-gpu.c | 32 ++--
 include/ui/console.h|  3 +++
 include/ui/egl-helpers.h|  3 +++
 include/ui/gtk.h|  5 ++--
 ui/egl-helpers.c| 26 
 ui/gtk-egl.c| 43 +++--
 ui/gtk-gl-area.c| 20 +++
 ui/gtk.c| 26 ++--
 9 files changed, 145 insertions(+), 14 deletions(-)

-- 
2.30.2




[PATCH v4 2/5] ui/egl: Add egl helpers to help with synchronization

2021-06-24 Thread Vivek Kasireddy
These egl helpers would be used for creating and waiting on
a sync object.

Cc: Gerd Hoffmann 
Reviewed-by: Gerd Hoffmann 
Signed-off-by: Vivek Kasireddy 
---
 include/ui/console.h |  2 ++
 include/ui/egl-helpers.h |  2 ++
 ui/egl-helpers.c | 26 ++
 3 files changed, 30 insertions(+)

diff --git a/include/ui/console.h b/include/ui/console.h
index b30b63976a..49978fdae3 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -168,6 +168,8 @@ typedef struct QemuDmaBuf {
 uint64_t  modifier;
 uint32_t  texture;
 bool  y0_top;
+void  *sync;
+int   fence_fd;
 } QemuDmaBuf;
 
 typedef struct DisplayState DisplayState;
diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h
index f1bf8f97fc..2c3ba92b53 100644
--- a/include/ui/egl-helpers.h
+++ b/include/ui/egl-helpers.h
@@ -45,6 +45,8 @@ int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, 
EGLint *fourcc,
 
 void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf);
 void egl_dmabuf_release_texture(QemuDmaBuf *dmabuf);
+void egl_dmabuf_create_sync(QemuDmaBuf *dmabuf);
+void egl_dmabuf_create_fence(QemuDmaBuf *dmabuf);
 
 #endif
 
diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
index 6d0cb2b5cb..d8986b0a7f 100644
--- a/ui/egl-helpers.c
+++ b/ui/egl-helpers.c
@@ -76,6 +76,32 @@ void egl_fb_setup_for_tex(egl_fb *fb, int width, int height,
   GL_TEXTURE_2D, fb->texture, 0);
 }
 
+void egl_dmabuf_create_sync(QemuDmaBuf *dmabuf)
+{
+EGLSyncKHR sync;
+
+if (epoxy_has_egl_extension(qemu_egl_display,
+"EGL_KHR_fence_sync") &&
+epoxy_has_egl_extension(qemu_egl_display,
+"EGL_ANDROID_native_fence_sync")) {
+sync = eglCreateSyncKHR(qemu_egl_display,
+EGL_SYNC_NATIVE_FENCE_ANDROID, NULL);
+if (sync != EGL_NO_SYNC_KHR) {
+dmabuf->sync = sync;
+}
+}
+}
+
+void egl_dmabuf_create_fence(QemuDmaBuf *dmabuf)
+{
+if (dmabuf->sync) {
+dmabuf->fence_fd = eglDupNativeFenceFDANDROID(qemu_egl_display,
+  dmabuf->sync);
+eglDestroySyncKHR(qemu_egl_display, dmabuf->sync);
+dmabuf->sync = NULL;
+}
+}
+
 void egl_fb_setup_new_tex(egl_fb *fb, int width, int height)
 {
 GLuint texture;
-- 
2.30.2




[PATCH v4 1/5] ui/gtk: Create a common release_dmabuf helper

2021-06-24 Thread Vivek Kasireddy
Since the texture release mechanism is the same for both gtk-egl
and gtk-glarea, move the helper from gtk-egl to common gtk
code so that it can be shared by both gtk backends.

Cc: Gerd Hoffmann 
Reviewed-by: Gerd Hoffmann 
Signed-off-by: Vivek Kasireddy 
---
 include/ui/gtk.h |  2 --
 ui/gtk-egl.c |  8 
 ui/gtk.c | 11 ++-
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/include/ui/gtk.h b/include/ui/gtk.h
index 9516670ebc..e6cbf0507c 100644
--- a/include/ui/gtk.h
+++ b/include/ui/gtk.h
@@ -178,8 +178,6 @@ void gd_egl_cursor_dmabuf(DisplayChangeListener *dcl,
   uint32_t hot_x, uint32_t hot_y);
 void gd_egl_cursor_position(DisplayChangeListener *dcl,
 uint32_t pos_x, uint32_t pos_y);
-void gd_egl_release_dmabuf(DisplayChangeListener *dcl,
-   QemuDmaBuf *dmabuf);
 void gd_egl_scanout_flush(DisplayChangeListener *dcl,
   uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 void gtk_egl_init(DisplayGLMode mode);
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index 2a2e6d3a17..b671181272 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -249,14 +249,6 @@ void gd_egl_cursor_position(DisplayChangeListener *dcl,
 vc->gfx.cursor_y = pos_y * vc->gfx.scale_y;
 }
 
-void gd_egl_release_dmabuf(DisplayChangeListener *dcl,
-   QemuDmaBuf *dmabuf)
-{
-#ifdef CONFIG_GBM
-egl_dmabuf_release_texture(dmabuf);
-#endif
-}
-
 void gd_egl_scanout_flush(DisplayChangeListener *dcl,
   uint32_t x, uint32_t y, uint32_t w, uint32_t h)
 {
diff --git a/ui/gtk.c b/ui/gtk.c
index 98046f577b..6132bab52f 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -575,6 +575,14 @@ static bool gd_has_dmabuf(DisplayChangeListener *dcl)
 return vc->gfx.has_dmabuf;
 }
 
+static void gd_gl_release_dmabuf(DisplayChangeListener *dcl,
+ QemuDmaBuf *dmabuf)
+{
+#ifdef CONFIG_GBM
+egl_dmabuf_release_texture(dmabuf);
+#endif
+}
+
 /** DisplayState Callbacks (opengl version) **/
 
 static const DisplayChangeListenerOps dcl_gl_area_ops = {
@@ -593,6 +601,7 @@ static const DisplayChangeListenerOps dcl_gl_area_ops = {
 .dpy_gl_scanout_disable  = gd_gl_area_scanout_disable,
 .dpy_gl_update   = gd_gl_area_scanout_flush,
 .dpy_gl_scanout_dmabuf   = gd_gl_area_scanout_dmabuf,
+.dpy_gl_release_dmabuf   = gd_gl_release_dmabuf,
 .dpy_has_dmabuf  = gd_has_dmabuf,
 };
 
@@ -615,8 +624,8 @@ static const DisplayChangeListenerOps dcl_egl_ops = {
 .dpy_gl_scanout_dmabuf   = gd_egl_scanout_dmabuf,
 .dpy_gl_cursor_dmabuf= gd_egl_cursor_dmabuf,
 .dpy_gl_cursor_position  = gd_egl_cursor_position,
-.dpy_gl_release_dmabuf   = gd_egl_release_dmabuf,
 .dpy_gl_update   = gd_egl_scanout_flush,
+.dpy_gl_release_dmabuf   = gd_gl_release_dmabuf,
 .dpy_has_dmabuf  = gd_has_dmabuf,
 };
 
-- 
2.30.2




[PATCH v4 5/5] virtio-gpu: Add gl_flushed callback

2021-06-24 Thread Vivek Kasireddy
Adding this callback provides a way to resume the processing of
cmds in fenceq and cmdq that were not processed because the UI
was waiting on a fence and blocked cmd processing.

Cc: Gerd Hoffmann 
Reviewed-by: Gerd Hoffmann 
Signed-off-by: Vivek Kasireddy 
---
 hw/display/virtio-gpu.c | 32 ++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index e183f4ecda..3fcd44ac5e 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -991,8 +991,10 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
 break;
 }
 if (!cmd->finished) {
-virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error ? cmd->error :
-VIRTIO_GPU_RESP_OK_NODATA);
+if (!g->parent_obj.renderer_blocked) {
+virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error ? cmd->error :
+VIRTIO_GPU_RESP_OK_NODATA);
+}
 }
 }
 
@@ -1048,6 +1050,30 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g)
 g->processing_cmdq = false;
 }
 
+static void virtio_gpu_process_fenceq(VirtIOGPU *g)
+{
+struct virtio_gpu_ctrl_command *cmd, *tmp;
+
+QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) {
+trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id);
+virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+QTAILQ_REMOVE(&g->fenceq, cmd, next);
+g_free(cmd);
+g->inflight--;
+if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
+fprintf(stderr, "inflight: %3d (-)\r", g->inflight);
+}
+}
+}
+
+static void virtio_gpu_handle_gl_flushed(VirtIOGPUBase *b)
+{
+VirtIOGPU *g = container_of(b, VirtIOGPU, parent_obj);
+
+virtio_gpu_process_fenceq(g);
+virtio_gpu_process_cmdq(g);
+}
+
 static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 {
 VirtIOGPU *g = VIRTIO_GPU(vdev);
@@ -1406,10 +1432,12 @@ static void virtio_gpu_class_init(ObjectClass *klass, 
void *data)
 DeviceClass *dc = DEVICE_CLASS(klass);
 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 VirtIOGPUClass *vgc = VIRTIO_GPU_CLASS(klass);
+VirtIOGPUBaseClass *vgbc = &vgc->parent;
 
 vgc->handle_ctrl = virtio_gpu_handle_ctrl;
 vgc->process_cmd = virtio_gpu_simple_process_cmd;
 vgc->update_cursor_data = virtio_gpu_update_cursor_data;
+vgbc->gl_flushed = virtio_gpu_handle_gl_flushed;
 
 vdc->realize = virtio_gpu_device_realize;
 vdc->reset = virtio_gpu_reset;
-- 
2.30.2




Re: [PATCH] acpi/ged: fix reset cause

2021-06-24 Thread Michael S. Tsirkin
On Thu, Jun 24, 2021 at 12:06:14PM +0100, Peter Maydell wrote:
> On Thu, 24 Jun 2021 at 12:01, Gerd Hoffmann  wrote:
> >
> > Reset requests should use SHUTDOWN_CAUSE_GUEST_RESET not
> > SHUTDOWN_CAUSE_GUEST_SHUTDOWN.
> >
> > Reported-by: Peter Maydell 
> > Signed-off-by: Gerd Hoffmann 
> > ---
> >  hw/acpi/generic_event_device.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
> > index 39c825763ad8..e28457a7d103 100644
> > --- a/hw/acpi/generic_event_device.c
> > +++ b/hw/acpi/generic_event_device.c
> > @@ -207,7 +207,7 @@ static void ged_regs_write(void *opaque, hwaddr addr, 
> > uint64_t data,
> >  return;
> >  case ACPI_GED_REG_RESET:
> >  if (data == ACPI_GED_RESET_VALUE) {
> > -qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
> > +qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
> >  }
> >  return;
> >  }
> > --
> > 2.31.1
> 
> Reviewed-by: Peter Maydell 
> 
> thanks
> -- PMM

tagged, thanks!




Re: Extracting PC information from QEMU/KVM during single-step

2021-06-24 Thread Peter Maydell
On Thu, 24 Jun 2021 at 18:09, Steven Raasch  wrote:
> NOTE: I do not yet understand how gdb interacts with the virtual machine. I 
> have experience with GDB, but only at a linux app-debug level. I don't grok 
> how gdb on a linux host works with QEMU running a windows guest.
> My *assumption* is that the VM continues to run while an app being 
> debugged with GDB can be stopped, stepped, etc. If this is the case, I would 
> expect that the VM's sense of time will continue to move forward while the 
> app is paused. This would be an issue for my time-sensitive app.

No, that's not how it works. A gdb connected to QEMU's gdbstub is a
bit like a hardware JTAG debugger connected to a real CPU, if that
helps. When gdb gets control the entire VM is stopped; stepping
steps one instruction of whatever the VM is doing. gdb and the
gdbstub have no understanding of individual processes running
inside the guest OS -- single stepping will happily step through
the app, into interrupt handlers, across the OS context switching
and into other processes, etc.

-- PMM



Re: [PATCH v4 0/6] acpi: Error Record Serialization Table, ERST, support for QEMU

2021-06-24 Thread Eric DeVolder
Igor,
Thanks for the feedback. I am working to address/correct the items. I hope to 
have v5 posted early next week.
eric


From: Igor Mammedov 
Sent: Tuesday, June 22, 2021 10:51 AM
To: Eric DeVolder 
Cc: qemu-devel@nongnu.org ; m...@redhat.com 
; marcel.apfelb...@gmail.com ; 
pbonz...@redhat.com ; r...@twiddle.net ; 
ehabk...@redhat.com ; Konrad Wilk 
; Boris Ostrovsky 
Subject: Re: [PATCH v4 0/6] acpi: Error Record Serialization Table, ERST, 
support for QEMU

On Fri, 11 Jun 2021 14:31:17 -0400
Eric DeVolder  wrote:

> This patchset introduces support for the ACPI Error Record
> Serialization Table, ERST.
>
> Linux uses the persistent storage filesystem, pstore, to record
> information (eg. dmesg tail) upon panics and shutdowns.  Pstore is
> independent of, and runs before, kdump.  In certain scenarios (ie.
> hosts/guests with root filesystems on NFS/iSCSI where networking
> software and/or hardware fails), pstore may contain the only
> information available for post-mortem debugging.
>
> Two common storage backends for the pstore filesystem are ACPI ERST
> and UEFI. Most BIOS implement ACPI ERST; however, ACPI ERST is not
> currently supported in QEMU, and UEFI is not utilized in all guests.
> By implementing ACPI ERST within QEMU, the ACPI ERST becomes a
> viable pstore storage backend for virtual machines (as it is now for
> bare metal machines).
>
> Enabling support for ACPI ERST facilitates a consistent method to
> capture kernel panic information in a wide range of guests: from
> resource-constrained microvms to very large guests, and in
> particular, in direct-boot environments (which would lack UEFI
> run-time services).
>
> Note that Microsoft Windows also utilizes the ACPI ERST for certain
> crash information, if available.
>
> The ACPI ERST persistent storage is contained within a single backing
> file. The size and location of the backing file is specified upon
> QEMU startup of the ACPI ERST device.
>
> The ACPI specification[1], in Chapter "ACPI Platform Error Interfaces
> (APEI)", and specifically subsection "Error Serialization", outlines
> a method for storing error records into persistent storage.
>
> [1] "Advanced Configuration and Power Interface Specification",
> version 6.2, May 2017.
> https://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf
>
> [2] "Unified Extensible Firmware Interface Specification",
> version 2.8, March 2019.
> https://uefi.org/sites/default/files/resources/UEFI_Spec_2_8_final.pdf
>
> Suggested-by: Konrad Wilk 
> Signed-off-by: Eric DeVolder 
>
> ---
> v4: 11jun2021
>  - Converted to a PCI device, per Igor.

Series looks much better now that impl. were split into
backend/frontend parts and dynamic MMIO placement.

I left some mandatory nit-picking about
comments, styles, overall documentation, leftovers
from previous revisions.
And also some notes on how we can simplify the impl. a bit more.


>  - Updated qtest.
>
> v3: 28may2021
>  - Converted to using a TYPE_MEMORY_BACKEND_FILE object rather than
>internal array with explicit file operations, per Igor.
>  - Changed the way the qdev and base address are handled, allowing
>ERST to be disabled at run-time. Also aligns better with other
>existing code.
>
> v2: 8feb2021
>  - Added qtest/smoke test per Paolo Bonzini
>  - Split patch into smaller chunks, per Igor Mammedov
>  - Did away with use of ACPI packed structures, per Igor Mammedov
>
> v1: 26oct2020
>  - initial post
>
> ---
> Eric DeVolder (6):
>   ACPI ERST: bios-tables-test.c steps 1 and 2
>   ACPI ERST: header file for ERST
>   ACPI ERST: support for ACPI ERST feature
>   ACPI ERST: create ACPI ERST table for pc/x86 machines.
>   ACPI ERST: qtest for ERST
>   ACPI ERST: step 6 of bios-tables-test.c
>
>  hw/acpi/erst.c   | 880 
> +++
>  hw/acpi/meson.build  |   1 +
>  hw/i386/acpi-build.c |   5 +
>  include/hw/acpi/erst.h   |  79 
>  tests/data/acpi/microvm/ERST |   0
>  tests/data/acpi/pc/ERST  | Bin 0 -> 976 bytes
>  tests/data/acpi/q35/ERST | Bin 0 -> 976 bytes
>  tests/qtest/erst-test.c  | 109 ++
>  tests/qtest/meson.build  |   2 +
>  9 files changed, 1076 insertions(+)
>  create mode 100644 hw/acpi/erst.c
>  create mode 100644 include/hw/acpi/erst.h
>  create mode 100644 tests/data/acpi/microvm/ERST
>  create mode 100644 tests/data/acpi/pc/ERST
>  create mode 100644 tests/data/acpi/q35/ERST
>  create mode 100644 tests/qtest/erst-test.c
>



Re: [PULL v2 00/57] target-arm queue

2021-06-24 Thread Peter Maydell
On Thu, 24 Jun 2021 at 15:00, Peter Maydell  wrote:
>
> Squashed in a trivial fix for 32-bit hosts:
>
> --- a/target/arm/mve_helper.c
> +++ b/target/arm/mve_helper.c
> @@ -1120,7 +1120,7 @@ DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=)
>  acc = EVENACC(acc, TO128(n[H##ESIZE(e + 1 * XCHG)] * \
>   m[H##ESIZE(e)]));  \
>  }   \
> -acc = int128_add(acc, 1 << 7);  \
> +acc = int128_add(acc, int128_make64(1 << 7));   \
>  }   \
>  }   \
>  mve_advance_vpt(env);   \
>
> -- PMM
>
> The following changes since commit 53f306f316549d20c76886903181413d20842423:
>
>   Merge remote-tracking branch 
> 'remotes/ehabkost-gl/tags/x86-next-pull-request' into staging (2021-06-21 
> 11:26:04 +0100)
>
> are available in the Git repository at:
>
>   https://git.linaro.org/people/pmaydell/qemu-arm.git 
> tags/pull-target-arm-20210624
>
> for you to fetch changes up to 90a76c6316cfe6416fc33814a838fb3928f746ee:
>
>   docs/system: arm: Add nRF boards description (2021-06-24 14:58:48 +0100)
>
> 
> target-arm queue:
>  * Don't require 'virt' board to be compiled in for ACPI GHES code
>  * docs: Document which architecture extensions we emulate
>  * Fix bugs in M-profile FPCXT_NS accesses
>  * First slice of MVE patches
>  * Implement MTE3
>  * docs/system: arm: Add nRF boards description
>


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/6.1
for any user-visible changes.

-- PMM



Re: [PULL 30/43] vt82c686: Fix SMBus IO base and configuration registers

2021-06-24 Thread Philippe Mathieu-Daudé
On 6/24/21 8:01 PM, BALATON Zoltan wrote:
> On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:
>> On 6/24/21 7:00 PM, BALATON Zoltan wrote:
>>> On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:
 On 6/24/21 6:16 PM, Philippe Mathieu-Daudé wrote:
> On 6/24/21 6:01 PM, Philippe Mathieu-Daudé wrote:
>> On 6/24/21 5:46 PM, Philippe Mathieu-Daudé wrote:
>>> Hi Zoltan,
>>>
>>> On 2/21/21 3:34 PM, Philippe Mathieu-Daudé wrote:
 From: BALATON Zoltan 

 The base address of the SMBus io ports and its enabled status is set
 by registers in the PCI config space but this was not correctly
 emulated. Instead the SMBus registers were mapped on realize to the
 base address set by a property to the address expected by fuloong2e
 firmware.

 Fix the base and config register handling to more closely model
 hardware which allows to remove the property and allows the guest to
 control this mapping. Do all this in reset instead of realize so it's
 correctly updated on reset.
>>> This commit broke running PMON on Fuloong2E:
>>> https://www.mail-archive.com/qemu-devel@nongnu.org/msg752605.html
>>> console: PMON2000 MIPS Initializing. Standby...
>>> console: ERRORPC= CONFIG=00030932
>>> console: PRID=6302
>>> console: DIMM read
>>> console: 00ff
>>> console: 00ff
>>> console: 00ff
>>> console: 00ff
>>> console: 00ff
>>> console: 00ff
>>> console: 00ff
>>> console: 00ff
>>> console: 00ff
>>> console: 00ff
>>> ...
>>>
>>> From here the console loops displaying this value...
>>
>> Tracing:
>>
> pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1

 Offset 93-90 – SMBus I/O Base
 ... RW
 15-4 I/O Base (16-byte I/O space) default = 00h
 pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1

> pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1

 Offset D2 – SMBus Host Configuration . RW
 SMBus Host Controller Enable
 0 Disable SMB controller functions . default
 1 Enable SMB controller functions
 pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1

 Hmm the datasheet indeed documents 0xd2... why is the guest accessing
 0xd0 to enable the function? It seems this is the problem, since if
 I replace d2 -> d0 PMON boots. See below [*].
>>
>>> Expected:
>>>
>>> console: PMON2000 MIPS Initializing. Standby...
>>> console: ERRORPC= CONFIG=00030932
>>> console: PRID=6302
>>> console: DIMM read
>>> console: 0080
>>> console: read memory type
>>> console: read number of rows
>>> ...
>>
  static void pm_write_config(PCIDevice *d, uint32_t addr, uint32_t
 val, int len)
  {
 +    VT686PMState *s = VT82C686B_PM(d);
 +
  trace_via_pm_write(addr, val, len);
  pci_default_write_config(d, addr, val, len);
 +    if (ranges_overlap(addr, len, 0x90, 4)) {
 +    uint32_t v = pci_get_long(s->dev.config + 0x90);
 +    pci_set_long(s->dev.config + 0x90, (v & 0xfff0UL) | 1);
 +    }
 +    if (range_covers_byte(addr, len, 0xd2)) {
 +    s->dev.config[0xd2] &= 0xf;
 +    smb_io_space_update(s);

 [*] So the guest writing at 0xd0, this block is skipped, the
 I/O region never enabled.
>>>
>>> Could it be it does word or dword i/o to access multiple addresses at
>>> once. Wasn't there a recent change to memory regions that could break
>>> this? Is adjusting valid access sizes to the mem region ops needed now
>>> to have the memory region handle this?
>>
>> Do you mean it was buggy earlier, so to accept a guest write at 0xd0
>> the code had to handle the 0xd2 address? 0xd2 is the address in the
>> datasheet, so I doubt.
> 
> No, I meant that instead of writing a byte to 0xd2 the guest might write
> a dword to 0xd0 which also overlaps 0xd2 and would change that but it
> does not reach the device for some reason. But in your trace there was:
> 
>>> mr_ops_write mr 0x5583912b2e00 (south-bridge-pci-config) addr
>>> 0x1fe80490 value 0xeee1 size 4
>>> mr_ops_write mr 0x5583912b2e00 (south-bridge-pci-config) addr
>>> 0x1fe804d2 value 0x1 size 2
>>
>> These are:
>> pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1
>> pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1
> 
> Where size is 2 so it would not reach 0xd2 but the address part above is
> 0x1fe804d2 which somehow comes out as 0xd0 in the PCI trace so looks
> like something strips the low bits within PCI code and the guest does
> intend to access 0xd2 but it's not passed on to the device as such.

Oh, good eyes :)

Indeed I see:

static uint32_t bonito_sbridge_pciaddr(void *opaque, hwaddr addr)
{
...
regn

[RFC PATCH] audio: Make the AudiodevDriver enum conditional

2021-06-24 Thread Thomas Huth
This way, the upper layers like libvirt could have the possibility
to use QAPI to find out which audio drivers have been enabled during
compile-time of QEMU.

Signed-off-by: Thomas Huth 
---
 Note: Marked as RFC since it's quite a lot of ifdef'ing here...
   not sure whether I really like it...

 audio/audio.c  | 16 +++
 audio/audio_legacy.c   | 45 +-
 audio/audio_template.h | 16 +++
 qapi/audio.json| 37 --
 4 files changed, 90 insertions(+), 24 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index 59453ef856..34a0f39c29 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1988,14 +1988,30 @@ void audio_create_pdos(Audiodev *dev)
 break
 
 CASE(NONE, none, );
+#if defined(CONFIG_AUDIO_ALSA)
 CASE(ALSA, alsa, Alsa);
+#endif
+#if defined(CONFIG_AUDIO_COREAUDIO)
 CASE(COREAUDIO, coreaudio, Coreaudio);
+#endif
+#if defined(CONFIG_AUDIO_DSOUND)
 CASE(DSOUND, dsound, );
+#endif
+#if defined(CONFIG_AUDIO_JACK)
 CASE(JACK, jack, Jack);
+#endif
+#if defined(CONFIG_AUDIO_OSS)
 CASE(OSS, oss, Oss);
+#endif
+#if defined(CONFIG_AUDIO_PA)
 CASE(PA, pa, Pa);
+#endif
+#if defined(CONFIG_AUDIO_SDL)
 CASE(SDL, sdl, Sdl);
+#endif
+#if defined(CONFIG_SPICE)
 CASE(SPICE, spice, );
+#endif
 CASE(WAV, wav, );
 
 case AUDIODEV_DRIVER__MAX:
diff --git a/audio/audio_legacy.c b/audio/audio_legacy.c
index 0fe827b057..d940f77a27 100644
--- a/audio/audio_legacy.c
+++ b/audio/audio_legacy.c
@@ -92,7 +92,7 @@ static void get_fmt(const char *env, AudioFormat *dst, bool 
*has_dst)
 }
 }
 
-
+#if defined(CONFIG_AUDIO_ALSA) || defined(CONFIG_AUDIO_DSOUND)
 static void get_millis_to_usecs(const char *env, uint32_t *dst, bool *has_dst)
 {
 const char *val = getenv(env);
@@ -101,6 +101,7 @@ static void get_millis_to_usecs(const char *env, uint32_t 
*dst, bool *has_dst)
 *has_dst = true;
 }
 }
+#endif
 
 static uint32_t frames_to_usecs(uint32_t frames,
 AudiodevPerDirectionOptions *pdo)
@@ -109,7 +110,7 @@ static uint32_t frames_to_usecs(uint32_t frames,
 return (frames * 100 + freq / 2) / freq;
 }
 
-
+#if defined(CONFIG_AUDIO_COREAUDIO)
 static void get_frames_to_usecs(const char *env, uint32_t *dst, bool *has_dst,
 AudiodevPerDirectionOptions *pdo)
 {
@@ -119,6 +120,7 @@ static void get_frames_to_usecs(const char *env, uint32_t 
*dst, bool *has_dst,
 *has_dst = true;
 }
 }
+#endif
 
 static uint32_t samples_to_usecs(uint32_t samples,
  AudiodevPerDirectionOptions *pdo)
@@ -127,6 +129,7 @@ static uint32_t samples_to_usecs(uint32_t samples,
 return frames_to_usecs(samples / channels, pdo);
 }
 
+#if defined(CONFIG_AUDIO_PA) || defined(CONFIG_AUDIO_SDL)
 static void get_samples_to_usecs(const char *env, uint32_t *dst, bool *has_dst,
  AudiodevPerDirectionOptions *pdo)
 {
@@ -136,6 +139,7 @@ static void get_samples_to_usecs(const char *env, uint32_t 
*dst, bool *has_dst,
 *has_dst = true;
 }
 }
+#endif
 
 static uint32_t bytes_to_usecs(uint32_t bytes, AudiodevPerDirectionOptions 
*pdo)
 {
@@ -144,6 +148,7 @@ static uint32_t bytes_to_usecs(uint32_t bytes, 
AudiodevPerDirectionOptions *pdo)
 return samples_to_usecs(bytes / bytes_per_sample, pdo);
 }
 
+__attribute__((unused))
 static void get_bytes_to_usecs(const char *env, uint32_t *dst, bool *has_dst,
AudiodevPerDirectionOptions *pdo)
 {
@@ -155,7 +160,7 @@ static void get_bytes_to_usecs(const char *env, uint32_t 
*dst, bool *has_dst,
 }
 
 /* backend specific functions */
-/* ALSA */
+#if defined(CONFIG_AUDIO_ALSA)
 static void handle_alsa_per_direction(
 AudiodevAlsaPerDirectionOptions *apdo, const char *prefix)
 {
@@ -200,8 +205,9 @@ static void handle_alsa(Audiodev *dev)
 get_millis_to_usecs("QEMU_ALSA_THRESHOLD",
 &aopt->threshold, &aopt->has_threshold);
 }
+#endif
 
-/* coreaudio */
+#if defined(CONFIG_AUDIO_COREAUDIO)
 static void handle_coreaudio(Audiodev *dev)
 {
 get_frames_to_usecs(
@@ -213,8 +219,9 @@ static void handle_coreaudio(Audiodev *dev)
 &dev->u.coreaudio.out->buffer_count,
 &dev->u.coreaudio.out->has_buffer_count);
 }
+#endif
 
-/* dsound */
+#if defined(CONFIG_AUDIO_DSOUND)
 static void handle_dsound(Audiodev *dev)
 {
 get_millis_to_usecs("QEMU_DSOUND_LATENCY_MILLIS",
@@ -228,8 +235,9 @@ static void handle_dsound(Audiodev *dev)
&dev->u.dsound.in->has_buffer_length,
dev->u.dsound.in);
 }
+#endif
 
-/* OSS */
+#if defined(CONFIG_AUDIO_OSS)
 static void handle_oss_per_direction(
 AudiodevOssPerDirectionOptions *opdo, const char *try_poll_env,
 const char *dev_env)
@@ -256,8 +264,9 @@ static void handle_oss(Audiodev *dev)
 get_boo

Re: [PULL 30/43] vt82c686: Fix SMBus IO base and configuration registers

2021-06-24 Thread BALATON Zoltan

On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:

Hi Zoltan,

On 2/21/21 3:34 PM, Philippe Mathieu-Daudé wrote:

From: BALATON Zoltan 

The base address of the SMBus io ports and its enabled status is set
by registers in the PCI config space but this was not correctly
emulated. Instead the SMBus registers were mapped on realize to the
base address set by a property to the address expected by fuloong2e
firmware.

Fix the base and config register handling to more closely model
hardware which allows to remove the property and allows the guest to
control this mapping. Do all this in reset instead of realize so it's
correctly updated on reset.


This commit broke running PMON on Fuloong2E:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg752605.html


By the way, I'm not sure I'm reading this test right, but it looks like the 
one which claims to test ati-vga has a -vga std command line option that 
probably turns off ati-vga, so maybe it does not really test it, while the one 
which has no -vga option (and so could test ati-vga) is not declaring it. I'm 
not sure it's a problem, just something I've noticed.


Regards,
BALATON Zoltan

RE: [PATCH v3 3/5] ui: Create sync objects and fences only for blobs

2021-06-24 Thread Kasireddy, Vivek
Hi Gerd,

> > >
> > > >  dmabuf->buf.fd = res->dmabuf_fd;
> > > > +dmabuf->buf.blob = true;
> > >
> > > Do you actually need the 'blob' field?
> > > I think checking 'fd' instead should work too.
> > [Kasireddy, Vivek] I want these changes to be limited to blob resources 
> > only as I do not
> > know how they might affect other use-cases or whether they are needed there 
> > or not. I
> > don't think I can rely on fd as vfio/display.c also populates the fd field:
> > dmabuf = g_new0(VFIODMABuf, 1);
> > dmabuf->dmabuf_id  = plane.dmabuf_id;
> > dmabuf->buf.width  = plane.width;
> > dmabuf->buf.height = plane.height;
> > dmabuf->buf.stride = plane.stride;
> > dmabuf->buf.fourcc = plane.drm_format;
> > dmabuf->buf.modifier = plane.drm_format_mod;
> > dmabuf->buf.fd = fd;
> >
> > Therefore, I need a way to identify a dmabuf that is associated with blobs 
> > vs others.
> 
> And it actually is a dma-buf too (the guest display provided by i915 gvt
> mdev driver).  So fencing that should work, right?
[Kasireddy, Vivek] Well, for virtio-gpu, as you know we are adding a dma fence 
to
resource_flush to make it wait until it gets signalled by Qemu. We might have 
to do
something similar on the i915 GVT side but I do not have the hardware to write a 
patch
and test it out -- as i915 GVT is not supported for > Gen 9 platforms.

> 
> Even if we have to restrict it to some kinds of dma-bufs the field
> should have a more descriptive name like "allow_fences".
[Kasireddy, Vivek] I think limiting this to blobs makes sense at the moment. If 
need be,
we can extend it to include dma-bufs generated by i915 GVT later. Let me send a
v4 with your suggestion to change the name.

Thanks,
Vivek
> 
> take care,
>   Gerd




Re: [PULL 30/43] vt82c686: Fix SMBus IO base and configuration registers

2021-06-24 Thread BALATON Zoltan

On Thu, 24 Jun 2021, BALATON Zoltan wrote:

On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:

On 6/24/21 7:00 PM, BALATON Zoltan wrote:

On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:

On 6/24/21 6:16 PM, Philippe Mathieu-Daudé wrote:

On 6/24/21 6:01 PM, Philippe Mathieu-Daudé wrote:

On 6/24/21 5:46 PM, Philippe Mathieu-Daudé wrote:

Hi Zoltan,

On 2/21/21 3:34 PM, Philippe Mathieu-Daudé wrote:

From: BALATON Zoltan 

The base address of the SMBus io ports and its enabled status is set
by registers in the PCI config space but this was not correctly
emulated. Instead the SMBus registers were mapped on realize to the
base address set by a property to the address expected by fuloong2e
firmware.

Fix the base and config register handling to more closely model
hardware which allows to remove the property and allows the guest to
control this mapping. Do all this in reset instead of realize so it's
correctly updated on reset.


This commit broke running PMON on Fuloong2E:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg752605.html
console: PMON2000 MIPS Initializing. Standby...
console: ERRORPC= CONFIG=00030932
console: PRID=6302
console: DIMM read
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
...

From here the console loops displaying this value...


Tracing:



pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1


Offset 93-90 – SMBus I/O Base ... RW
15-4 I/O Base (16-byte I/O space) default = 00h
pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1


pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1


Offset D2 – SMBus Host Configuration . RW
SMBus Host Controller Enable
0 Disable SMB controller functions . default
1 Enable SMB controller functions
pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1

Hmm the datasheet indeed documents 0xd2... why is the guest accessing
0xd0 to enable the function? It seems this is the problem, since if
I replace d2 -> d0 PMON boots. See below [*].



Expected:

console: PMON2000 MIPS Initializing. Standby...
console: ERRORPC= CONFIG=00030932
console: PRID=6302
console: DIMM read
console: 0080
console: read memory type
console: read number of rows
...



 static void pm_write_config(PCIDevice *d, uint32_t addr, uint32_t
val, int len)
 {
+    VT686PMState *s = VT82C686B_PM(d);
+
 trace_via_pm_write(addr, val, len);
 pci_default_write_config(d, addr, val, len);
+    if (ranges_overlap(addr, len, 0x90, 4)) {
+    uint32_t v = pci_get_long(s->dev.config + 0x90);
+    pci_set_long(s->dev.config + 0x90, (v & 0xfff0UL) | 1);
+    }
+    if (range_covers_byte(addr, len, 0xd2)) {
+    s->dev.config[0xd2] &= 0xf;
+    smb_io_space_update(s);


[*] So the guest writing at 0xd0, this block is skipped, the
I/O region never enabled.


Could it be it does word or dword i/o to access multiple addresses at
once. Wasn't there a recent change to memory regions that could break
this? Is adjusting valid access sizes to the mem region ops needed now
to have the memory region handle this?


Do you mean it was buggy earlier, so to accept a guest write at 0xd0
the code had to handle the 0xd2 address? 0xd2 is the address in the
datasheet, so I doubt.


No, I meant that instead of writing a byte to 0xd2 the guest might write a 
dword to 0xd0 which also overlaps 0xd2 and would change that but it does not 
reach the device for some reason. But in your trace there was:


mr_ops_write mr 0x5583912b2e00 (south-bridge-pci-config) addr 0x1fe80490 
value 0xeee1 size 4
mr_ops_write mr 0x5583912b2e00 (south-bridge-pci-config) addr 0x1fe804d2 
value 0x1 size 2


These are:
pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1
pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1


Where size is 2 so it would not reach 0xd2 but the address part above is 
0x1fe804d2 which somehow comes out as 0xd0 in the PCI trace so looks like 
something strips the low bits within PCI code and the guest does intend to 
access 0xd2 but it's not passed on to the device as such.


Now I remember I've seen this once:

https://lists.nongnu.org/archive/html/qemu-devel/2020-12/msg06299.html

Regards,
BALATON Zoltan

RE: [RFC v1 0/1] ui: Add a Wayland backend for Qemu UI

2021-06-24 Thread Kasireddy, Vivek
Hi Gerd,

> > Why does Qemu need a new Wayland UI backend?
> > The main reason why there needs to be a plain and simple Wayland backend
> > for Qemu UI is to eliminate the Blit (aka GPU copy) that happens if using
> > a toolkit like GTK or SDL (because they use EGL). The Blit can be eliminated
> > by sharing the dmabuf fd -- associated with the Guest scanout buffer --
> > directly with the Host compositor via the linux-dmabuf (unstable) protocol.
> 
> Hmm, that probably means no window decorations (and other UI elements),
[Kasireddy, Vivek] Right, unfortunately, no decorations or other UI elements. 
For
that we can use GTK. 
> right?  Also the code seems to not (yet?) handle mouse and kbd input.
[Kasireddy, Vivek] Yes, kbd and mouse support not added yet and that is why I
tagged it as WIP. But it should not be too hard to add that.

> 
> > The patch(es) are still WIP and the only reason why I am sending them now
> > is to get feedback and see if anyone thinks this work is interesting. And,
> > even after this work is complete, it is not meant to be merged and can be
> > used for performance testing purposes. Given Qemu UI's new direction, the
> > proper way to add new backends is to create a separate UI/display module
> > that is part of the dbus/pipewire infrastructure that Marc-Andre is
> > working on:
> > https://lists.nongnu.org/archive/html/qemu-devel/2021-03/msg04331.html
> 
> Separating emulation and UI has the big advantage that the guest
> lifecycle is decoupled from the desktop session lifecycle, i.e.
> the guest can continue to run when the desktop session ends.
> 
> Works today with spice (when using unix socket to connect it can pass
> dma-buf handles from qemu to spice client).
> 
> Using dbus instead certainly makes sense.  Whether we'll just send
> dma-buf handles over dbus or integrate with pipewire for display/sound
> is not clear yet.  Marc-André thinks using pipewire doesn't bring benefits
> and I haven't found the time yet to learn more about pipewire ...
[Kasireddy, Vivek] On our side, we'll also try to learn how dbus and pipewire
fit in and work. Having said that, can Marc-Andre's work be merged in 
stages -- first only dbus and no pipewire?

Thanks,
Vivek
> 
> take care,
>   Gerd




Re: [PATCH 6/6] tests/acceptance: Linux boot test for record/replay

2021-06-24 Thread Willian Rampazzo
On Wed, Jun 23, 2021 at 3:45 PM Willian Rampazzo  wrote:
>
> Hi Pavel,
>
> On Thu, Jun 10, 2021 at 8:25 AM Pavel Dovgalyuk
>  wrote:
> >
> > From: Pavel Dovgalyuk 
> >
> > This patch adds a test for record/replay, which boots Linux
> > image from the disk and interacts with the network.
> > The idea and code of this test is borrowed from boot_linux.py
> > This test includes only x86_64 platform. Other platforms and
> > machines will be added later after testing and improving
> > record/replay to completely support them.
> >
> > Each test consists of the following phases:
> >  - downloading the disk image
> >  - recording the execution
> >  - replaying the execution
> >
> > Replay does not validate the output, but waits until QEMU
> > finishes the execution. This is reasonable, because
> > QEMU usually hangs when replay goes wrong.
> >
>
> It took me some time to review this patch because I could not identify
> what makes it an automated test. I mean, when I look at an automated
> test I expect a pass/fail/skip output. I could not identify the
> expected output of this test compared to the actual result. If I did
> not miss anything, this test will always pass unless there is an
> exception that, potentially, could not be related to the record/replay
> mechanism.

I was looking at the current record/replay test, replay_kernel.py and
I noticed you followed the same pattern in this test. Although I do
not agree much with a test that does not have a specific
objective/check, I'm fine if this has value for you.

>
> Also, as far as I could check, you inherit from the LinuxTest class
> but only use the cloudinit methods. Most of the other methods are not
> used or overridden. In this case, I think it is worth splitting the
> LinuxTest with a new mixin utility class to handle the cloudinit part.
> If you need help with that, let me know.

As this is more related to code design, I can split the cloudinit code
later and adjust your code.
In this case,

Reviewed-by: Willian Rampazzo 

>
> > Signed-off-by: Pavel Dovgalyuk 
> > ---
> >  MAINTAINERS  |1
> >  tests/acceptance/replay_linux.py |  116 
> > ++
> >  2 files changed, 117 insertions(+)
> >  create mode 100644 tests/acceptance/replay_linux.py
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 7d9cd29042..9675a1095b 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -2863,6 +2863,7 @@ F: include/sysemu/replay.h
> >  F: docs/replay.txt
> >  F: stubs/replay.c
> >  F: tests/acceptance/replay_kernel.py
> > +F: tests/acceptance/replay_linux.py
> >  F: tests/acceptance/reverse_debugging.py
> >  F: qapi/replay.json
> >
> > diff --git a/tests/acceptance/replay_linux.py 
> > b/tests/acceptance/replay_linux.py
> > new file mode 100644
> > index 00..15953f9e49
> > --- /dev/null
> > +++ b/tests/acceptance/replay_linux.py
> > @@ -0,0 +1,116 @@
> > +# Record/replay test that boots a complete Linux system via a cloud image
> > +#
> > +# Copyright (c) 2020 ISP RAS
> > +#
> > +# Author:
> > +#  Pavel Dovgalyuk 
> > +#
> > +# This work is licensed under the terms of the GNU GPL, version 2 or
> > +# later.  See the COPYING file in the top-level directory.
> > +
> > +import os
> > +import logging
> > +import time
> > +
> > +from avocado import skipUnless
> > +from avocado.utils import cloudinit
> > +from avocado.utils import network
> > +from avocado.utils import vmimage
> > +from avocado.utils import datadrainer
> > +from avocado.utils.path import find_command
> > +from avocado_qemu import LinuxTest
> > +
> > +class ReplayLinux(LinuxTest):
> > +"""
> > +Boots a Linux system, checking for a successful initialization
> > +"""
> > +
> > +timeout = 1800
> > +chksum = None
> > +hdd = 'ide-hd'
> > +cd = 'ide-cd'
> > +bus = 'ide'
> > +
> > +def setUp(self):
> > +super(ReplayLinux, self).setUp()
> > +self.boot_path = self.download_boot()
> > +self.cloudinit_path = self.prepare_cloudinit()
> > +
> > +def vm_add_disk(self, vm, path, id, device):
> > +bus_string = ''
> > +if self.bus:
> > +bus_string = ',bus=%s.%d' % (self.bus, id,)
> > +vm.add_args('-drive', 'file=%s,snapshot,id=disk%s,if=none' % 
> > (path, id))
> > +vm.add_args('-drive',
> > +'driver=blkreplay,id=disk%s-rr,if=none,image=disk%s' % (id, 
> > id))
> > +vm.add_args('-device',
> > +'%s,drive=disk%s-rr%s' % (device, id, bus_string))
> > +
> > +def launch_and_wait(self, record, args, shift):
> > +vm = self.get_vm()
> > +vm.add_args('-smp', '1')
> > +vm.add_args('-m', '1024')
> > +vm.add_args('-object', 'filter-replay,id=replay,netdev=hub0port0')
> > +if args:
> > +vm.add_args(*args)
> > +self.vm_add_disk(vm, self.boot_path, 0, self.hdd)
> > +self.vm_add_disk(vm, self.cloudinit_path, 1, self.cd)
> > +logger = logging.getLogger('repla

[PATCH 07/11] block: feature detection for host block support

2021-06-24 Thread Paolo Bonzini
From: Joelle van Dyne 

On Darwin (iOS), there are no system level APIs for directly accessing
host block devices. We detect this at configure time.

Signed-off-by: Joelle van Dyne 
Message-Id: <20210315180341.31638-...@getutm.app>
Signed-off-by: Paolo Bonzini 
---
 block/file-posix.c   | 33 ++---
 meson.build  |  6 +-
 qapi/block-core.json | 14 ++
 3 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index ea102483b0..e56bb491a1 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -42,6 +42,8 @@
 #include "scsi/constants.h"
 
 #if defined(__APPLE__) && (__MACH__)
+#include 
+#if defined(HAVE_HOST_BLOCK_DEVICE)
 #include 
 #include 
 #include 
@@ -52,6 +54,7 @@
 //#include 
 #include 
 #include 
+#endif /* defined(HAVE_HOST_BLOCK_DEVICE) */
 #endif
 
 #ifdef __sun__
@@ -178,7 +181,17 @@ typedef struct BDRVRawReopenState {
 bool check_cache_dropped;
 } BDRVRawReopenState;
 
-static int fd_open(BlockDriverState *bs);
+static int fd_open(BlockDriverState *bs)
+{
+BDRVRawState *s = bs->opaque;
+
+/* this is just to ensure s->fd is sane (its called by io ops) */
+if (s->fd >= 0) {
+return 0;
+}
+return -EIO;
+}
+
 static int64_t raw_getlength(BlockDriverState *bs);
 
 typedef struct RawPosixAIOData {
@@ -3033,6 +3046,7 @@ static BlockStatsSpecific 
*raw_get_specific_stats(BlockDriverState *bs)
 return stats;
 }
 
+#if defined(HAVE_HOST_BLOCK_DEVICE)
 static BlockStatsSpecific *hdev_get_specific_stats(BlockDriverState *bs)
 {
 BlockStatsSpecific *stats = g_new(BlockStatsSpecific, 1);
@@ -3042,6 +3056,7 @@ static BlockStatsSpecific 
*hdev_get_specific_stats(BlockDriverState *bs)
 
 return stats;
 }
+#endif /* HAVE_HOST_BLOCK_DEVICE */
 
 static QemuOptsList raw_create_opts = {
 .name = "raw-create-opts",
@@ -3257,6 +3272,8 @@ BlockDriver bdrv_file = {
 /***/
 /* host device */
 
+#if defined(HAVE_HOST_BLOCK_DEVICE)
+
 #if defined(__APPLE__) && defined(__MACH__)
 static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
 CFIndex maxPathSize, int flags);
@@ -3549,16 +3566,6 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int 
req, void *buf)
 }
 #endif /* linux */
 
-static int fd_open(BlockDriverState *bs)
-{
-BDRVRawState *s = bs->opaque;
-
-/* this is just to ensure s->fd is sane (its called by io ops) */
-if (s->fd >= 0)
-return 0;
-return -EIO;
-}
-
 static coroutine_fn int
 hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
 {
@@ -3882,6 +3889,8 @@ static BlockDriver bdrv_host_cdrom = {
 };
 #endif /* __FreeBSD__ */
 
+#endif /* HAVE_HOST_BLOCK_DEVICE */
+
 static void bdrv_file_init(void)
 {
 /*
@@ -3889,6 +3898,7 @@ static void bdrv_file_init(void)
  * registered last will get probed first.
  */
 bdrv_register(&bdrv_file);
+#if defined(HAVE_HOST_BLOCK_DEVICE)
 bdrv_register(&bdrv_host_device);
 #ifdef __linux__
 bdrv_register(&bdrv_host_cdrom);
@@ -3896,6 +3906,7 @@ static void bdrv_file_init(void)
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 bdrv_register(&bdrv_host_cdrom);
 #endif
+#endif /* HAVE_HOST_BLOCK_DEVICE */
 }
 
 block_init(bdrv_file_init);
diff --git a/meson.build b/meson.build
index 62de7ac106..bb3a5be796 100644
--- a/meson.build
+++ b/meson.build
@@ -183,7 +183,7 @@ if targetos == 'windows'
   include_directories: 
include_directories('.'))
 elif targetos == 'darwin'
   coref = dependency('appleframeworks', modules: 'CoreFoundation')
-  iokit = dependency('appleframeworks', modules: 'IOKit')
+  iokit = dependency('appleframeworks', modules: 'IOKit', required: false)
 elif targetos == 'sunos'
   socket = [cc.find_library('socket'),
 cc.find_library('nsl'),
@@ -1147,6 +1147,9 @@ if get_option('cfi')
   add_global_link_arguments(cfi_flags, native: false, language: ['c', 'cpp', 
'objc'])
 endif
 
+have_host_block_device = (targetos != 'darwin' or
+cc.has_header('IOKit/storage/IOMedia.h'))
+
 #
 # config-host.h #
 #
@@ -1246,6 +1249,7 @@ config_host_data.set('HAVE_PTY_H', cc.has_header('pty.h'))
 config_host_data.set('HAVE_SYS_IOCCOM_H', cc.has_header('sys/ioccom.h'))
 config_host_data.set('HAVE_SYS_KCOV_H', cc.has_header('sys/kcov.h'))
 config_host_data.set('HAVE_SYSTEM_FUNCTION', cc.has_function('system', prefix: 
'#include '))
+config_host_data.set('HAVE_HOST_BLOCK_DEVICE', have_host_block_device)
 
 config_host_data.set('CONFIG_PREADV', cc.has_function('preadv', prefix: 
'#include '))
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2ea294129e..a54f37dbef 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -897,7 +897,8 @@
   'discriminator': 'driver',
   'data': {
   'file': 'BlockStatsSpecificFile',
-  'host_device': 'BlockStatsSpecificFile',

[PATCH 08/11] block: check for sys/disk.h

2021-06-24 Thread Paolo Bonzini
From: Joelle van Dyne 

Some BSD platforms do not have this header.

Reviewed-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Joelle van Dyne 
Message-Id: <20210315180341.31638-...@getutm.app>
Reviewed-by: Max Reitz 
Signed-off-by: Paolo Bonzini 
---
 block.c | 2 +-
 meson.build | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 3f456892d0..1d37f133a8 100644
--- a/block.c
+++ b/block.c
@@ -54,7 +54,7 @@
 #ifdef CONFIG_BSD
 #include 
 #include 
-#ifndef __DragonFly__
+#if defined(HAVE_SYS_DISK_H)
 #include 
 #endif
 #endif
diff --git a/meson.build b/meson.build
index bb3a5be796..a95a9fbcbf 100644
--- a/meson.build
+++ b/meson.build
@@ -1250,6 +1250,7 @@ config_host_data.set('HAVE_SYS_IOCCOM_H', 
cc.has_header('sys/ioccom.h'))
 config_host_data.set('HAVE_SYS_KCOV_H', cc.has_header('sys/kcov.h'))
 config_host_data.set('HAVE_SYSTEM_FUNCTION', cc.has_function('system', prefix: 
'#include '))
 config_host_data.set('HAVE_HOST_BLOCK_DEVICE', have_host_block_device)
+config_host_data.set('HAVE_SYS_DISK_H', cc.has_header('sys/disk.h'))
 
 config_host_data.set('CONFIG_PREADV', cc.has_function('preadv', prefix: 
'#include '))
 
-- 
2.31.1





[PATCH 11/11] file-posix: handle EINTR during ioctl

2021-06-24 Thread Paolo Bonzini
Similar to other handle_aiocb_* functions, handle_aiocb_ioctl needs to cater
for the possibility that ioctl is interrupted by a signal.  Otherwise, the
I/O is incorrectly reported as a failure to the guest.

Reported-by: Gordon Watson 
Signed-off-by: Paolo Bonzini 
---
 block/file-posix.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 74b8216077..a26eab0ac3 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1347,7 +1347,9 @@ static int handle_aiocb_ioctl(void *opaque)
 RawPosixAIOData *aiocb = opaque;
 int ret;
 
-ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf);
+do {
+ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf);
+} while (ret == -1 && errno == EINTR);
 if (ret == -1) {
 return -errno;
 }
-- 
2.31.1




[PATCH 09/11] block: try BSD disk size ioctls one after another

2021-06-24 Thread Paolo Bonzini
Try all the possible ioctls for disk size as long as they are
supported, to keep the #if ladder simple.

Extracted and cleaned up from a patch by Joelle van Dyne and
Warner Losh.

Signed-off-by: Paolo Bonzini 
---
 block/file-posix.c | 34 --
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index e56bb491a1..f16d987c07 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2327,39 +2327,37 @@ static int64_t raw_getlength(BlockDriverState *bs)
 again:
 #endif
 if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
+size = 0;
 #ifdef DIOCGMEDIASIZE
-if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
-#elif defined(DIOCGPART)
-{
-struct partinfo pi;
-if (ioctl(fd, DIOCGPART, &pi) == 0)
-size = pi.media_size;
-else
-size = 0;
+if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size)) {
+size = 0;
+}
+#endif
+#ifdef DIOCGPART
+if (size == 0) {
+struct partinfo pi;
+if (ioctl(fd, DIOCGPART, &pi) == 0) {
+size = pi.media_size;
+}
 }
-if (size == 0)
 #endif
 #if defined(__APPLE__) && defined(__MACH__)
-{
+if (size == 0) {
 uint64_t sectors = 0;
 uint32_t sector_size = 0;
 
 if (ioctl(fd, DKIOCGETBLOCKCOUNT, &sectors) == 0
&& ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) == 0) {
 size = sectors * sector_size;
-} else {
-size = lseek(fd, 0LL, SEEK_END);
-if (size < 0) {
-return -errno;
-}
 }
 }
-#else
-size = lseek(fd, 0LL, SEEK_END);
+#endif
+if (size == 0) {
+size = lseek(fd, 0LL, SEEK_END);
+}
 if (size < 0) {
 return -errno;
 }
-#endif
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 switch(s->type) {
 case FTYPE_CD:
-- 
2.31.1





[PATCH 10/11] block: detect DKIOCGETBLOCKCOUNT/SIZE before use

2021-06-24 Thread Paolo Bonzini
From: Joelle van Dyne 

iOS hosts do not have these defined, so we fall back to the
default behaviour.

Co-authored-by: Warner Losh 
Signed-off-by: Joelle van Dyne 
Signed-off-by: Paolo Bonzini 
---
 block/file-posix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index f16d987c07..74b8216077 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2341,7 +2341,7 @@ again:
 }
 }
 #endif
-#if defined(__APPLE__) && defined(__MACH__)
+#if defined(DKIOCGETBLOCKCOUNT) && defined(DKIOCGETBLOCKSIZE)
 if (size == 0) {
 uint64_t sectors = 0;
 uint32_t sector_size = 0;
-- 
2.31.1





[PATCH 05/11] block: add max_hw_transfer to BlockLimits

2021-06-24 Thread Paolo Bonzini
For block host devices, I/O can happen through either the kernel file
descriptor I/O system calls (preadv/pwritev, io_submit, io_uring)
or the SCSI passthrough ioctl SG_IO.

In the latter case, the size of each transfer can be limited by the
HBA, while for file descriptor I/O the kernel is able to split and
merge I/O in smaller pieces as needed.  Applying the HBA limits to
file descriptor I/O results in more system calls and suboptimal
performance, so this patch splits the max_transfer limit in two:
max_transfer remains valid and is used in general, while max_hw_transfer
is limited to the maximum hardware size.  max_hw_transfer can then be
included by the scsi-generic driver in the block limits page, to ensure
that the stricter hardware limit is used.

Signed-off-by: Paolo Bonzini 
---
 block/block-backend.c  | 13 +
 block/file-posix.c |  2 +-
 block/io.c |  2 ++
 hw/scsi/scsi-generic.c |  2 +-
 include/block/block_int.h  |  7 +++
 include/sysemu/block-backend.h |  1 +
 6 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 6e37582740..deb55c272e 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1953,6 +1953,19 @@ uint32_t blk_get_request_alignment(BlockBackend *blk)
 return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
 }
 
+/* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero 
*/
+uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+uint64_t max = INT_MAX;
+
+if (bs) {
+max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer);
+max = MIN_NON_ZERO(max, bs->bl.max_transfer);
+}
+return ROUND_DOWN(max, blk_get_request_alignment(blk));
+}
+
 /* Returns the maximum transfer length, in bytes; guaranteed nonzero */
 uint32_t blk_get_max_transfer(BlockBackend *blk)
 {
diff --git a/block/file-posix.c b/block/file-posix.c
index 6db690baf2..88e58d2863 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1232,7 +1232,7 @@ static void raw_refresh_limits(BlockDriverState *bs, 
Error **errp)
 int ret = sg_get_max_transfer_length(s->fd);
 
 if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
-bs->bl.max_transfer = pow2floor(ret);
+bs->bl.max_hw_transfer = pow2floor(ret);
 }
 
 ret = sg_get_max_segments(s->fd);
diff --git a/block/io.c b/block/io.c
index 323854d063..dd93364258 100644
--- a/block/io.c
+++ b/block/io.c
@@ -127,6 +127,8 @@ static void bdrv_merge_limits(BlockLimits *dst, const 
BlockLimits *src)
 {
 dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
 dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
+dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
+src->max_hw_transfer);
 dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
  src->opt_mem_alignment);
 dst->min_mem_alignment = MAX(dst->min_mem_alignment,
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index b6c4143dc7..665baf900e 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -179,7 +179,7 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, 
SCSIDevice *s, int len)
 (r->req.cmd.buf[1] & 0x01)) {
 page = r->req.cmd.buf[2];
 if (page == 0xb0) {
-uint32_t max_transfer = blk_get_max_transfer(s->conf.blk);
+uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk);
 uint32_t max_iov = blk_get_max_iov(s->conf.blk);
 
 assert(max_transfer);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 057d88b1fc..f1a54db0f8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -695,6 +695,13 @@ typedef struct BlockLimits {
  * clamped down. */
 uint32_t max_transfer;
 
+/* Maximal hardware transfer length in bytes.  Applies whenever
+ * transfers to the device bypass the kernel I/O scheduler, for
+ * example with SG_IO.  If larger than max_transfer or if zero,
+ * blk_get_max_hw_transfer will fall back to max_transfer.
+ */
+uint64_t max_hw_transfer;
+
 /* memory alignment, in bytes so that no bounce buffer is needed */
 size_t min_mem_alignment;
 
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 5423e3d9c6..9ac5f7bbd3 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -208,6 +208,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag);
 int blk_get_flags(BlockBackend *blk);
 uint32_t blk_get_request_alignment(BlockBackend *blk);
 uint32_t blk_get_max_transfer(BlockBackend *blk);
+uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
 int blk_get_max_iov(BlockBackend *blk);
 void blk_set_guest_block_size(BlockBackend *blk, int align);
 void *blk_try_blockalign(BlockBackend *blk, size_t

[PATCH 04/11] block-backend: align max_transfer to request alignment

2021-06-24 Thread Paolo Bonzini
Block device requests must be aligned to bs->bl.request_alignment.
It makes sense for drivers to align bs->bl.max_transfer the same
way; however when there is no specified limit, blk_get_max_transfer
just returns INT_MAX.  Since the contract of the function does not
specify that INT_MAX means "no maximum", just align the outcome
of the function (whether INT_MAX or bs->bl.max_transfer) before
returning it.

Signed-off-by: Paolo Bonzini 
---
 block/block-backend.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 15f1ea4288..6e37582740 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1957,12 +1957,12 @@ uint32_t blk_get_request_alignment(BlockBackend *blk)
 uint32_t blk_get_max_transfer(BlockBackend *blk)
 {
 BlockDriverState *bs = blk_bs(blk);
-uint32_t max = 0;
+uint32_t max = INT_MAX;
 
 if (bs) {
-max = bs->bl.max_transfer;
+max = MIN_NON_ZERO(max, bs->bl.max_transfer);
 }
-return MIN_NON_ZERO(max, INT_MAX);
+return ROUND_DOWN(max, blk_get_request_alignment(blk));
 }
 
 int blk_get_max_iov(BlockBackend *blk)
-- 
2.31.1





[PATCH 06/11] file-posix: try BLKSECTGET on block devices too, do not round to power of 2

2021-06-24 Thread Paolo Bonzini
bs->sg is only true for character devices, but block devices can also
be used with scsi-block and scsi-generic.  Unfortunately BLKSECTGET
returns bytes in an int for /dev/sgN devices, and sectors in a short
for block devices, so account for that in the code.

The maximum transfer also need not be a power of 2 (for example I have
seen disks with 1280 KiB maximum transfer) so there's no need to pass
the result through pow2floor.

Signed-off-by: Paolo Bonzini 
---
 block/file-posix.c | 67 ++
 1 file changed, 38 insertions(+), 29 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 88e58d2863..ea102483b0 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1147,22 +1147,27 @@ static void raw_reopen_abort(BDRVReopenState *state)
 s->reopen_state = NULL;
 }
 
-static int sg_get_max_transfer_length(int fd)
+static int hdev_get_max_hw_transfer(int fd, struct stat *st)
 {
 #ifdef BLKSECTGET
-int max_bytes = 0;
-
-if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
-return max_bytes;
+if (S_ISBLK(st->st_mode)) {
+unsigned short max_sectors = 0;
+if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
+return max_sectors * 512;
+}
 } else {
-return -errno;
+int max_bytes = 0;
+if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
+return max_bytes;
+}
 }
+return -errno;
 #else
 return -ENOSYS;
 #endif
 }
 
-static int sg_get_max_segments(int fd)
+static int hdev_get_max_segments(int fd, struct stat *st)
 {
 #ifdef CONFIG_LINUX
 char buf[32];
@@ -1171,26 +1176,20 @@ static int sg_get_max_segments(int fd)
 int ret;
 int sysfd = -1;
 long max_segments;
-struct stat st;
 
-if (fstat(fd, &st)) {
-ret = -errno;
-goto out;
-}
-
-if (S_ISCHR(st.st_mode)) {
+if (S_ISCHR(st->st_mode)) {
 if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
 return ret;
 }
 return -ENOTSUP;
 }
 
-if (!S_ISBLK(st.st_mode)) {
+if (!S_ISBLK(st->st_mode)) {
 return -ENOTSUP;
 }
 
 sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
-major(st.st_rdev), minor(st.st_rdev));
+major(st->st_rdev), minor(st->st_rdev));
 sysfd = open(sysfspath, O_RDONLY);
 if (sysfd == -1) {
 ret = -errno;
@@ -1227,23 +1226,33 @@ out:
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 BDRVRawState *s = bs->opaque;
-
-if (bs->sg) {
-int ret = sg_get_max_transfer_length(s->fd);
-
-if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
-bs->bl.max_hw_transfer = pow2floor(ret);
-}
-
-ret = sg_get_max_segments(s->fd);
-if (ret > 0) {
-bs->bl.max_iov = ret;
-}
-}
+struct stat st;
 
 raw_probe_alignment(bs, s->fd, errp);
 bs->bl.min_mem_alignment = s->buf_align;
 bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size);
+
+/*
+ * Maximum transfers are best effort, so it is okay to ignore any
+ * errors.  That said, based on the man page errors in fstat would be
+ * very much unexpected; the only possible case seems to be ENOMEM.
+ */
+if (fstat(s->fd, &st)) {
+return;
+}
+
+if (bs->sg || S_ISBLK(st.st_mode)) {
+int ret = hdev_get_max_hw_transfer(s->fd, &st);
+
+if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
+bs->bl.max_hw_transfer = ret;
+}
+
+ret = hdev_get_max_segments(s->fd, &st);
+if (ret > 0) {
+bs->bl.max_iov = ret;
+}
+}
 }
 
 static int check_for_dasd(int fd)
-- 
2.31.1





[PATCH v5 00/11] block: file-posix queue

2021-06-24 Thread Paolo Bonzini
New patches:
- 3/4 (for review comments),
- 9 (split for ease of review),
- 11 (new bugfix)

v1->v2: add missing patch

v2->v3: add max_hw_transfer to BlockLimits

v3->v4: fix compilation after patch 1, tweak commit messages according
to Vladimir's review

v4->v5: round down max_transfer and max_hw_transfer to request alignment
checkpatch fixes
return -ENOTSUP, not -EIO, if block limits ioctls fail
handle host_cdrom like host_device in QAPI
split "block: try BSD disk size ioctls one after another"
new bugfix patch "file-posix: handle EINTR during ioctl"

Joelle van Dyne (3):
  block: feature detection for host block support
  block: check for sys/disk.h
  block: detect DKIOCGETBLOCKCOUNT/SIZE before use

Paolo Bonzini (8):
  file-posix: fix max_iov for /dev/sg devices
  scsi-generic: pass max_segments via max_iov field in BlockLimits
  osdep: provide ROUND_DOWN macro
  block-backend: align max_transfer to request alignment
  block: add max_hw_transfer to BlockLimits
  file-posix: try BLKSECTGET on block devices too, do not round to power of 2
  block: try BSD disk size ioctls one after another
  file-posix: handle EINTR during ioctl

 block.c|   2 +-
 block/block-backend.c  |  19 -
 block/file-posix.c | 144 -
 block/io.c |   2 +
 hw/scsi/scsi-generic.c |   6 +-
 include/block/block_int.h  |   7 ++
 include/qemu/osdep.h   |  28 +--
 include/sysemu/block-backend.h |   1 +
 meson.build|   7 +-
 qapi/block-core.json   |  14 +++-
 10 files changed, 156 insertions(+), 74 deletions(-)

-- 
2.31.1




[PATCH 03/11] osdep: provide ROUND_DOWN macro

2021-06-24 Thread Paolo Bonzini
osdep.h provides a ROUND_UP macro to hide bitwise operations for the
purpose of rounding a number up to a multiple of a power of two; add a
ROUND_DOWN macro that does the same with truncation towards zero.

While at it, change the formatting of some comments.

Signed-off-by: Paolo Bonzini 
---
 include/qemu/osdep.h | 28 ++--
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 0a54bf7be8..c3656b755a 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -319,11 +319,16 @@ extern "C" {
 })
 #endif
 
-/* Round number down to multiple */
+/*
+ * Round number down to multiple. Safe when m is not a power of 2 (see
+ * ROUND_DOWN for a faster version when a power of 2 is guaranteed).
+ */
 #define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))
 
-/* Round number up to multiple. Safe when m is not a power of 2 (see
- * ROUND_UP for a faster version when a power of 2 is guaranteed) */
+/*
+ * Round number up to multiple. Safe when m is not a power of 2 (see
+ * ROUND_UP for a faster version when a power of 2 is guaranteed).
+ */
 #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m))
 
 /* Check if n is a multiple of m */
@@ -340,11 +345,22 @@ extern "C" {
 /* Check if pointer p is n-bytes aligned */
 #define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n))
 
-/* Round number up to multiple. Requires that d be a power of 2 (see
+/*
+ * Round number down to multiple. Requires that d be a power of 2 (see
  * QEMU_ALIGN_UP for a safer but slower version on arbitrary
- * numbers); works even if d is a smaller type than n.  */
+ * numbers); works even if d is a smaller type than n.
+ */
+#ifndef ROUND_DOWN
+#define ROUND_DOWN(n, d) ((n) & -(0 ? (n) : (d)))
+#endif
+
+/*
+ * Round number up to multiple. Requires that d be a power of 2 (see
+ * QEMU_ALIGN_UP for a safer but slower version on arbitrary
+ * numbers); works even if d is a smaller type than n.
+ */
 #ifndef ROUND_UP
-#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? (n) : (d)))
+#define ROUND_UP(n, d) ROUND_DOWN((n) + (d) - 1, (d))
 #endif
 
 #ifndef DIV_ROUND_UP
-- 
2.31.1





[PATCH 01/11] file-posix: fix max_iov for /dev/sg devices

2021-06-24 Thread Paolo Bonzini
Even though it was only called for devices that have bs->sg set (which
must be character devices), sg_get_max_segments looked at /sys/dev/block
which only works for block devices.

On Linux the sg driver has its own way to provide the maximum number of
iovecs in a scatter/gather list, so add support for it.  The block device
path is kept because it will be reinstated in the next patches.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Max Reitz 
---
 block/file-posix.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/block/file-posix.c b/block/file-posix.c
index b3fbb9bd63..b8dc19ce1a 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1178,6 +1178,17 @@ static int sg_get_max_segments(int fd)
 goto out;
 }
 
+if (S_ISCHR(st.st_mode)) {
+if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
+return ret;
+}
+return -ENOTSUP;
+}
+
+if (!S_ISBLK(st.st_mode)) {
+return -ENOTSUP;
+}
+
 sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
 major(st.st_rdev), minor(st.st_rdev));
 sysfd = open(sysfspath, O_RDONLY);
-- 
2.31.1





[PATCH 02/11] scsi-generic: pass max_segments via max_iov field in BlockLimits

2021-06-24 Thread Paolo Bonzini
I/O to a disk via read/write is not limited by the number of segments allowed
by the host adapter; the kernel can split requests if needed, and the limit
imposed by the host adapter can be very low (256k or so) to keep SG_IO from
returning EINVAL if memory is heavily fragmented.

Since this value is only interesting for SG_IO-based I/O, do not include
it in the max_transfer and only take it into account when patching the
block limits VPD page in the scsi-generic device.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Max Reitz 
---
 block/file-posix.c | 3 +--
 hw/scsi/scsi-generic.c | 6 --
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index b8dc19ce1a..6db690baf2 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1237,8 +1237,7 @@ static void raw_refresh_limits(BlockDriverState *bs, 
Error **errp)
 
 ret = sg_get_max_segments(s->fd);
 if (ret > 0) {
-bs->bl.max_transfer = MIN(bs->bl.max_transfer,
-  ret * qemu_real_host_page_size);
+bs->bl.max_iov = ret;
 }
 }
 
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 40e039864f..b6c4143dc7 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -179,10 +179,12 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, 
SCSIDevice *s, int len)
 (r->req.cmd.buf[1] & 0x01)) {
 page = r->req.cmd.buf[2];
 if (page == 0xb0) {
-uint32_t max_transfer =
-blk_get_max_transfer(s->conf.blk) / s->blocksize;
+uint32_t max_transfer = blk_get_max_transfer(s->conf.blk);
+uint32_t max_iov = blk_get_max_iov(s->conf.blk);
 
 assert(max_transfer);
+max_transfer = MIN_NON_ZERO(max_transfer, max_iov * 
qemu_real_host_page_size)
+/ s->blocksize;
 stl_be_p(&r->buf[8], max_transfer);
 /* Also take care of the opt xfer len. */
 stl_be_p(&r->buf[12],
-- 
2.31.1





Re: [PATCH v4 00/34] modules: add meta-data database

2021-06-24 Thread Dr. David Alan Gilbert
* Gerd Hoffmann (kra...@redhat.com) wrote:
> On Thu, Jun 24, 2021 at 04:01:25PM +0100, Dr. David Alan Gilbert wrote:
> > * Gerd Hoffmann (kra...@redhat.com) wrote:
> > > This patch series adds support for module meta-data.  Today this is
> > > either hard-coded in qemu (see qemu_load_module_for_opts) or handled
> > > with manually maintained lists in util/module (see module_deps[] and
> > > qom_modules[]).  This series replaced that scheme with annotation
> > > macros, so the meta-data can go into the module source code and -- for
> > > example -- the module_obj() annotations can go next to the TypeInfo
> > > struct for the object class.
> > 
> > So this is slightly off-topic for the series; but kind of relevant,
> > but...
> > Is there a way to inhibit module loading after a given point?
> 
> We could block loading after machine initialization.
> Has implications for hotplug though.

Yes; I was thinking perhaps a command to explicitly disable autoloading
if people worried about it.

> > I ask, because there's a fairly well known security escalation that
> > takes advantage of NSS loading of PAM modules; typically you have
> > your nice sandboxed application, you write out your nasty .so into the
> > sandbox and then somehow get your application to trigger the PAM module
> > load.
> > Now, what stops the same attack here?
> 
> Placing a new .so at some random directory wouldn't work, qemu only
> loads modules from the search path (but I guess the same is true for
> pam).

Yes, I'm failing to find the CVE I vaguely remember about the details of
how it was messed up.

Dave

> With this patch series applied all modules are listed in the modinfo.c
> database (even if we don't have any metadata about them), so we could
> easily limit loading to modules known at compile time.  Not sure how
> much that alone would improve security though, when the attacker is able
> to write to the qemu module directory it isn't much of a problem to just
> overwrite one of the existing modules.
> 
> We could try work with hashes or signatures stored in modinfo ...
> 
> take care,
>   Gerd
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PULL 30/43] vt82c686: Fix SMBus IO base and configuration registers

2021-06-24 Thread BALATON Zoltan

On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:

On 6/24/21 7:00 PM, BALATON Zoltan wrote:

On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:

On 6/24/21 6:16 PM, Philippe Mathieu-Daudé wrote:

On 6/24/21 6:01 PM, Philippe Mathieu-Daudé wrote:

On 6/24/21 5:46 PM, Philippe Mathieu-Daudé wrote:

Hi Zoltan,

On 2/21/21 3:34 PM, Philippe Mathieu-Daudé wrote:

From: BALATON Zoltan 

The base address of the SMBus io ports and its enabled status is set
by registers in the PCI config space but this was not correctly
emulated. Instead the SMBus registers were mapped on realize to the
base address set by a property to the address expected by fuloong2e
firmware.

Fix the base and config register handling to more closely model
hardware, which allows removing the property and lets the guest
control this mapping. Do all this in reset instead of realize so it's
correctly updated on reset.


This commit broke running PMON on Fuloong2E:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg752605.html
console: PMON2000 MIPS Initializing. Standby...
console: ERRORPC= CONFIG=00030932
console: PRID=6302
console: DIMM read
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
console: 00ff
...

From here the console loops displaying this value...


Tracing:



pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1


Offset 93-90 – SMBus I/O Base ... RW
15-4 I/O Base (16-byte I/O space) default = 00h
pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1


pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1


Offset D2 – SMBus Host Configuration . RW
SMBus Host Controller Enable
0 Disable SMB controller functions . default
1 Enable SMB controller functions
pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1

Hmm, the datasheet indeed documents 0xd2... why is the guest accessing
0xd0 to enable the function? It seems this is the problem, since if
I replace d2 -> d0 PMON boots. See below [*].



Expected:

console: PMON2000 MIPS Initializing. Standby...
console: ERRORPC= CONFIG=00030932
console: PRID=6302
console: DIMM read
console: 0080
console: read memory type
console: read number of rows
...



 static void pm_write_config(PCIDevice *d, uint32_t addr, uint32_t
val, int len)
 {
+    VT686PMState *s = VT82C686B_PM(d);
+
 trace_via_pm_write(addr, val, len);
 pci_default_write_config(d, addr, val, len);
+    if (ranges_overlap(addr, len, 0x90, 4)) {
+    uint32_t v = pci_get_long(s->dev.config + 0x90);
+    pci_set_long(s->dev.config + 0x90, (v & 0xfff0UL) | 1);
+    }
+    if (range_covers_byte(addr, len, 0xd2)) {
+    s->dev.config[0xd2] &= 0xf;
+    smb_io_space_update(s);


[*] So the guest writing at 0xd0, this block is skipped, the
I/O region never enabled.


Could it be that it does word or dword i/o to access multiple addresses at
once? Wasn't there a recent change to memory regions that could break
this? Is adjusting valid access sizes to the mem region ops needed now
to have the memory region handle this?


Do you mean it was buggy earlier, so to accept a guest write at 0xd0
the code had to handle the 0xd2 address? 0xd2 is the address in the
datasheet, so I doubt it.


No, I meant that instead of writing a byte to 0xd2 the guest might write a 
dword to 0xd0 which also overlaps 0xd2 and would change that but it does 
not reach the device for some reason. But in your trace there was:



mr_ops_write mr 0x5583912b2e00 (south-bridge-pci-config) addr 0x1fe80490 value 
0xeee1 size 4
mr_ops_write mr 0x5583912b2e00 (south-bridge-pci-config) addr 0x1fe804d2 value 
0x1 size 2


These are:
pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1
pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1


Where size is 2 so it would not reach 0xd2 but the address part above is 
0x1fe804d2 which somehow comes out as 0xd0 in the PCI trace so looks like 
something strips the low bits within PCI code and the guest does intend to 
access 0xd2 but it's not passed on to the device as such.


Regards,
BALATON Zoltan

Re: [PATCH v4 30/34] monitor: allow register hmp commands

2021-06-24 Thread Gerd Hoffmann
On Thu, Jun 24, 2021 at 03:55:29PM +0100, Dr. David Alan Gilbert wrote:
> * Gerd Hoffmann (kra...@redhat.com) wrote:
> > Allow commands having a NULL cmd pointer, add a function to set the
> > pointer later.  Use case: allow modules to implement hmp commands.
> > 
> > Signed-off-by: Gerd Hoffmann 
> 
> So this is OK, so
> 
> Acked-by: Dr. David Alan Gilbert 
> 
> however, I can imagine:
>   a) Auto load as you suggest

Not sure about that.  The tcg monitor commands are pointless when you
picked another accelerator, and "info usbhost" would probably also be
most useful when trouble-shooting usb-host issues.  That's why I left
it as FIXME question for now.  But can certainly be done, we can add
something along the lines of 'module_hmp("info usbhost");' to the
meta-data database and autoload based on that (or use it for more
verbose error messages).

>   c) Don't actually define the command in the tables at all; make
>  the module actually add the command to the table.

Another possible approach.  I don't see a need for modules to expand the
list of commands though, so I only set the function pointer for existing
table entries ...

take care,
  Gerd




Re: [PULL 30/43] vt82c686: Fix SMBus IO base and configuration registers

2021-06-24 Thread Philippe Mathieu-Daudé
On 6/24/21 7:00 PM, BALATON Zoltan wrote:
> On Thu, 24 Jun 2021, Philippe Mathieu-Daudé wrote:
>> On 6/24/21 6:16 PM, Philippe Mathieu-Daudé wrote:
>>> On 6/24/21 6:01 PM, Philippe Mathieu-Daudé wrote:
 On 6/24/21 5:46 PM, Philippe Mathieu-Daudé wrote:
> Hi Zoltan,
>
> On 2/21/21 3:34 PM, Philippe Mathieu-Daudé wrote:
>> From: BALATON Zoltan 
>>
>> The base address of the SMBus io ports and its enabled status is set
>> by registers in the PCI config space but this was not correctly
>> emulated. Instead the SMBus registers were mapped on realize to the
>> base address set by a property to the address expected by fuloong2e
>> firmware.
>>
>> Fix the base and config register handling to more closely model
>> hardware which allows to remove the property and allows the guest to
>> control this mapping. Do all this in reset instead of realize so it's
>> correctly updated on reset.
>
> This commit broke running PMON on Fuloong2E:
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg752605.html
> console: PMON2000 MIPS Initializing. Standby...
> console: ERRORPC= CONFIG=00030932
> console: PRID=6302
> console: DIMM read
> console: 00ff
> console: 00ff
> console: 00ff
> console: 00ff
> console: 00ff
> console: 00ff
> console: 00ff
> console: 00ff
> console: 00ff
> console: 00ff
> ...
>
> From here the console loops displaying this value...

 Tracing:

>>> pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1
>>
>> Offset 93-90 – SMBus I/O Base ... RW
>> 15-4 I/O Base (16-byte I/O space) default = 00h
>> pci_cfg_write vt82c686b-pm 05:4 @0x90 <- 0xeee1
>>
>>> pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1
>>
>> Offset D2 – SMBus Host Configuration . RW
>> SMBus Host Controller Enable
>> 0 Disable SMB controller functions . default
>> 1 Enable SMB controller functions
>> pci_cfg_write vt82c686b-pm 05:4 @0xd0 <- 0x1
>>
>> Hmm, the datasheet indeed documents 0xd2... why is the guest accessing
>> 0xd0 to enable the function? It seems this is the problem, since if
>> I replace d2 -> d0 PMON boots. See below [*].

> Expected:
>
> console: PMON2000 MIPS Initializing. Standby...
> console: ERRORPC= CONFIG=00030932
> console: PRID=6302
> console: DIMM read
> console: 0080
> console: read memory type
> console: read number of rows
> ...

>>  static void pm_write_config(PCIDevice *d, uint32_t addr, uint32_t
>> val, int len)
>>  {
>> +    VT686PMState *s = VT82C686B_PM(d);
>> +
>>  trace_via_pm_write(addr, val, len);
>>  pci_default_write_config(d, addr, val, len);
>> +    if (ranges_overlap(addr, len, 0x90, 4)) {
>> +    uint32_t v = pci_get_long(s->dev.config + 0x90);
>> +    pci_set_long(s->dev.config + 0x90, (v & 0xfff0UL) | 1);
>> +    }
>> +    if (range_covers_byte(addr, len, 0xd2)) {
>> +    s->dev.config[0xd2] &= 0xf;
>> +    smb_io_space_update(s);
>>
>> [*] So with the guest writing at 0xd0, this block is skipped and the
>> I/O region is never enabled.
> 
> Could it be that it does word or dword I/O to access multiple addresses
> at once? Wasn't there a recent change to memory regions that could break
> this? Is adjusting the valid access sizes in the memory region ops needed
> now to have the memory region handle this?

Do you mean it was buggy earlier, so that to accept a guest write at 0xd0
the code had to handle the 0xd2 address? 0xd2 is the address in the
datasheet, so I doubt it.



Re: [PATCH v1 1/1] migration: Unregister yank if migration setup fails

2021-06-24 Thread Peter Xu
On Thu, Jun 24, 2021 at 06:14:39PM +0100, Dr. David Alan Gilbert wrote:
> * Leonardo Bras (leob...@redhat.com) wrote:
> > Currently, if a qemu instance is started with "-incoming defer" and
> > an incorrect parameter is passed to "migrate_incoming", it will print the
> > expected error and reply with "duplicate yank instance" for any upcoming
> > "migrate_incoming" command.
> > 
> > This renders current qemu process unusable, and requires a new qemu
> > process to be started before accepting a migration.
> > 
> > This is caused by a yank_register_instance() that happens in
> > qemu_start_incoming_migration() but is never reverted if any error
> > happens.
> > 
> > Solves this by unregistering the instance if anything goes wrong
> > in the function, allowing a new "migrate_incoming" command to be
> > accepted.
> > 
> > Fixes: b5eea99ec2f ("migration: Add yank feature", 2021-01-13)
> > Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1974366
> > Signed-off-by: Leonardo Bras 
> > 
> > ---
> >  migration/migration.c | 6 +-
> >  1 file changed, 5 insertions(+), 1 deletion(-)
> > 
> > diff --git a/migration/migration.c b/migration/migration.c
> > index 4228635d18..ddcf9e1868 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -474,9 +474,13 @@ static void qemu_start_incoming_migration(const char 
> > *uri, Error **errp)
> >  } else if (strstart(uri, "fd:", &p)) {
> >  fd_start_incoming_migration(p, errp);
> >  } else {
> > -yank_unregister_instance(MIGRATION_YANK_INSTANCE);
> >  error_setg(errp, "unknown migration protocol: %s", uri);
> >  }
> > +
> > +if (*errp) {
> > +yank_unregister_instance(MIGRATION_YANK_INSTANCE);
> > +}
> 
> My understanding is that testing *errp isn't allowed, because
> it's legal to pass NULL to ignore errors, or legal to pass
> &error_abort to mean that any error you do hit will cause the
> process to assert; so you need to have something separate you can test.

Per my understanding error_abort should be fine, as the value of error_abort is
still NULL (in error_setg() we only check against &error_abort as the pointer,
and it seems its value had better always stay NULL).

But indeed we need at least "errp && *errp", and even that won't capture the
case when errp==NULL.

So I think we may need to define a local error, check it here when
unregistering yank, and do error_propagate() before returning.

-- 
Peter Xu




  1   2   3   4   >