[Qemu-devel] [RFC PATCH V4 2/4] vfio: Add vm status change callback to stop/restart the mdev device

2018-04-09 Thread Yulei Zhang
VM status change handler is added to change the vfio pci device
status during the migration, write the demanded device status
to the DEVICE STATUS subregion to stop the device on the source side
before fetch its status and start the deivce on the target side
after restore its status.

Signed-off-by: Yulei Zhang 
---
 hw/vfio/pci.c | 20 
 include/hw/vfio/vfio-common.h |  1 +
 linux-headers/linux/vfio.h|  6 ++
 roms/seabios  |  2 +-
 4 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index f98a9dd..13d8c73 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -38,6 +38,7 @@
 
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
+static void vfio_vm_change_state_handler(void *pv, int running, RunState 
state);
 
 /*
  * Disabling BAR mmaping can be slow, but toggling it around INTx can
@@ -2896,6 +2897,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 vfio_register_err_notifier(vdev);
 vfio_register_req_notifier(vdev);
 vfio_setup_resetfn_quirk(vdev);
+qemu_add_vm_change_state_handler(vfio_vm_change_state_handler, vdev);
 
 return;
 
@@ -2982,6 +2984,24 @@ post_reset:
 vfio_pci_post_reset(vdev);
 }
 
+static void vfio_vm_change_state_handler(void *pv, int running, RunState state)
+{
+VFIOPCIDevice *vdev = pv;
+VFIODevice *vbasedev = >vbasedev;
+uint8_t dev_state;
+uint8_t sz = 1;
+
+dev_state = running ? VFIO_DEVICE_START : VFIO_DEVICE_STOP;
+
+if (pwrite(vdev->vbasedev.fd, _state,
+   sz, vdev->device_state.offset) != sz) {
+error_report("vfio: Failed to %s device", running ? "start" : "stop");
+return;
+}
+
+vbasedev->device_state = dev_state;
+}
+
 static void vfio_instance_init(Object *obj)
 {
 PCIDevice *pci_dev = PCI_DEVICE(obj);
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f3a2ac9..9c14a8f 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -125,6 +125,7 @@ typedef struct VFIODevice {
 unsigned int num_irqs;
 unsigned int num_regions;
 unsigned int flags;
+bool device_state;
 } VFIODevice;
 
 struct VFIODeviceOps {
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index e3380ad..8f02f2f 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -304,6 +304,12 @@ struct vfio_region_info_cap_type {
 /* Mdev sub-type for device state save and restore */
 #define VFIO_REGION_SUBTYPE_DEVICE_STATE   (4)
 
+/* Offset in region to save device state */
+#define VFIO_DEVICE_STATE_OFFSET   1
+
+#define VFIO_DEVICE_START  0
+#define VFIO_DEVICE_STOP   1
+
 /**
  * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
  * struct vfio_irq_info)
diff --git a/roms/seabios b/roms/seabios
index 63451fc..5f4c7b1 16
--- a/roms/seabios
+++ b/roms/seabios
@@ -1 +1 @@
-Subproject commit 63451fca13c75870e1703eb3e20584d91179aebc
+Subproject commit 5f4c7b13cdf9c450eb55645f4362ea58fa61b79b
-- 
2.7.4




[Qemu-devel] [RFC PATCH V4 3/4] vfio: Add SaveVMHanlders for VFIO device to support live migration

2018-04-09 Thread Yulei Zhang
Instead of using vm state description, add SaveVMHandlers for VFIO
device to support live migration.

Introduce new Ioctl VFIO_DEVICE_GET_DIRTY_BITMAP to fetch the memory
bitmap that dirtied by vfio device during the iterative precopy stage
to shorten the system downtime afterward.

For vfio pci device status migrate, during the system downtime, it will
save the following states
1. pci configuration space addr0~addr5
2. pci configuration space msi_addr msi_data
3. pci device status fetch from device driver

And on the target side the vfio_load will restore the same states
1. re-setup the pci bar configuration
2. re-setup the pci device msi configuration
3. restore the pci device status

Signed-off-by: Yulei Zhang 
---
 hw/vfio/pci.c  | 195 +++--
 linux-headers/linux/vfio.h |  14 
 2 files changed, 204 insertions(+), 5 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 13d8c73..ac6a9c7 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -33,9 +33,14 @@
 #include "trace.h"
 #include "qapi/error.h"
 #include "migration/blocker.h"
+#include "migration/register.h"
+#include "exec/ram_addr.h"
 
 #define MSIX_CAP_LENGTH 12
 
+#define VFIO_SAVE_FLAG_SETUP 0
+#define VFIO_SAVE_FLAG_DEV_STATE 1
+
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
 static void vfio_vm_change_state_handler(void *pv, int running, RunState 
state);
@@ -2639,6 +2644,190 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice 
*vdev)
 vdev->req_enabled = false;
 }
 
+static uint64_t vfio_dirty_log_sync(VFIOPCIDevice *vdev)
+{
+RAMBlock *block;
+struct vfio_device_get_dirty_bitmap *d;
+uint64_t page = 0;
+ram_addr_t size;
+unsigned long nr, bitmap;
+
+RAMBLOCK_FOREACH(block) {
+size = block->used_length;
+nr = size >> TARGET_PAGE_BITS;
+bitmap = (BITS_TO_LONGS(nr) + 1) * sizeof(unsigned long);
+d = g_malloc0(sizeof(*d) +  bitmap);
+d->start_addr = block->offset;
+d->page_nr = nr;
+if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_DIRTY_BITMAP, d)) {
+error_report("vfio: Failed to get device dirty bitmap");
+g_free(d);
+goto exit;
+}
+
+if (d->page_nr) {
+cpu_physical_memory_set_dirty_lebitmap(
+ (unsigned long *)>dirty_bitmap,
+ d->start_addr, d->page_nr);
+page += d->page_nr;
+}
+g_free(d);
+}
+
+exit:
+return page;
+}
+
+static void vfio_save_live_pending(QEMUFile *f, void *opaque, uint64_t 
max_size,
+   uint64_t *non_postcopiable_pending,
+   uint64_t *postcopiable_pending)
+{
+VFIOPCIDevice *vdev = opaque;
+uint64_t pending;
+
+qemu_mutex_lock_iothread();
+rcu_read_lock();
+pending = vfio_dirty_log_sync(vdev);
+rcu_read_unlock();
+qemu_mutex_unlock_iothread();
+*non_postcopiable_pending += pending;
+}
+
+static int vfio_load(QEMUFile *f, void *opaque, int version_id)
+{
+VFIOPCIDevice *vdev = opaque;
+PCIDevice *pdev = >pdev;
+int sz = vdev->device_state.size - VFIO_DEVICE_STATE_OFFSET;
+uint8_t *buf = NULL;
+uint32_t ctl, msi_lo, msi_hi, msi_data, bar_cfg, i;
+bool msi_64bit;
+
+if (qemu_get_byte(f) == VFIO_SAVE_FLAG_SETUP) {
+goto exit;
+}
+
+/* retore pci bar configuration */
+ctl = pci_default_read_config(pdev, PCI_COMMAND, 2);
+vfio_pci_write_config(pdev, PCI_COMMAND,
+  ctl & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2);
+for (i = 0; i < PCI_ROM_SLOT; i++) {
+bar_cfg = qemu_get_be32(f);
+vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar_cfg, 4);
+}
+vfio_pci_write_config(pdev, PCI_COMMAND,
+  ctl | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2);
+
+/* restore msi configuration */
+ctl = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, 2);
+msi_64bit = !!(ctl & PCI_MSI_FLAGS_64BIT);
+
+vfio_pci_write_config(>pdev,
+  pdev->msi_cap + PCI_MSI_FLAGS,
+  ctl & (!PCI_MSI_FLAGS_ENABLE), 2);
+
+msi_lo = qemu_get_be32(f);
+vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, msi_lo, 4);
+
+if (msi_64bit) {
+msi_hi = qemu_get_be32(f);
+vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI,
+  msi_hi, 4);
+}
+msi_data = qemu_get_be32(f);
+vfio_pci_write_config(pdev,
+  pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
+  msi_data, 2);
+
+vfio_pci_write_config(>pdev, pdev->msi_cap + PCI_MSI_FLAGS,
+  ctl | PCI_MSI_FLAGS_ENABLE, 2);
+
+buf = g_malloc(sz);
+if (buf == NULL) {
+error_report("vfio: Failed 

[Qemu-devel] [RFC V4 PATCH 0/4] vfio: Introduce live migation capability to

2018-04-09 Thread Yulei Zhang
Summary

This series RFC would like to resume the discussion about how to
introduce the live migration capability to vfio mdev device. 

A new subtype region VFIO_REGION_SUBTYPE_DEVICE_STATE is introduced
for vfio device status migrate, during the initialization it will
check if the region is supported by the vfio device, otherwise it 
will remain non-migratable.

The intention to add the new region is using it for mdev device status
save and restore during the migration. The access to this region
will be trapped and forward to the mdev device driver, it also uses 
the first byte in the new region to control the running state of mdev
device, so during the migration after stop the mdev driver, qemu could
retrieve the specific device status from this region and transfer to 
the target VM side for the mdev device restore.

In addition, during the pre-copy period, it will be able to fetch the
dirty bitmap of vfio device through ioctl VFIO_DEVICE_GET_DIRTY_BITMAP
iteratively, which will be able to shorten the system downtime during
the static copy.

Below is the vfio mdev device migration sequence
Source VM side:
start migration
|
V
 in pre-copy stage, fetch the device dirty bitmap
 and add into qemu dirty list for migrate iteratively.
|
V
 get the cpu state change callback, write to the
 subregion's first byte to stop the mdev device
|
V
 quary the dirty page bitmap from iommu container 
 and add into qemu dirty list for last synchronization
|
V
 save the deivce status into Qemufile which is 
 read from the vfio device subregion

Target VM side:
 restore the mdev device after get the
 saved status context from Qemufile
|
V
  get the cpu state change callback write to 
  subregion's first byte to start the mdev device
  to put it in running status
|
V
finish migration

V3->V4:
1. add migration_blocker if device state region isnot supported.
2. instead of using vmsd, register SaveVMHandlers for VFIO device
   to leverage the pro-copy facility, and add new ioctl for VFIO
   device to fetch dirty bitmap during pro-copy.
3. remove the intel vendor ID dependence for the device state 
   subregion.

V2->V3:
1. rebase the patch to Qemu stable 2.10 branch.
2. use a common name for the subregion instead of specific for 
   intel IGD.

V1->V2:
Per Alex's suggestion:
1. use device subtype region instead of VFIO PCI fixed region.
2. remove unnecessary ioctl, use the first byte of subregion to 
   control the running state of mdev device.  
3. for dirty page synchronization, implement the interface with
   VFIOContainer instead of vfio pci device.

Yulei Zhang (4):
  vfio: introduce a new VFIO subregion for mdev device migration support
  vfio: Add vm status change callback to stop/restart the mdev device
  vfio: Add SaveVMHanlders for VFIO device to support live migration
  vifo: introduce new VFIO ioctl VFIO_IOMMU_GET_DIRTY_BITMAP

 hw/vfio/common.c  |  34 ++
 hw/vfio/pci.c | 240 --
 hw/vfio/pci.h |   2 +
 include/hw/vfio/vfio-common.h |   1 +
 linux-headers/linux/vfio.h|  43 +++-
 roms/seabios  |   2 +-
 6 files changed, 312 insertions(+), 10 deletions(-)

-- 
2.7.4




[Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support

2018-04-09 Thread Yulei Zhang
New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
to fetch and restore the status of mdev device vGPU during the
live migration.

Signed-off-by: Yulei Zhang 
---
 hw/vfio/pci.c  | 25 -
 hw/vfio/pci.h  |  2 ++
 linux-headers/linux/vfio.h |  9 ++---
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c977ee3..f98a9dd 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -32,6 +32,7 @@
 #include "pci.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "migration/blocker.h"
 
 #define MSIX_CAP_LENGTH 12
 
@@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 vfio_vga_quirk_setup(vdev);
 }
 
+struct vfio_region_info *device_state;
+/* device state region setup */
+if (!vfio_get_dev_region_info(>vbasedev,
+VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+VFIO_REGION_SUBTYPE_DEVICE_STATE, _state)) {
+memcpy(>device_state, device_state,
+   sizeof(struct vfio_region_info));
+g_free(device_state);
+} else {
+error_setg(>migration_blocker,
+"Migration disabled: cannot support device state region");
+migrate_add_blocker(vdev->migration_blocker, );
+if (err) {
+error_propagate(errp, err);
+error_free(vdev->migration_blocker);
+goto error;
+}
+}
+
 for (i = 0; i < PCI_ROM_SLOT; i++) {
 vfio_bar_quirk_setup(vdev, i);
 }
@@ -2884,6 +2904,10 @@ out_teardown:
 vfio_teardown_msi(vdev);
 vfio_bars_exit(vdev);
 error:
+if (vdev->migration_blocker) {
+migrate_del_blocker(vdev->migration_blocker);
+error_free(vdev->migration_blocker);
+}
 error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
 }
 
@@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
 
 static const VMStateDescription vfio_pci_vmstate = {
 .name = "vfio-pci",
-.unmigratable = 1,
 };
 
 static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 502a575..0ee1724 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
 VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
 VFIOVGA *vga; /* 0xa, 0x3b0, 0x3c0 */
 void *igd_opregion;
+struct vfio_region_info device_state;
+Error *migration_blocker;
 PCIHostDeviceAddress host;
 EventNotifier err_notifier;
 EventNotifier req_notifier;
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 4312e96..e3380ad 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK   (0x)
 
 /* 8086 Vendor sub-types */
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG  (3)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG  (3)
+
+/* Mdev sub-type for device state save and restore */
+#define VFIO_REGION_SUBTYPE_DEVICE_STATE   (4)
 
 /**
  * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
-- 
2.7.4




[Qemu-devel] [PATCH] configure: don't warn SDL abi if disabled

2018-04-09 Thread Peter Xu
SDL has the same problem as GTK that we might get warnings on SDL ABI
version even if SDL is disabled.  Fix that by only probing SDL if SDL is
enabled.  Also this should let configure be a little bit faster since we
don't really need to probe SDL stuff when it's off.

CC: Paolo Bonzini 
CC: Gerd Hoffmann 
CC: Peter Maydell 
CC: Daniel P. Berrange 
CC: Fam Zheng 
CC: "Philippe Mathieu-Daudé" 
Signed-off-by: Peter Xu 
---
 configure | 83 ++-
 1 file changed, 45 insertions(+), 38 deletions(-)

diff --git a/configure b/configure
index 752dd9ef32..f647026b8d 100755
--- a/configure
+++ b/configure
@@ -2836,49 +2836,52 @@ fi
 # Look for sdl configuration program (pkg-config or sdl-config).  Try
 # sdl-config even without cross prefix, and favour pkg-config over sdl-config.
 
-if test "$sdlabi" = ""; then
-if $pkg_config --exists "sdl2"; then
-sdlabi=2.0
-elif $pkg_config --exists "sdl"; then
-sdlabi=1.2
-else
-sdlabi=2.0
-fi
-fi
+sdl_probe ()
+{
+  sdl_too_old=no
+  if test "$sdlabi" = ""; then
+  if $pkg_config --exists "sdl2"; then
+  sdlabi=2.0
+  elif $pkg_config --exists "sdl"; then
+  sdlabi=1.2
+  else
+  sdlabi=2.0
+  fi
+  fi
 
-if test $sdlabi = "2.0"; then
-sdl_config=$sdl2_config
-sdlname=sdl2
-sdlconfigname=sdl2_config
-elif test $sdlabi = "1.2"; then
-sdlname=sdl
-sdlconfigname=sdl_config
-else
-error_exit "Unknown sdlabi $sdlabi, must be 1.2 or 2.0"
-fi
+  if test $sdlabi = "2.0"; then
+  sdl_config=$sdl2_config
+  sdlname=sdl2
+  sdlconfigname=sdl2_config
+  elif test $sdlabi = "1.2"; then
+  sdlname=sdl
+  sdlconfigname=sdl_config
+  else
+  error_exit "Unknown sdlabi $sdlabi, must be 1.2 or 2.0"
+  fi
 
-if test "$(basename $sdl_config)" != $sdlconfigname && ! has ${sdl_config}; 
then
-  sdl_config=$sdlconfigname
-fi
+  if test "$(basename $sdl_config)" != $sdlconfigname && ! has ${sdl_config}; 
then
+sdl_config=$sdlconfigname
+  fi
 
-if $pkg_config $sdlname --exists; then
-  sdlconfig="$pkg_config $sdlname"
-  sdlversion=$($sdlconfig --modversion 2>/dev/null)
-elif has ${sdl_config}; then
-  sdlconfig="$sdl_config"
-  sdlversion=$($sdlconfig --version)
-else
-  if test "$sdl" = "yes" ; then
-feature_not_found "sdl" "Install SDL2-devel"
+  if $pkg_config $sdlname --exists; then
+sdlconfig="$pkg_config $sdlname"
+sdlversion=$($sdlconfig --modversion 2>/dev/null)
+  elif has ${sdl_config}; then
+sdlconfig="$sdl_config"
+sdlversion=$($sdlconfig --version)
+  else
+if test "$sdl" = "yes" ; then
+  feature_not_found "sdl" "Install SDL2-devel"
+fi
+sdl=no
+# no need to do the rest
+return
+  fi
+  if test -n "$cross_prefix" && test "$(basename "$sdlconfig")" = sdl-config; 
then
+echo warning: using "\"$sdlconfig\"" to detect cross-compiled sdl >&2
   fi
-  sdl=no
-fi
-if test -n "$cross_prefix" && test "$(basename "$sdlconfig")" = sdl-config; 
then
-  echo warning: using "\"$sdlconfig\"" to detect cross-compiled sdl >&2
-fi
 
-sdl_too_old=no
-if test "$sdl" != "no" ; then
   cat > $TMPC << EOF
 #include 
 #undef main /* We don't want SDL to override our main() */
@@ -2920,6 +2923,10 @@ EOF
 fi
 sdl=no
   fi # sdl compile test
+}
+
+if test "$sdl" != "no" ; then
+  sdl_probe
 fi
 
 if test "$sdl" = "yes" ; then
-- 
2.14.3




Re: [Qemu-devel] [PATCH] configure: don't warn GTK if disabled

2018-04-09 Thread Peter Xu
On Mon, Apr 09, 2018 at 07:37:11PM +0200, Paolo Bonzini wrote:
> On 09/04/2018 11:24, Peter Maydell wrote:
> > On 9 April 2018 at 09:31, Daniel P. Berrangé  wrote:
> >> Feels to me that since we've deprecated 2.0, we could just *never* auto
> >> detect - just do test -z "$gtkabi" && gtkabi=3.0
> >>
> >> Anyone who wants gtk2 should have to use an explicit --with-gtkabi=2.0
> >
> > I think if we still work with gtk2 then we should go ahead and
> > warn-but-use it.
> 
> Both are valid choices of course.  Since we have Peter (Xu)'s patch, I
> think we should go for that and revisit in 2.13 whether to remove GTK+
> 2.0 support altogether, require an explicit configure argument, or leave
> things as they are.

Thanks all. Please anyone let me know if a repost is needed. Otherwise
I'll keep it as is and I can try to post a similar one for SDL.

-- 
Peter Xu



Re: [Qemu-devel] [PATCH for-2.12 v2] monitor: bind dispatch bh to iohandler context

2018-04-09 Thread Stefan Hajnoczi
On Tue, Apr 10, 2018 at 12:49:42PM +0800, Peter Xu wrote:
> Eric Auger reported the problem days ago that OOB broke ARM when running
> with libvirt:
> 
> http://lists.gnu.org/archive/html/qemu-devel/2018-03/msg06231.html
> 
> The problem was that the monitor dispatcher bottom half was bound to
> qemu_aio_context now, which could be polled unexpectedly in block code.

And TPM and 9P code, who all use nested event loops.

> We should keep the dispatchers run in iohandler_ctx just like what we
> did before the Out-Of-Band series (chardev uses qio, and qio binds
> everything with iohandler_ctx).
> 
> If without this change, QMP dispatcher might be run even before reaching
> main loop in block IO path, for example, in a stack like (the ARM case,
> "cont" command handler run even during machine init phase):
> 
> #0  qmp_cont ()
> #1  0x006bd210 in qmp_marshal_cont ()
> #2  0x00ac05c4 in do_qmp_dispatch ()
> #3  0x00ac07a0 in qmp_dispatch ()
> #4  0x00472d60 in monitor_qmp_dispatch_one ()
> #5  0x0047302c in monitor_qmp_bh_dispatcher ()
> #6  0x00acf374 in aio_bh_call ()
> #7  0x00acf428 in aio_bh_poll ()
> #8  0x00ad5110 in aio_poll ()
> #9  0x00a08ab8 in blk_prw ()
> #10 0x00a091c4 in blk_pread ()
> #11 0x00734f94 in pflash_cfi01_realize ()
> #12 0x0075a3a4 in device_set_realized ()
> #13 0x009a26cc in property_set_bool ()
> #14 0x009a0a40 in object_property_set ()
> #15 0x009a3a08 in object_property_set_qobject ()
> #16 0x009a0c8c in object_property_set_bool ()
> #17 0x00758f94 in qdev_init_nofail ()
> #18 0x0058e190 in create_one_flash ()
> #19 0x0058e2f4 in create_flash ()
> #20 0x005902f0 in machvirt_init ()
> #21 0x007635cc in machine_run_board_init ()
> #22 0x006b135c in main ()
> 
> Actually the problem is more severe than that.  After we switched to the
> qemu AIO handler it means the monitor dispatcher code can even be called
> with nested aio_poll(), then it can be an explicit aio_poll() inside
> another main loop aio_poll() which could be racy too.
> 
> Switch to use the iohandler_ctx for monitor dispatchers.
> 
> My sincere thanks to Eric Auger who offered great help during both
> debugging and verifying the problem.  The ARM test was carried out by
> applying this patch upon QEMU 2.12.0-rc0 and problem is gone after the
> patch.
> 
> A quick test of mine shows that after this patch applied we can pass all
> raw iotests even with OOB on by default.
> 
> CC: Eric Blake 
> CC: Markus Armbruster 
> CC: Stefan Hajnoczi 
> CC: Fam Zheng 
> Reported-by: Eric Auger 
> Tested-by: Eric Auger 
> Signed-off-by: Peter Xu 
> ---
> v2:
> - enhanced commit message
> ---
>  monitor.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/monitor.c b/monitor.c
> index 51f4cf480f..39f8ee17ba 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -4467,7 +4467,7 @@ static void monitor_iothread_init(void)
>   * have assumption to be run on main loop thread.  It would be
>   * nice that one day we can remove this assumption in the future.
>   */
> -mon_global.qmp_dispatcher_bh = aio_bh_new(qemu_get_aio_context(),
> +mon_global.qmp_dispatcher_bh = aio_bh_new(iohandler_get_aio_context(),
>monitor_qmp_bh_dispatcher,
>NULL);

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


[Qemu-devel] [PATCH for-2.12 v2] monitor: bind dispatch bh to iohandler context

2018-04-09 Thread Peter Xu
Eric Auger reported the problem days ago that OOB broke ARM when running
with libvirt:

http://lists.gnu.org/archive/html/qemu-devel/2018-03/msg06231.html

The problem was that the monitor dispatcher bottom half was bound to
qemu_aio_context now, which could be polled unexpectedly in block code.
We should keep the dispatchers run in iohandler_ctx just like what we
did before the Out-Of-Band series (chardev uses qio, and qio binds
everything with iohandler_ctx).

If without this change, QMP dispatcher might be run even before reaching
main loop in block IO path, for example, in a stack like (the ARM case,
"cont" command handler run even during machine init phase):

#0  qmp_cont ()
#1  0x006bd210 in qmp_marshal_cont ()
#2  0x00ac05c4 in do_qmp_dispatch ()
#3  0x00ac07a0 in qmp_dispatch ()
#4  0x00472d60 in monitor_qmp_dispatch_one ()
#5  0x0047302c in monitor_qmp_bh_dispatcher ()
#6  0x00acf374 in aio_bh_call ()
#7  0x00acf428 in aio_bh_poll ()
#8  0x00ad5110 in aio_poll ()
#9  0x00a08ab8 in blk_prw ()
#10 0x00a091c4 in blk_pread ()
#11 0x00734f94 in pflash_cfi01_realize ()
#12 0x0075a3a4 in device_set_realized ()
#13 0x009a26cc in property_set_bool ()
#14 0x009a0a40 in object_property_set ()
#15 0x009a3a08 in object_property_set_qobject ()
#16 0x009a0c8c in object_property_set_bool ()
#17 0x00758f94 in qdev_init_nofail ()
#18 0x0058e190 in create_one_flash ()
#19 0x0058e2f4 in create_flash ()
#20 0x005902f0 in machvirt_init ()
#21 0x007635cc in machine_run_board_init ()
#22 0x006b135c in main ()

Actually the problem is more severe than that.  After we switched to the
qemu AIO handler it means the monitor dispatcher code can even be called
with nested aio_poll(), then it can be an explicit aio_poll() inside
another main loop aio_poll() which could be racy too.

Switch to use the iohandler_ctx for monitor dispatchers.

My sincere thanks to Eric Auger who offered great help during both
debugging and verifying the problem.  The ARM test was carried out by
applying this patch upon QEMU 2.12.0-rc0 and problem is gone after the
patch.

A quick test of mine shows that after this patch applied we can pass all
raw iotests even with OOB on by default.

CC: Eric Blake 
CC: Markus Armbruster 
CC: Stefan Hajnoczi 
CC: Fam Zheng 
Reported-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Peter Xu 
---
v2:
- enhanced commit message
---
 monitor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monitor.c b/monitor.c
index 51f4cf480f..39f8ee17ba 100644
--- a/monitor.c
+++ b/monitor.c
@@ -4467,7 +4467,7 @@ static void monitor_iothread_init(void)
  * have assumption to be run on main loop thread.  It would be
  * nice that one day we can remove this assumption in the future.
  */
-mon_global.qmp_dispatcher_bh = aio_bh_new(qemu_get_aio_context(),
+mon_global.qmp_dispatcher_bh = aio_bh_new(iohandler_get_aio_context(),
   monitor_qmp_bh_dispatcher,
   NULL);
 
-- 
2.14.3




[Qemu-devel] [Bug 1211943] Re: #GP and aligned move instruction

2018-04-09 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1211943

Title:
  #GP and aligned move instruction

Status in QEMU:
  Expired

Bug description:
  When the operand of movaps, movapd or movdqa instruction isn't
  aligned, general-protection exception should be generated.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1211943/+subscriptions



[Qemu-devel] [Bug 1196498] Re: Failed to create COM port in Windows Guest VM

2018-04-09 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1196498

Title:
  Failed to create COM port in Windows Guest VM

Status in QEMU:
  Expired

Bug description:
  Hi,

  Tried to establish virtual serial connection between two Windows Guest
  VMs(Windows 2008 Server).

  As it mentioned under the link,
  http://www.linux-kvm.org/page/WindowsGuestDrivers/UpdatedGuestDebugging

  Started VM using the below command with serial device as TCP port.
  # qemu-system-x86_64 -smp 1 -enable-kvm -m 512 -boot c -serial 
tcp:127.0.0.1:50001,server,nowait -hda 
/mnt/sda5/var/lib/libvirt/images/win2008host.img

  And started WinDbg in the Windows VM, with serial port:COM1 and
  Baudrate:115200.

  It throws the error "Could not start kernel debugging using
  com:port:COM1,baudrate:115200 parameters, Win32 error 0n2. The system
  cannot find the file specified."

  Checked under Device Manager and there is no COM1 port.
  Does the -serial tcp:127.0.0.01:50001 creates COM1 port? I doubt.
  If no what are the parameters I should pass to WinDbg for serial port?

  Please help me how to solve this issue.

  Thanks
  Venkatesh

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1196498/+subscriptions



[Qemu-devel] [Bug 1354167] Re: On VM restart: Could not open 'poppy.qcow2': Could not read snapshots: File too large

2018-04-09 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1354167

Title:
  On VM restart: Could not open 'poppy.qcow2': Could not read snapshots:
  File too large

Status in QEMU:
  Expired

Bug description:
  I'm unable to restart a VM.   virt-manager is giving me:

  Error starting domain: internal error: process exited while connecting
  to monitor: qemu-system-x86_64: -drive
  file=/var/lib/libvirt/images/poppy.qcow2,if=none,id=drive-virtio-
  disk0,format=qcow2: could not open disk image
  /var/lib/libvirt/images/poppy.qcow2: Could not read snapshots: File
  too large

  
  From the command line trying to check the image also gives me:
  qemu-img check poppy.qcow2
  qemu-img: Could not open 'poppy.qcow2': Could not read snapshots: File too 
large

  
  This bug appears with both the default install of qemu for ubuntu 14.04:
  qemu-img version 2.0.0, Copyright (c) 2004-2008 Fabrice Bellard

  And the latest version.
  qemu-img version 2.1.50, Copyright (c) 2004-2008 Fabrice Bellard

  
  Host: 
  Dual E5-2650 v2 @ 2.60GHz
  32GB Memory
  4TB Disk space (2.1TB Free) 

  Host OS: Ubuntu 14.04.1 LTS 64bit

  Guest:
  Ubuntu 14.04 64bit
  Storage Size: 500gb

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1354167/+subscriptions



[Qemu-devel] [Bug 1225187] Re: qemu hangs in windows 7 host with -serial pipe:windbg

2018-04-09 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1225187

Title:
  qemu hangs in windows 7 host with -serial pipe:windbg

Status in QEMU:
  Expired

Bug description:
  Execution line:
  qemu-system-i386.exe -m 512 c:\Disks\Qemu_XP_en.vhd  -serial pipe:windbg

  It waits for the pipe.
  Execute windbg
  c:\WINDDK\7600.16385.1\Debuggers\windbg.exe -k 
com:pipe,port=\\.\pipe\windbg,resets=0,reconnect

  GUI black screen shown. QEMU hangs.

  qemu v1.5.3 (c0b1a7e207094dba0b37a892b41fe4cab3195e44). MinGW built.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1225187/+subscriptions



[Qemu-devel] [Bug 1343827] Re: block.c: multiwrite_merge() truncates overlapping requests

2018-04-09 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1343827

Title:
  block.c: multiwrite_merge() truncates overlapping requests

Status in QEMU:
  Expired

Bug description:
  If the list of requests passed to multiwrite_merge() contains two
  requests where the first is for a range of sectors that is a strict
  subset of the second's, the second request is truncated to end where
  the first starts, so the second half of the second request is lost.

  This is easy to reproduce by running fio against a virtio-blk device
  running on qemu 2.1.0-rc1 with the below fio script. At least with fio
  2.0.13, the randwrite pass will issue overlapping bios to the block
  driver, which the kernel is happy to pass along to qemu:

  [global]
  randrepeat=0
  ioengine=libaio
  iodepth=64
  direct=1
  size=1M
  numjobs=1
  verify_fatal=1
  verify_dump=1

  filename=$dev

  [seqwrite]
  blocksize_range=4k-1M
  rw=write
  verify=crc32c-intel

  [randwrite]
  stonewall
  blocksize_range=4k-1M
  rw=randwrite
  verify=meta

  Here is a naive fix for the problem that simply avoids merging
  problematic requests. I guess a better solution would be to redo
  qemu_iovec_concat() to do the right thing.

  diff -ur old/qemu-2.1.0-rc2/block.c qemu-2.1.0-rc2/block.c
  --- old/qemu-2.1.0-rc2/block.c  2014-07-15 14:49:14.0 -0700
  +++ qemu-2.1.0-rc2/block.c  2014-07-17 23:03:14.224169741 -0700
  @@ -4460,7 +4460,9 @@
   int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
   
   // Handle exactly sequential writes and overlapping writes.
  -if (reqs[i].sector <= oldreq_last) {
  +// If this request ends before the previous one, don't merge.
  +if (reqs[i].sector <= oldreq_last &&
  +reqs[i].sector + reqs[i].nb_sectors >= oldreq_last) {
   merge = 1;
   }

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1343827/+subscriptions



Re: [Qemu-devel] [PATCH for 2.13 1/2] Revert "spapr: Don't allow memory hotplug to memory less nodes"

2018-04-09 Thread David Gibson
On Fri, Apr 06, 2018 at 08:48:55AM +0300, Serhii Popovych wrote:
> Bharata B Rao wrote:
> > On Thu, Apr 05, 2018 at 10:35:22AM -0400, Serhii Popovych wrote:
> >> This reverts commit b556854bd8524c26b8be98ab1bfdf0826831e793.
> >>
> >> Leave change @node type from uint32_t to to int from reverted commit
> >> because node < 0 is always false.
> >>
> >> Signed-off-by: Serhii Popovych 
> >> ---
> >>  hw/ppc/spapr.c | 22 --
> >>  1 file changed, 22 deletions(-)
> >>
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 2c0be8c..3ad4545 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -3477,28 +3477,6 @@ static void 
> >> spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> >>  return;
> >>  }
> >>
> >> -/*
> >> - * Currently PowerPC kernel doesn't allow hot-adding memory to
> >> - * memory-less node, but instead will silently add the memory
> >> - * to the first node that has some memory. This causes two
> >> - * unexpected behaviours for the user.
> >> - *
> >> - * - Memory gets hotplugged to a different node than what the user
> >> - *   specified.
> >> - * - Since pc-dimm subsystem in QEMU still thinks that memory 
> >> belongs
> >> - *   to memory-less node, a reboot will set things accordingly
> >> - *   and the previously hotplugged memory now ends in the right 
> >> node.
> >> - *   This appears as if some memory moved from one node to 
> >> another.
> >> - *
> >> - * So until kernel starts supporting memory hotplug to memory-less
> >> - * nodes, just prevent such attempts upfront in QEMU.
> >> - */
> >> -if (nb_numa_nodes && !numa_info[node].node_mem) {
> >> -error_setg(errp, "Can't hotplug memory to memory-less node 
> >> %d",
> >> -   node);
> >> -return;
> >> -}
> >> -
> > 
> > If you remove this unconditionally, wouldn't it be a problem in case
> > of newer QEMU with older guest kernels ?
> 
> Yes, that definitely would affect guest kernels without such support. We
> probably need to add some capability to test for guest kernel
> functionality presence.

Hm, maybe.

So first, we should check when the guest side support came in.  If
it's old enough we might not care.

PAPR does include a mechanism for negotiating guest/host
capabilities.  However, I don't think it has a bit for this specific
feature, so I can't really see a way to do this cleanly.

I don't think we necessarily have to handle that case: it's not like
we can reasonably workaround *every* possible guest bug/limitation
from the host side.  If you want to create a system with a memory-less
node you need an OS that can handle that, nothing really special there.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH for 2.13 2/2] spapr: Add ibm, max-associativity-domains property

2018-04-09 Thread David Gibson
On Thu, Apr 05, 2018 at 10:35:23AM -0400, Serhii Popovych wrote:
> Now recent kernels (i.e. since linux-stable commit a346137e9142
> ("powerpc/numa: Use ibm,max-associativity-domains to discover possible nodes")
> support this property to mark initially memory-less NUMA nodes as "possible"
> to allow further memory hot-add to them.
> 
> Advertise this property for pSeries machines to let guest kernels detect
> maximum supported node configuration and benefit from kernel side change
> when hot-add memory to specific, possibly empty before, NUMA node.
> 
> Signed-off-by: Serhii Popovych 
> ---
>  hw/ppc/spapr.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 3ad4545..e02fc94 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -909,6 +909,14 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void 
> *fdt)
>  0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE),
>  cpu_to_be32(max_cpus / smp_threads),
>  };
> +uint32_t maxdomains[] = {
> +cpu_to_be32(5),
> +cpu_to_be32(0),
> +cpu_to_be32(0),
> +cpu_to_be32(0),
> +cpu_to_be32(nb_numa_nodes - 1),
> +cpu_to_be32(max_cpus - 1),
> +};

There's a minor problem here, which I didn't think of when I was
discussing this with you earlier.

(max_cpus - 1) in the last slot isn't quite right, because we need the
maximum vcpu id here.  Because of (complicated, historical) reasons we
don't allocate the vcpu ids contiguously in all cases, so it could be
larger than max_cpus - 1.

But, we don't actually need to handle that case.  We give 5 levels of
associativity on cpus, but only 4 on memory.  Having a quick look at
the NUMA code on the guest side, I'm pretty sure it's ok if we only
give maximum values to depth 4 here, so we can just drop the last cell
here.

>  
>  _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));
>  
> @@ -945,6 +953,9 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void 
> *fdt)
>  _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
>   refpoints, sizeof(refpoints)));
>  
> +_FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
> + maxdomains, sizeof(maxdomains)));
> +
>  _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
>RTAS_ERROR_LOG_MAX));
>  _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [Qemu-ppc] [PATCH for 2.13 0/2] target/ppc: Support adding memory to initially memory-less NUMA nodes

2018-04-09 Thread David Gibson
On Fri, Apr 06, 2018 at 10:21:36AM +0200, Greg Kurz wrote:
> On Thu,  5 Apr 2018 10:35:21 -0400
> Serhii Popovych  wrote:
> 
> > Now PowerPC Linux kernel supports hot-add to NUMA nodes not populated
> > initially with memory we can enable such support in qemu. This requires
> > two changes:
> > 
> >   o Add device tree property "ibm,max-associativity-domains" to let
> > guest kernel chance to find max possible NUMA node
> > 
> >   o Revert  commit b556854bd852 ("spapr: Don't allow memory hotplug to
> > memory less nodes") to remove check for hot-add to memory-less node.
> > 
> 
> But the series do the changes in the opposite order... 

Right, the patches should go in the reverse order to avoid breaking
bisections.

> IIUC correctly a recent kernel will mis-behave as before linux commit
> a346137e9142 if the property is not present... ie, patch 2 should come
> first.
> 
> > See description messges for individual changes for more details.
> > 
> > Serhii Popovych (2):
> >   Revert "spapr: Don't allow memory hotplug to memory less nodes"
> >   spapr: Add ibm,max-associativity-domains property
> > 
> >  hw/ppc/spapr.c | 33 +++--
> >  1 file changed, 11 insertions(+), 22 deletions(-)
> > 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v2 00/17] Translation loop conversion for sh4/sparc/mips/s390x/openrisc/riscv targets

2018-04-09 Thread Richard Henderson
On 04/10/2018 02:11 AM, Emilio G. Cota wrote:
> On Mon, Apr 09, 2018 at 16:01:36 +0200, Bastian Koppelmann wrote:
>> Thanks for doing this grunt work. Me and a colleague were planning to do
>> this as well after converting the RISC-V frontend to decodetree. Do you
>> have any plans to do this for the TriCore frontend as well? I have the
>> same plan for Tricore: Convert it to decodetree, then to translation loop.
> 
> I won't do any further conversions in the near future, so please go
> ahead with your plans :-)

In which case I won't review Emilio's final two patches for risc-v.


r~



Re: [Qemu-devel] [PATCH v2 14/17] target/openrisc: convert to TranslatorOps

2018-04-09 Thread Richard Henderson
On 04/07/2018 04:19 AM, Emilio G. Cota wrote:
> Notes:
> 
> - Changed the num_insns test in insn_start to check for
>   dc->base.num_insns > 1, since when tb_start is first
>   called in a TB, base.num_insns is already set to 1.
> 
> - Removed DISAS_NEXT from the switch in tb_stop; use
>   DISAS_TOO_MANY instead.
> 
> - Added an assert_not_reached on tb_stop for DISAS_NEXT
>   and the default case.
> 
> - Merged the two separate log_target_disas calls into the
>   disas_log op.
> 
> Cc: Stafford Horne 
> Signed-off-by: Emilio G. Cota 
> ---
>  target/openrisc/translate.c | 163 
> +---
>  1 file changed, 79 insertions(+), 84 deletions(-)

Reviewed-by: Richard Henderson 


r~



Re: [Qemu-devel] Crash when running hello-world unikernel for ARM

2018-04-09 Thread Ajay Garg
Following is the gdb details :

##
ajay@debian:~/rumprun-arm32$ gdb --args qemu-system-arm -machine virt
-nographic -kernel helloer.bin
GNU gdb (Debian 7.7.1+dfsg-5) 7.7.1
Copyright (C) 2014 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later 
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "arm-linux-gnueabihf".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
.
Find the GDB manual and other documentation resources online at:
.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from qemu-system-arm...(no debugging symbols found)...done.
(gdb) r
Starting program: /usr/bin/qemu-system-arm -machine virt -nographic
-kernel helloer.bin
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/arm-linux-gnueabihf/libthread_db.so.1".
[New Thread 0xb388f290 (LWP 3140)]
qemu: fatal: Trying to execute code outside RAM or ROM at 0x0010

R00= R01= R02= R03=
R04=
Program received signal SIGUSR1, User defined signal 1.
[Switching to Thread 0xb388f290 (LWP 3140)]
0xb5e80f42 in write () at ../sysdeps/unix/syscall-template.S:81
81../sysdeps/unix/syscall-template.S: No such file or directory.
(gdb) bt
#0  0xb5e80f42 in write () at ../sysdeps/unix/syscall-template.S:81
#1  0xb5e45c84 in _IO_new_file_write (f=0xb5ee29f0 <_IO_2_1_stderr_>,
data=0xb388c5a0, n=12) at fileops.c:1253
#2  0xb5e454b2 in new_do_write (fp=fp@entry=0xb5ee29f0 <_IO_2_1_stderr_>,
data=data@entry=0xb388c5a0 "R04=", to_do=to_do@entry=12)
at fileops.c:530
#3  0xb5e460d0 in _IO_new_file_xsputn (f=0xb5ee29f0 <_IO_2_1_stderr_>,
data=, n=12) at fileops.c:1335
#4  0xb5e2bc8e in buffered_vfprintf (s=s@entry=0xb5ee29f0 <_IO_2_1_stderr_>,
format=format@entry=0x263e68 "R%02d=%08x", args=...) at vfprintf.c:2369
#5  0xb5e28418 in _IO_vfprintf_internal (s=0xb5ee29f0 <_IO_2_1_stderr_>,
format=0x263e68 "R%02d=%08x", format@entry=0x84ad60 "pg\201", ap=...,
ap@entry=...) at vfprintf.c:1296
#6  0xb5e2ee00 in __fprintf (stream=,
format=0x263e68 "R%02d=%08x") at fprintf.c:32
#7  0x000dc352 in ?? ()
Backtrace stopped: previous frame identical to this frame (corrupt stack?)
(gdb)
#0  0xb5e80f42 in write () at ../sysdeps/unix/syscall-template.S:81
#1  0xb5e45c84 in _IO_new_file_write (f=0xb5ee29f0 <_IO_2_1_stderr_>,
data=0xb388c5a0, n=12) at fileops.c:1253
#2  0xb5e454b2 in new_do_write (fp=fp@entry=0xb5ee29f0 <_IO_2_1_stderr_>,
data=data@entry=0xb388c5a0 "R04=", to_do=to_do@entry=12)
at fileops.c:530
#3  0xb5e460d0 in _IO_new_file_xsputn (f=0xb5ee29f0 <_IO_2_1_stderr_>,
data=, n=12) at fileops.c:1335
#4  0xb5e2bc8e in buffered_vfprintf (s=s@entry=0xb5ee29f0 <_IO_2_1_stderr_>,
format=format@entry=0x263e68 "R%02d=%08x", args=...) at vfprintf.c:2369
#5  0xb5e28418 in _IO_vfprintf_internal (s=0xb5ee29f0 <_IO_2_1_stderr_>,
format=0x263e68 "R%02d=%08x", format@entry=0x84ad60 "pg\201", ap=...,
ap@entry=...) at vfprintf.c:1296
#6  0xb5e2ee00 in __fprintf (stream=,
format=0x263e68 "R%02d=%08x") at fprintf.c:32
#7  0x000dc352 in ?? ()
Backtrace stopped: previous frame identical to this frame (corrupt stack?)
##

On Tue, Apr 10, 2018 at 9:44 AM, Ajay Garg  wrote:
> Thanks Alex for the reply ..
>
>>
>> Can you run under -s -S and gdb step the *guest* and see where it ends
>> up. The above error is usually indicative of the guest going off into
>> the weeds somewhere because the hardware isn't what it expects.
>>
>
> So, after your reply that it might be because of the
> hardware-mismatch, I kinda took a detour, and installed a arm32 "virt"
> machine on qemu on a x86_64 host, as per the steps at
> https://translatedcode.wordpress.com/2016/11/03/installing-debian-on-qemus-32-bit-arm-virt-board/
>
> All went fine, and then I compiled rumprun on this "virt" guest.
> Finally, upon running, I now get this :
>
> ##
> ajay@debian:~/rumprun-arm32$ qemu-system-arm -machine virt -nographic
> -kernel helloer.bin
> qemu: fatal: Trying to execute code outside RAM or ROM at 0x0010
>
> R00= R01= R02= R03=
> R04= R05= R06= R07=
> R08= R09= R10= R11=
> R12= R13= R14= R15=0010
> PSR=41d3 -Z-- A svc32
> s00= s01= d00=
> 

Re: [Qemu-devel] [RFC PATCH v2] spapr: Support ibm, dynamic-memory-v2 property

2018-04-09 Thread Bharata B Rao
On Tue, Apr 10, 2018 at 01:02:45PM +1000, David Gibson wrote:
> On Mon, Apr 09, 2018 at 11:55:38AM +0530, Bharata B Rao wrote:
> > The new property ibm,dynamic-memory-v2 allows memory to be represented
> > in a more compact manner in device tree.
> 
> I still need to look at this in more detail, but to start with:
> what's the rationale for this new format?
> 
> It's more compact, but why do we care?  The embedded people always
> whinge about the size of the deivce tree, but I didn't think that was
> really a concern with PAPR.

Here's a real example of how this has affected us earlier:

SLOF's CAS FDT buffer size was initially 32K, was changed to 64k to
support 1TB guest memory and again changed to 2MB to support 16TB guest
memory.

With ibm,dynamic-memory-v2 we are less likely to hit such scenarios.

Also, theoretically it should be more efficient in the guest kernel
to handle LMB-sets than individual LMBs.

We aren't there yet, but I believe grouping of LMBs should eventually
help us do memory hotplug at set (or DIMM) granularity than at individual
LMB granularity (Again theoretical possibility)

Regards,
Bharata.




Re: [Qemu-devel] Crash when running hello-world unikernel for ARM

2018-04-09 Thread Ajay Garg
Thanks Alex for the reply ..

>
> Can you run under -s -S and gdb step the *guest* and see where it ends
> up. The above error is usually indicative of the guest going off into
> the weeds somewhere because the hardware isn't what it expects.
>

So, after your reply that it might be because of the
hardware-mismatch, I kinda took a detour, and installed a arm32 "virt"
machine on qemu on a x86_64 host, as per the steps at
https://translatedcode.wordpress.com/2016/11/03/installing-debian-on-qemus-32-bit-arm-virt-board/

All went fine, and then I compiled rumprun on this "virt" guest.
Finally, upon running, I now get this :

##
ajay@debian:~/rumprun-arm32$ qemu-system-arm -machine virt -nographic
-kernel helloer.bin
qemu: fatal: Trying to execute code outside RAM or ROM at 0x0010

R00= R01= R02= R03=
R04= R05= R06= R07=
R08= R09= R10= R11=
R12= R13= R14= R15=0010
PSR=41d3 -Z-- A svc32
s00= s01= d00=
s02= s03= d01=
s04= s05= d02=
s06= s07= d03=
s08= s09= d04=
s10= s11= d05=
s12= s13= d06=
s14= s15= d07=
s16= s17= d08=
s18= s19= d09=
s20= s21= d10=
s22= s23= d11=
s24= s25= d12=
s26= s27= d13=
s28= s29= d14=
s30= s31= d15=
s32= s33= d16=
s34= s35= d17=
s36= s37= d18=
s38= s39= d19=
s40= s41= d20=
s42= s43= d21=
s44= s45= d22=
s46= s47= d23=
s48= s49= d24=
s50= s51= d25=
s52= s53= d26=
s54= s55= d27=
s56= s57= d28=
s58= s59= d29=
s60= s61= d30=
s62= s63= d31=
FPSCR: 
Aborted
##


Additionally, I compiled rumprun on a beaglebone-green-wireless (arm32
machine), and did the same test.
(Fortunately), I got the exact same stacktrace as above, so I guess
it's no more  an issue with hardware-mismatch now ..

Not sure if this is an issue with qemu now, or rumprun ...


Thanks and Regards,
Ajay



Re: [Qemu-devel] [PATCH v2 13/17] target/openrisc: convert to DisasContextBase

2018-04-09 Thread Richard Henderson
On 04/07/2018 04:19 AM, Emilio G. Cota wrote:
> While at it, set is_jmp to DISAS_NORETURN when generating
> an exception.
> 
> Cc: Stafford Horne 
> Signed-off-by: Emilio G. Cota 
> ---
>  target/openrisc/translate.c | 93 
> ++---
>  1 file changed, 46 insertions(+), 47 deletions(-)

Reviewed-by: Richard Henderson 


r~




Re: [Qemu-devel] [PATCH v2 12/17] target/s390x: convert to TranslatorOps

2018-04-09 Thread Richard Henderson
On 04/07/2018 04:19 AM, Emilio G. Cota wrote:
> Note: I looked into dropping dc->do_debug. However, I don't see
> an easy way to do it given that TOO_MANY is also valid
> when we just translate more than max_insns. Thus, the check
> for do_debug in "case DISAS_PC_CC_UPDATED" would still need
> additional state to know whether or not we came from
> breakpoint_check.
> 
> Acked-by: Cornelia Huck 
> Reviewed-by: David Hildenbrand 
> Tested-by:   David Hildenbrand 
> Cc: David Hildenbrand 
> Cc: Cornelia Huck 
> Cc: Alexander Graf 
> Cc: qemu-s3...@nongnu.org
> Signed-off-by: Emilio G. Cota 
> ---
>  target/s390x/translate.c | 162 
> +++
>  1 file changed, 80 insertions(+), 82 deletions(-)

Reviewed-by: Richard Henderson 


r~




Re: [Qemu-devel] [PATCH v2 11/17] target/s390x: convert to DisasContextBase

2018-04-09 Thread Richard Henderson
On 04/07/2018 04:19 AM, Emilio G. Cota wrote:
> Notes:
> 
> - Did not convert {num,max}_insns and is_jmp, since the corresponding
>   code will go away in the next patch.
> 
> - Avoided a checkpatch error in use_exit_tb.
> 
> - As suggested by David, (1) Drop ctx.pc and use
>   ctx.base.pc_next instead, and (2) Rename ctx.next_pc to
>   ctx.pc_tmp and add a comment about it.
> 
> Acked-by: Cornelia Huck 
> Suggested-by: David Hildenbrand 
> Reviewed-by:  David Hildenbrand 
> Cc: David Hildenbrand 
> Cc: Cornelia Huck 
> Cc: Alexander Graf 
> Cc: qemu-s3...@nongnu.org
> Signed-off-by: Emilio G. Cota 
> ---
>  target/s390x/translate.c | 148 
> ---
>  1 file changed, 76 insertions(+), 72 deletions(-)

Reviewed-by: Richard Henderson 


r~




Re: [Qemu-devel] [PULL 1/1] linux-user: fix preadv/pwritev offsets

2018-04-09 Thread Richard Henderson
On 04/10/2018 01:29 PM, Max Filippov wrote:
> On Mon, Apr 9, 2018 at 8:20 PM, Richard Henderson
>  wrote:
>> On 04/10/2018 12:17 PM, Max Filippov wrote:
>>> +uint64_t off = tlow |
>>> +((unsigned long long)thigh << TARGET_LONG_BITS / 2) <<
>>
>> There's a second instance of long long here; needs uint64_t.
> 
> Ehh, will fix.
> BTW, why is uint64_t here better than unsigned long long? Just
> shorter?

Because it's more explicit about the number of bits we're talking about.


r~



Re: [Qemu-devel] [PATCH v2 09/17] target/mips: convert to TranslatorOps

2018-04-09 Thread Richard Henderson
On 04/07/2018 04:19 AM, Emilio G. Cota wrote:
> Notes:
> 
> - DISAS_TOO_MANY replaces the former "break" in the translation loop.
>   However, care must be taken not to overwrite a previous condition
>   in is_jmp; that's why in translate_insn we first check is_jmp and
>   return if it's != DISAS_NEXT.
> 
> - Added an assert in translate_insn, before exiting due to an exception,
>   to make sure that is_jmp is set to DISAS_EXCP (the exception generation
>   function always sets it.)
> 
> - Added an assert for the default case in is_jmp's switch.
> 
> Cc: Aurelien Jarno 
> Cc: Yongbok Kim 
> Signed-off-by: Emilio G. Cota 
> ---
>  target/mips/translate.c | 227 
> 
>  1 file changed, 113 insertions(+), 114 deletions(-)

Reviewed-by: Richard Henderson 


r~




Re: [Qemu-devel] [PATCH v2 08/17] target/mips: use *ctx for DisasContext

2018-04-09 Thread Richard Henderson
On 04/07/2018 04:19 AM, Emilio G. Cota wrote:
> No changes to the logic here; this is just to make the diff
> that follows easier to read.
> 
> While at it, remove the unnecessary 'struct' in
> 'struct TranslationBlock'.
> 
> Note that checkpatch complains with a false positive:
>   ERROR: space prohibited after that '&' (ctx:WxW)
>   #75: FILE: target/mips/translate.c:20220:
>   +ctx->kscrexist = (env->CP0_Config4 >> CP0C4_KScrExist) & 0xff;
>   ^
> Reviewed-by: Philippe Mathieu-Daudé 
> Cc: Aurelien Jarno 
> Cc: Yongbok Kim 
> Signed-off-by: Emilio G. Cota 
> ---
>  target/mips/translate.c | 166 
> 
>  1 file changed, 84 insertions(+), 82 deletions(-)

Reviewed-by: Richard Henderson 


r~




Re: [Qemu-devel] [PATCH v2 07/17] target/mips: convert to DisasContextBase

2018-04-09 Thread Richard Henderson
On 04/07/2018 04:19 AM, Emilio G. Cota wrote:
> Reviewed-by: Philippe Mathieu-Daudé 
> Cc: Aurelien Jarno 
> Cc: Yongbok Kim 
> Signed-off-by: Emilio G. Cota 
> ---
>  target/mips/translate.c | 346 
> 
>  1 file changed, 175 insertions(+), 171 deletions(-)

Reviewed-by: Richard Henderson 


r~




Re: [Qemu-devel] [PATCH v2 06/17] target/mips: convert to DisasJumpType

2018-04-09 Thread Richard Henderson
On 04/07/2018 04:19 AM, Emilio G. Cota wrote:
> Reviewed-by: Philippe Mathieu-Daudé 
> Cc: Aurelien Jarno 
> Cc: Yongbok Kim 
> Signed-off-by: Emilio G. Cota 
> ---
>  target/mips/translate.c | 186 
> +++-
>  1 file changed, 91 insertions(+), 95 deletions(-)
> 
> diff --git a/target/mips/translate.c b/target/mips/translate.c
> index d05ee67..a133205 100644
> --- a/target/mips/translate.c
> +++ b/target/mips/translate.c
> @@ -36,6 +36,7 @@
>  
>  #include "target/mips/trace.h"
>  #include "trace-tcg.h"
> +#include "exec/translator.h"
>  #include "exec/log.h"
>  
>  #define MIPS_DEBUG_DISAS 0
> @@ -1439,7 +1440,7 @@ typedef struct DisasContext {
>  int mem_idx;
>  TCGMemOp default_tcg_memop_mask;
>  uint32_t hflags, saved_hflags;
> -int bstate;
> +DisasJumpType is_jmp;
>  target_ulong btarget;
>  bool ulri;
>  int kscrexist;
> @@ -1460,13 +1461,8 @@ typedef struct DisasContext {
>  bool abs2008;
>  } DisasContext;
>  
> -enum {
> -BS_NONE = 0, /* We go out of the TB without reaching a branch or an
> -  * exception condition */
> -BS_STOP = 1, /* We want to stop translation for any reason */
> -BS_BRANCH   = 2, /* We reached a branch condition */
> -BS_EXCP = 3, /* We reached an exception condition */
> -};
> +#define DISAS_STOP   DISAS_TARGET_0
> +#define DISAS_EXCP   DISAS_TARGET_1

Ok, well, there are existing bugs within the MIPS translation here, and we
might as well fix them within this patch set.

(1) The description for BS_STOP says we want to stop, but (what will become)
mips_tr_tb_stop calls goto_tb.

That's not correct, since we use that after e.g. helper_mtc0_hwrena,
MIPS_HFLAG_HWRENA_ULR is included in tb->flags, and therefore the next TB is
not fixed but depends on the actual value stored into hwrena.

We should instead use lookup_and_goto_ptr, which does a full lookup of the
processor state every time through.

(2) The BS_EXCP in generate_exception_err should map to DISAS_NORETURN, because
we do not return after raising an exception.

(3) Otherwise, the use of BS_EXCP has nothing to do with an exception; e.g.

> case 0:
> save_cpu_state(ctx, 1);
> gen_helper_mtc0_status(cpu_env, arg);
> /* BS_STOP isn't good enough here, hflags may have changed. */
> gen_save_pc(ctx->pc + 4);
> ctx->bstate = BS_EXCP;
> rn = "Status";
> break;

where we are in fact relying on (what will become) mips_tr_tb_stop to emit
exit_tb.  It would be better to name these uses DISAS_EXIT, which would match
e.g. target/arm.



r~



Re: [Qemu-devel] [RFC PATCH 0/8] virtio-net 1.1 userspace backend support

2018-04-09 Thread Jason Wang



On 2018年04月04日 20:53, w...@redhat.com wrote:

From: Wei Xu 

This is a prototype for virtio-net 1.1 support in userspace backend,
only minimum part are included in this RFC(roughly synced to v8 as
Jason and Tiwei's RFC).

Test has been done together with Tiwei's RFC guest virtio-net driver
patch, ping and a quick iperf test successfully.

Issues:
1. Rx performance of Iperf is much slower than TX.
 TX: 13-15Gb
 RX: 100-300Mb


This needs to be investigated. What's the pps of TX/RX then? (Maybe you 
can try Jen's dpdk code too).


Thanks



Missing:
- device and driver
- indirect descriptor
- migration
- vIOMMU support
- other revisions since v8
- see FIXME

Wei Xu (8):
   virtio: feature bit, data structure for packed ring
   virtio: memory cache for packed ring
   virtio: add empty check for packed ring
   virtio: add detach element for packed ring(1.1)
   virtio: notification tweak for packed ring
   virtio: flush/push support for packed ring
   virtio: get avail bytes check for packed ring
   virtio: queue pop support for packed ring

  hw/virtio/virtio.c | 618 +++--
  include/hw/virtio/virtio.h |  12 +-
  include/standard-headers/linux/virtio_config.h |   2 +
  3 files changed, 601 insertions(+), 31 deletions(-)






[Qemu-devel] [PULL 4/6] e1000: Migrate props via a temporary structure

2018-04-09 Thread Jason Wang
From: "Dr. David Alan Gilbert" 

Swing the tx.props out via a temporary structure, so in future patches
we can select what we're going to send.

Signed-off-by: Dr. David Alan Gilbert 
Signed-off-by: Jason Wang 
---
 hw/net/e1000.c | 27 +++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index bb8ee2a..4e606d4 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -130,6 +130,7 @@ typedef struct E1000State_st {
 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
 uint32_t compat_flags;
 bool received_tx_tso;
+e1000x_txd_props mig_props;
 } E1000State;
 
 #define chkflag(x) (s->compat_flags & E1000_FLAG_##x)
@@ -1365,6 +1366,7 @@ static int e1000_pre_save(void *opaque)
 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
 }
 
+s->mig_props = s->tx.props;
 return 0;
 }
 
@@ -1393,12 +1395,13 @@ static int e1000_post_load(void *opaque, int version_id)
   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
 }
 
+s->tx.props = s->mig_props;
 if (!s->received_tx_tso) {
 /* We received only one set of offload data (tx.props)
  * and haven't got tx.tso_props.  The best we can do
  * is dupe the data.
  */
-s->tx.tso_props = s->tx.props;
+s->tx.tso_props = s->mig_props;
 }
 return 0;
 }
@@ -1496,20 +1499,20 @@ static const VMStateDescription vmstate_e1000 = {
 VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
 VMSTATE_UINT16(eecd_state.reading, E1000State),
 VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
-VMSTATE_UINT8(tx.props.ipcss, E1000State),
-VMSTATE_UINT8(tx.props.ipcso, E1000State),
-VMSTATE_UINT16(tx.props.ipcse, E1000State),
-VMSTATE_UINT8(tx.props.tucss, E1000State),
-VMSTATE_UINT8(tx.props.tucso, E1000State),
-VMSTATE_UINT16(tx.props.tucse, E1000State),
-VMSTATE_UINT32(tx.props.paylen, E1000State),
-VMSTATE_UINT8(tx.props.hdr_len, E1000State),
-VMSTATE_UINT16(tx.props.mss, E1000State),
+VMSTATE_UINT8(mig_props.ipcss, E1000State),
+VMSTATE_UINT8(mig_props.ipcso, E1000State),
+VMSTATE_UINT16(mig_props.ipcse, E1000State),
+VMSTATE_UINT8(mig_props.tucss, E1000State),
+VMSTATE_UINT8(mig_props.tucso, E1000State),
+VMSTATE_UINT16(mig_props.tucse, E1000State),
+VMSTATE_UINT32(mig_props.paylen, E1000State),
+VMSTATE_UINT8(mig_props.hdr_len, E1000State),
+VMSTATE_UINT16(mig_props.mss, E1000State),
 VMSTATE_UINT16(tx.size, E1000State),
 VMSTATE_UINT16(tx.tso_frames, E1000State),
 VMSTATE_UINT8(tx.sum_needed, E1000State),
-VMSTATE_INT8(tx.props.ip, E1000State),
-VMSTATE_INT8(tx.props.tcp, E1000State),
+VMSTATE_INT8(mig_props.ip, E1000State),
+VMSTATE_INT8(mig_props.tcp, E1000State),
 VMSTATE_BUFFER(tx.header, E1000State),
 VMSTATE_BUFFER(tx.data, E1000State),
 VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
-- 
2.7.4




[Qemu-devel] [PULL 3/6] e1000: wire new subsection to property

2018-04-09 Thread Jason Wang
From: "Dr. David Alan Gilbert" 

Wire the new subsection from the previous commit to a property
so we can turn it off easily.

Signed-off-by: Dr. David Alan Gilbert 
Signed-off-by: Jason Wang 
---
 hw/net/e1000.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index d399ce3..bb8ee2a 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -123,9 +123,11 @@ typedef struct E1000State_st {
 #define E1000_FLAG_AUTONEG_BIT 0
 #define E1000_FLAG_MIT_BIT 1
 #define E1000_FLAG_MAC_BIT 2
+#define E1000_FLAG_TSO_BIT 3
 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
+#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
 uint32_t compat_flags;
 bool received_tx_tso;
 } E1000State;
@@ -1422,6 +1424,13 @@ static bool e1000_full_mac_needed(void *opaque)
 return chkflag(MAC);
 }
 
+static bool e1000_tso_state_needed(void *opaque)
+{
+E1000State *s = opaque;
+
+return chkflag(TSO);
+}
+
 static const VMStateDescription vmstate_e1000_mit_state = {
 .name = "e1000/mit_state",
 .version_id = 1,
@@ -1452,6 +1461,7 @@ static const VMStateDescription 
vmstate_e1000_tx_tso_state = {
 .name = "e1000/tx_tso_state",
 .version_id = 1,
 .minimum_version_id = 1,
+.needed = e1000_tso_state_needed,
 .post_load = e1000_tx_tso_post_load,
 .fields = (VMStateField[]) {
 VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
@@ -1677,6 +1687,8 @@ static Property e1000_properties[] = {
 compat_flags, E1000_FLAG_MIT_BIT, true),
 DEFINE_PROP_BIT("extra_mac_registers", E1000State,
 compat_flags, E1000_FLAG_MAC_BIT, true),
+DEFINE_PROP_BIT("migrate_tso_props", E1000State,
+compat_flags, E1000_FLAG_TSO_BIT, true),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.7.4




[Qemu-devel] [PULL 6/6] e1000: Old machine types, turn new subsection off

2018-04-09 Thread Jason Wang
From: "Dr. David Alan Gilbert" 

Turn the newly added subsection off for old machine types

Signed-off-by: Dr. David Alan Gilbert 
Signed-off-by: Jason Wang 
---
 include/hw/compat.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/hw/compat.h b/include/hw/compat.h
index bc9e3a6..13242b8 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -14,6 +14,10 @@
 .driver   = "vhost-user-blk-pci",\
 .property = "vectors",\
 .value= "2",\
+},{\
+.driver   = "e1000",\
+.property = "migrate_tso_props",\
+.value= "off",\
 },
 
 #define HW_COMPAT_2_10 \
-- 
2.7.4




[Qemu-devel] [PULL 5/6] e1000: Choose which set of props to migrate

2018-04-09 Thread Jason Wang
From: "Dr. David Alan Gilbert" 

When we're using the subsection we migrate both
the 'props' and 'tso_props' data; when we're not using
the subsection (to migrate to 2.11 or old machine types) we've
got to choose what to migrate in the main structure.

If we're using the subsection migrate 'props' in the main structure.
If we're not using the subsection then migrate the last one
that changed, which gives behaviour similar to the old behaviour.

Signed-off-by: Dr. David Alan Gilbert 
Signed-off-by: Jason Wang 
---
 hw/net/e1000.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 4e606d4..13a9494 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -130,6 +130,7 @@ typedef struct E1000State_st {
 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
 uint32_t compat_flags;
 bool received_tx_tso;
+bool use_tso_for_migration;
 e1000x_txd_props mig_props;
 } E1000State;
 
@@ -622,9 +623,11 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 if (dtype == E1000_TXD_CMD_DEXT) {/* context descriptor */
 if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
 e1000x_read_tx_ctx_descr(xp, >tso_props);
+s->use_tso_for_migration = 1;
 tp->tso_frames = 0;
 } else {
 e1000x_read_tx_ctx_descr(xp, >props);
+s->use_tso_for_migration = 0;
 }
 return;
 } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
@@ -1366,7 +1369,20 @@ static int e1000_pre_save(void *opaque)
 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
 }
 
-s->mig_props = s->tx.props;
+/* Decide which set of props to migrate in the main structure */
+if (chkflag(TSO) || !s->use_tso_for_migration) {
+/* Either we're migrating with the extra subsection, in which
+ * case the mig_props is always 'props' OR
+ * we've not got the subsection, but 'props' was the last
+ * updated.
+ */
+s->mig_props = s->tx.props;
+} else {
+/* We're not using the subsection, and 'tso_props' was
+ * the last updated.
+ */
+s->mig_props = s->tx.tso_props;
+}
 return 0;
 }
 
-- 
2.7.4




[Qemu-devel] [PULL 1/6] e1000: Convert v3 fields to subsection

2018-04-09 Thread Jason Wang
From: "Dr. David Alan Gilbert" 

A bunch of new TSO fields were introduced by d62644b4 and this bumped
the VMState version; however it's easier for those trying to keep
backwards migration compatibility if these fields are added in a
subsection instead.

Move the new fields to a subsection.

Since this was added after 2.11, this change will only affect
compatbility with 2.12-rc0.

Signed-off-by: Dr. David Alan Gilbert 
Signed-off-by: Jason Wang 
---
 hw/net/e1000.c | 34 ++
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index c7f1695..24e9a4a 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -1433,9 +1433,29 @@ static const VMStateDescription 
vmstate_e1000_full_mac_state = {
 }
 };
 
+static const VMStateDescription vmstate_e1000_tx_tso_state = {
+.name = "e1000/tx_tso_state",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
+VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
+VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
+VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
+VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
+VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
+VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
+VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
+VMSTATE_UINT16(tx.tso_props.mss, E1000State),
+VMSTATE_INT8(tx.tso_props.ip, E1000State),
+VMSTATE_INT8(tx.tso_props.tcp, E1000State),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_e1000 = {
 .name = "e1000",
-.version_id = 3,
+.version_id = 2,
 .minimum_version_id = 1,
 .pre_save = e1000_pre_save,
 .post_load = e1000_post_load,
@@ -1508,22 +1528,12 @@ static const VMStateDescription vmstate_e1000 = {
 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
-VMSTATE_UINT8_V(tx.tso_props.ipcss, E1000State, 3),
-VMSTATE_UINT8_V(tx.tso_props.ipcso, E1000State, 3),
-VMSTATE_UINT16_V(tx.tso_props.ipcse, E1000State, 3),
-VMSTATE_UINT8_V(tx.tso_props.tucss, E1000State, 3),
-VMSTATE_UINT8_V(tx.tso_props.tucso, E1000State, 3),
-VMSTATE_UINT16_V(tx.tso_props.tucse, E1000State, 3),
-VMSTATE_UINT32_V(tx.tso_props.paylen, E1000State, 3),
-VMSTATE_UINT8_V(tx.tso_props.hdr_len, E1000State, 3),
-VMSTATE_UINT16_V(tx.tso_props.mss, E1000State, 3),
-VMSTATE_INT8_V(tx.tso_props.ip, E1000State, 3),
-VMSTATE_INT8_V(tx.tso_props.tcp, E1000State, 3),
 VMSTATE_END_OF_LIST()
 },
 .subsections = (const VMStateDescription*[]) {
 _e1000_mit_state,
 _e1000_full_mac_state,
+_e1000_tx_tso_state,
 NULL
 }
 };
-- 
2.7.4




[Qemu-devel] [PULL 2/6] e1000: Dupe offload data on reading old stream

2018-04-09 Thread Jason Wang
From: "Dr. David Alan Gilbert" 

Old QEMUs only had one set of offload data;  when we only receive
one lot, dupe the received data - that should give us about the
same bug level as the old version.

Signed-off-by: Dr. David Alan Gilbert 
Signed-off-by: Jason Wang 
---
 hw/net/e1000.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 24e9a4a..d399ce3 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -127,6 +127,7 @@ typedef struct E1000State_st {
 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
 uint32_t compat_flags;
+bool received_tx_tso;
 } E1000State;
 
 #define chkflag(x) (s->compat_flags & E1000_FLAG_##x)
@@ -1390,6 +1391,20 @@ static int e1000_post_load(void *opaque, int version_id)
   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
 }
 
+if (!s->received_tx_tso) {
+/* We received only one set of offload data (tx.props)
+ * and haven't got tx.tso_props.  The best we can do
+ * is dupe the data.
+ */
+s->tx.tso_props = s->tx.props;
+}
+return 0;
+}
+
+static int e1000_tx_tso_post_load(void *opaque, int version_id)
+{
+E1000State *s = opaque;
+s->received_tx_tso = true;
 return 0;
 }
 
@@ -1437,6 +1452,7 @@ static const VMStateDescription 
vmstate_e1000_tx_tso_state = {
 .name = "e1000/tx_tso_state",
 .version_id = 1,
 .minimum_version_id = 1,
+.post_load = e1000_tx_tso_post_load,
 .fields = (VMStateField[]) {
 VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
 VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
-- 
2.7.4




[Qemu-devel] [PULL 0/6] Net patches

2018-04-09 Thread Jason Wang
The following changes since commit 915d34c5f99b0ab91517c69f54272bfdb6ca2b32:

  Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging 
(2018-04-09 17:29:10 +0100)

are available in the git repository at:

  https://github.com/jasowang/qemu.git tags/net-pull-request

for you to fetch changes up to 5f523530915e57a14ffb8c00e22252bfa557441c:

  e1000: Old machine types, turn new subsection off (2018-04-10 11:30:04 +0800)



A series from David that switches to use subsection instead of version
bumping for e1000 to keep migration compatibility for old
versions. This will ease the downstream maintaining.

Please merge.

Thanks


Dr. David Alan Gilbert (6):
  e1000: Convert v3 fields to subsection
  e1000: Dupe offload data on reading old stream
  e1000: wire new subsection to property
  e1000: Migrate props via a temporary structure
  e1000: Choose which set of props to migrate
  e1000: Old machine types, turn new subsection off

 hw/net/e1000.c  | 103 
 include/hw/compat.h |   4 ++
 2 files changed, 84 insertions(+), 23 deletions(-)




Re: [Qemu-devel] [RFC PATCH v2] spapr: Support ibm, dynamic-memory-v2 property

2018-04-09 Thread David Gibson
On Mon, Apr 09, 2018 at 11:55:38AM +0530, Bharata B Rao wrote:
> The new property ibm,dynamic-memory-v2 allows memory to be represented
> in a more compact manner in device tree.

I still need to look at this in more detail, but to start with:
what's the rationale for this new format?

It's more compact, but why do we care?  The embedded people always
whinge about the size of the deivce tree, but I didn't think that was
really a concern with PAPR.

> 
> Signed-off-by: Bharata B Rao 
> ---
> v1: https://lists.nongnu.org/archive/html/qemu-devel/2018-03/msg01788.html
> Changes in v1:
> - Minor cleanups in the error paths
> - Rebased on top of ppc-for-2.13
> 
>  docs/specs/ppc-spapr-hotplug.txt |  19 +++
>  hw/ppc/spapr.c   | 257 
> ---
>  include/hw/ppc/spapr.h   |   1 +
>  include/hw/ppc/spapr_ovec.h  |   1 +
>  4 files changed, 233 insertions(+), 45 deletions(-)
> 
> diff --git a/docs/specs/ppc-spapr-hotplug.txt 
> b/docs/specs/ppc-spapr-hotplug.txt
> index f57e2a09c6..cc7833108e 100644
> --- a/docs/specs/ppc-spapr-hotplug.txt
> +++ b/docs/specs/ppc-spapr-hotplug.txt
> @@ -387,4 +387,23 @@ Each LMB list entry consists of the following elements:
>  - A 32bit flags word. The bit at bit position 0x0008 defines whether
>the LMB is assigned to the the partition as of boot time.
>  
> +ibm,dynamic-memory-v2
> +
> +This property describes the dynamically reconfigurable memory. This is
> +an alternate and newer way to describe dyanamically reconfigurable memory.
> +It is a property encoded array that has an integer N (the number of
> +LMB set entries) followed by N LMB set entries. There is an LMB set entry
> +for each sequential group of LMBs that share common attributes.
> +
> +Each LMB set entry consists of the following elements:
> +
> +- Number of sequential LMBs in the entry represented by a 32bit integer.
> +- Logical address of the first LMB in the set encoded as a 64bit integer.
> +- DRC index of the first LMB in the set.
> +- Associativity list index that is used as an index into
> +  ibm,associativity-lookup-arrays property described earlier. This
> +  is used to retrieve the right associativity list to be used for all
> +  the LMBs in this set.
> +- A 32bit flags word that applies to all the LMBs in the set.
> +
>  [1] http://thread.gmane.org/gmane.linux.ports.ppc.embedded/75350/focus=106867
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 3ffadd6ac7..4a24fac38c 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -669,63 +669,139 @@ static uint32_t 
> spapr_pc_dimm_node(MemoryDeviceInfoList *list, ram_addr_t addr)
>  return -1;
>  }
>  
> -/*
> - * Adds ibm,dynamic-reconfiguration-memory node.
> - * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation
> - * of this device tree node.
> - */
> -static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
> +struct of_drconf_cell_v2 {
> + uint32_t seq_lmbs;
> + uint64_t base_addr;
> + uint32_t drc_index;
> + uint32_t aa_index;
> + uint32_t flags;
> +} __attribute__((packed));
> +
> +#define SPAPR_DRCONF_CELL_SIZE 6
> +
> +/* ibm,dynamic-memory-v2 */
> +static int spapr_populate_drmem_v2(sPAPRMachineState *spapr, void *fdt,
> +   int offset, MemoryDeviceInfoList *dimms)
>  {
> -MachineState *machine = MACHINE(spapr);
> -int ret, i, offset;
> -uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
> -uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
> -uint32_t hotplug_lmb_start = spapr->hotplug_memory.base / lmb_size;
> -uint32_t nr_lmbs = (spapr->hotplug_memory.base +
> -   memory_region_size(>hotplug_memory.mr)) /
> -   lmb_size;
>  uint32_t *int_buf, *cur_index, buf_len;
> -int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
> -MemoryDeviceInfoList *dimms = NULL;
> +int ret;
> +uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
> +uint64_t addr, cur_addr, size;
> +uint32_t nr_boot_lmbs = (spapr->hotplug_memory.base / lmb_size);
> +uint64_t mem_end = spapr->hotplug_memory.base +
> +   memory_region_size(>hotplug_memory.mr);
> +uint32_t node, nr_entries = 0;
> +sPAPRDRConnector *drc;
> +typedef struct drconf_cell_queue {
> +struct of_drconf_cell_v2 cell;
> +QSIMPLEQ_ENTRY(drconf_cell_queue) entry;
> +} drconf_cell_queue;
> +QSIMPLEQ_HEAD(, drconf_cell_queue) drconf_queue
> += QSIMPLEQ_HEAD_INITIALIZER(drconf_queue);
> +drconf_cell_queue *elem, *next;
> +MemoryDeviceInfoList *info;
>  
> -/*
> - * Don't create the node if there is no hotpluggable memory
> - */
> -if (machine->ram_size == machine->maxram_size) {
> -return 0;
> -}
> +/* Entry to cover RAM and the gap area */
> +elem = g_malloc0(sizeof(drconf_cell_queue));
> +elem->cell.seq_lmbs = cpu_to_be32(nr_boot_lmbs);
> 

Re: [Qemu-devel] [PULL 1/1] linux-user: fix preadv/pwritev offsets

2018-04-09 Thread Max Filippov
On Mon, Apr 9, 2018 at 8:20 PM, Richard Henderson
 wrote:
> On 04/10/2018 12:17 PM, Max Filippov wrote:
>> +uint64_t off = tlow |
>> +((unsigned long long)thigh << TARGET_LONG_BITS / 2) <<
>
> There's a second instance of long long here; needs uint64_t.

Ehh, will fix.
BTW, why is uint64_t here better than unsigned long long? Just
shorter?

-- 
Thanks.
-- Max



Re: [Qemu-devel] [PATCH v2 05/17] target/sparc: convert to TranslatorOps

2018-04-09 Thread Richard Henderson
On 04/07/2018 04:19 AM, Emilio G. Cota wrote:
> Notes:
> 
> - Moved the cross-page check from the end of translate_insn to
>   init_disas_context.
> 
> Tested-by: Mark Cave-Ayland 
> Cc: Mark Cave-Ayland 
> Cc: Artyom Tarasenko 
> Signed-off-by: Emilio G. Cota 
> ---
>  target/sparc/translate.c | 174 
> +++
>  1 file changed, 86 insertions(+), 88 deletions(-)

Reviewed-by: Richard Henderson 


r~




Re: [Qemu-devel] [PATCH v2 04/17] target/sparc: convert to DisasContextBase

2018-04-09 Thread Richard Henderson
On 04/07/2018 04:19 AM, Emilio G. Cota wrote:
> Notes:
> 
> - pc and npc are left unmodified, since they can point to out-of-TB
>   jump targets.
> 
> - Got rid of last_pc in gen_intermediate_code(), using base.pc_next
>   instead. Only update pc_next (1) on a breakpoint (so that tb->size
>   includes the insn), and (2) after reading the current instruction
>   from memory. This allows us to use base.pc_next in the BP check,
>   which is what the translator loop does.
> 
> Cc: Mark Cave-Ayland 
> Cc: Artyom Tarasenko 
> Signed-off-by: Emilio G. Cota 
> ---
>  target/sparc/translate.c | 92 
> +++-
>  1 file changed, 45 insertions(+), 47 deletions(-)

Reviewed-by: Richard Henderson 


r~



Re: [Qemu-devel] [PULL 1/1] linux-user: fix preadv/pwritev offsets

2018-04-09 Thread Richard Henderson
On 04/10/2018 12:17 PM, Max Filippov wrote:
> +uint64_t off = tlow |
> +((unsigned long long)thigh << TARGET_LONG_BITS / 2) <<

There's a second instance of long long here; needs uint64_t.


r~



Re: [Qemu-devel] [PATCH for-2.12] monitor: bind dispatch bh to iohandler context

2018-04-09 Thread Stefan Hajnoczi
On Tue, Apr 03, 2018 at 01:01:15PM +0800, Peter Xu wrote:

I have changed my mind about this: this patch is necessary.  It is
needed in QEMU 2.12.

> Eric Auger reported the problem days ago that OOB broke ARM when running
> with libvirt:
> 
> http://lists.gnu.org/archive/html/qemu-devel/2018-03/msg06231.html
> 
> This patch fixes the problem.

The general problem is that the monitor should only dispatch commands
from main_loop_wait().  That's how the code worked before OOB.  After
OOB commands are also dispatched from aio_poll().  This causes
unexpected behavior.

Here is another scenario that this patch fixes:

If the monitor IOThread parses a command while a monitor command is in
aio_poll() then qmp_dispatch() is re-entered.  Monitor command code is
not designed to handle this!

> It's not really needed now since we have turned OOB off now, but it's
> still a bug fix, and it'll start to work when we turn OOB on for ARM.

No, it is needed even when OOB is disabled.  Consider the case where the
chardev handler is invoked by main_loop_wait() and command parsing
completes.  qmp_dispatcher_bh will be marked scheduled=1.

Before qmp_dispatcher_bh executes another fd handler runs and invokes
aio_poll().  Now qmp_dispatcher_bh runs from aio_poll() instead of from
main_loop_wait().  Before OOB this was impossible and it's likely to
hang or crash.

> The problem was that the monitor dispatcher bottom half was bound to
> qemu_aio_context, but that context seems to be for block only.

s/but that context seems to be for block only/which is used for nested
event loops/

It's not true that qemu_aio_context is used for block only.  All
qemu_bh_new() users (many emulated devices, for example) use
qemu_aio_context, as well as TPM and 9p.

> For the
> rest of the QEMU world we should be using iohandler context.  So
> assigning monitor dispatcher bottom half to that context.

TPM and 9p also use nested event loops, they need to use
qemu_aio_context.

The choice depends on whether or not nested event loops are needed.
Code that isn't designed for nested event loops has to use iohandler_ctx
(usually via qemu_set_fd_handler()).  Code that is designed for nested
event loops has to use qemu_aio_context.


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v8 19/23] SiFive RISC-V UART Device

2018-04-09 Thread Antony Pavlov
On Sat,  3 Mar 2018 02:51:47 +1300
Michael Clark  wrote:

> QEMU model of the UART on the SiFive E300 and U500 series SOCs.
> BBL supports the SiFive UART for early console access via the SBI
> (Supervisor Binary Interface) and the linux kernel SBI console.
> 
> The SiFive UART implements the pre qom legacy interface consistent
> with the 16550a UART in 'hw/char/serial.c'.
> 
> Acked-by: Richard Henderson 
> Signed-off-by: Stefan O'Rear 
> Signed-off-by: Palmer Dabbelt 
> Signed-off-by: Michael Clark 
> ---
>  hw/riscv/sifive_uart.c | 176 
> +
>  include/hw/riscv/sifive_uart.h |  71 +
>  2 files changed, 247 insertions(+)
>  create mode 100644 hw/riscv/sifive_uart.c
>  create mode 100644 include/hw/riscv/sifive_uart.h
> 
> diff --git a/hw/riscv/sifive_uart.c b/hw/riscv/sifive_uart.c
> new file mode 100644
> index 000..b0c3798
> --- /dev/null
> +++ b/hw/riscv/sifive_uart.c
> @@ -0,0 +1,176 @@
> +/*
> + * QEMU model of the UART on the SiFive E300 and U500 series SOCs.
> + *
> + * Copyright (c) 2016 Stefan O'Rear
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along 
> with
> + * this program.  If not, see .
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "hw/sysbus.h"
> +#include "chardev/char.h"
> +#include "chardev/char-fe.h"
> +#include "target/riscv/cpu.h"
> +#include "hw/riscv/sifive_uart.h"
>
> +/*
> + * Not yet implemented:
> + *
> + * Transmit FIFO using "qemu/fifo8.h"
> + * SIFIVE_UART_IE_TXWM interrupts
> + * SIFIVE_UART_IE_RXWM interrupts must honor fifo watermark
> + * Rx FIFO watermark interrupt trigger threshold
> + * Tx FIFO watermark interrupt trigger threshold.
> + */
> +
> +static void update_irq(SiFiveUARTState *s)
> +{
> +int cond = 0;
> +if ((s->ie & SIFIVE_UART_IE_RXWM) && s->rx_fifo_len) {
> +cond = 1;
> +}
> +if (cond) {
> +qemu_irq_raise(s->irq);
> +} else {
> +qemu_irq_lower(s->irq);
> +}
> +}
> +
> +static uint64_t
> +uart_read(void *opaque, hwaddr addr, unsigned int size)
> +{
> +SiFiveUARTState *s = opaque;
> +unsigned char r;
> +switch (addr) {
> +case SIFIVE_UART_RXFIFO:
> +if (s->rx_fifo_len) {
> +r = s->rx_fifo[0];
> +memmove(s->rx_fifo, s->rx_fifo + 1, s->rx_fifo_len - 1);
> +s->rx_fifo_len--;
> +qemu_chr_fe_accept_input(>chr);
> +update_irq(s);
> +return r;
> +}
> +return 0x8000;
> +
> +case SIFIVE_UART_TXFIFO:
> +return 0; /* Should check tx fifo */
> +case SIFIVE_UART_IE:
> +return s->ie;
> +case SIFIVE_UART_IP:
> +return s->rx_fifo_len ? SIFIVE_UART_IP_RXWM : 0;
> +case SIFIVE_UART_TXCTRL:
> +return s->txctrl;
> +case SIFIVE_UART_RXCTRL:
> +return s->rxctrl;
> +case SIFIVE_UART_DIV:
> +return s->div;
> +}
> +
> +hw_error("%s: bad read: addr=0x%x\n",
> +__func__, (int)addr);
> +return 0;
> +}
> +
> +static void
> +uart_write(void *opaque, hwaddr addr,
> +   uint64_t val64, unsigned int size)
> +{
> +SiFiveUARTState *s = opaque;
> +uint32_t value = val64;
> +unsigned char ch = value;
> +
> +switch (addr) {
> +case SIFIVE_UART_TXFIFO:
> +qemu_chr_fe_write(>chr, , 1);
> +return;
> +case SIFIVE_UART_IE:
> +s->ie = val64;
> +update_irq(s);
> +return;
> +case SIFIVE_UART_TXCTRL:
> +s->txctrl = val64;
> +return;
> +case SIFIVE_UART_RXCTRL:
> +s->rxctrl = val64;
> +return;
> +case SIFIVE_UART_DIV:
> +s->div = val64;
> +return;
> +}
> +hw_error("%s: bad write: addr=0x%x v=0x%x\n",
> +__func__, (int)addr, (int)value);
> +}
> +
> +static const MemoryRegionOps uart_ops = {
> +.read = uart_read,
> +.write = uart_write,
> +.endianness = DEVICE_NATIVE_ENDIAN,
> +.valid = {
> +.min_access_size = 4,
> +.max_access_size = 4
> +}
> +};
> +
> +static void uart_rx(void *opaque, const uint8_t *buf, int size)
> +{
> +SiFiveUARTState *s = opaque;
> +
> +/* Got a byte.  */
> +if (s->rx_fifo_len >= sizeof(s->rx_fifo)) {
> +printf("WARNING: UART dropped char.\n");
> +return;
> +}
> +

[Qemu-devel] [PULL 0/1] linux-user: fix file offset for preadv/pwritev

2018-04-09 Thread Max Filippov
Hi Peter,

please pull the following linux-user fix for QEMU-2.12.

The following changes since commit 9abfc88af3ffd3b33c7fab4471da86462ee71d95:

  Merge remote-tracking branch 'remotes/xtensa/tags/20180402-xtensa' into 
staging (2018-04-03 19:02:46 +0100)

are available in the git repository at:

  git://github.com/OSLL/qemu-xtensa.git tags/20180409-xtensa

for you to fetch changes up to 9ac225171c9d6b2a1cba35a94ae7eeaa0106cf7d:

  linux-user: fix preadv/pwritev offsets (2018-04-09 18:57:49 -0700)


Fix file offset for preadv/pwritev linux-user syscalls.


Max Filippov (1):
  linux-user: fix preadv/pwritev offsets

 linux-user/syscall.c | 27 +--
 1 file changed, 25 insertions(+), 2 deletions(-)

-- 
Thanks.
-- Max



[Qemu-devel] [PULL 1/1] linux-user: fix preadv/pwritev offsets

2018-04-09 Thread Max Filippov
preadv/pwritev accept low and high parts of file offset in two separate
parameters. When host bitness doesn't match guest bitness these parts
must be appropriately recombined.
Introduce target_to_host_low_high that does this recombination and use
it in preadv/pwritev syscalls.

This fixes glibc testsuite test misc/tst-preadvwritev64.

Reviewed-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
Signed-off-by: Max Filippov 
---
 linux-user/syscall.c | 27 +--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 5ef517613577..0eab5cc6ade1 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -3386,6 +3386,23 @@ static abi_long do_getsockopt(int sockfd, int level, int 
optname,
 return ret;
 }
 
+/* Convert target low/high pair representing file offset into the host
+ * low/high pair. This function doesn't handle offsets bigger than 64 bits
+ * as the kernel doesn't handle them either.
+ */
+static void target_to_host_low_high(abi_ulong tlow,
+abi_ulong thigh,
+unsigned long *hlow,
+unsigned long *hhigh)
+{
+uint64_t off = tlow |
+((unsigned long long)thigh << TARGET_LONG_BITS / 2) <<
+TARGET_LONG_BITS / 2;
+
+*hlow = off;
+*hhigh = (off >> HOST_LONG_BITS / 2) >> HOST_LONG_BITS / 2;
+}
+
 static struct iovec *lock_iovec(int type, abi_ulong target_addr,
 abi_ulong count, int copy)
 {
@@ -10449,7 +10466,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
arg1,
 {
 struct iovec *vec = lock_iovec(VERIFY_WRITE, arg2, arg3, 0);
 if (vec != NULL) {
-ret = get_errno(safe_preadv(arg1, vec, arg3, arg4, arg5));
+unsigned long low, high;
+
+target_to_host_low_high(arg4, arg5, , );
+ret = get_errno(safe_preadv(arg1, vec, arg3, low, high));
 unlock_iovec(vec, arg2, arg3, 1);
 } else {
 ret = -host_to_target_errno(errno);
@@ -10462,7 +10482,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
arg1,
 {
 struct iovec *vec = lock_iovec(VERIFY_READ, arg2, arg3, 1);
 if (vec != NULL) {
-ret = get_errno(safe_pwritev(arg1, vec, arg3, arg4, arg5));
+unsigned long low, high;
+
+target_to_host_low_high(arg4, arg5, , );
+ret = get_errno(safe_pwritev(arg1, vec, arg3, low, high));
 unlock_iovec(vec, arg2, arg3, 0);
 } else {
 ret = -host_to_target_errno(errno);
-- 
2.11.0




Re: [Qemu-devel] [PATCH] scripts/qemugdb: support coroutine backtrace in coredumps

2018-04-09 Thread Stefan Hajnoczi
On Mon, Apr 09, 2018 at 04:01:24PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> 04.04.2018 13:34, Stefan Hajnoczi wrote:
> > Use the 'select-frame' GDB command to switch stacks instead of manually
> > setting the debugged thread's registers (this only works when debugging
> > a live process, not in a coredump).
> > 
> > Cc: Vladimir Sementsov-Ogievskiy 
> > Signed-off-by: Stefan Hajnoczi 
> > ---
> > Vladimir: Does this work for you?
> > 
> >   scripts/qemugdb/coroutine.py | 6 ++
> >   1 file changed, 2 insertions(+), 4 deletions(-)
> > 
> > diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py
> > index ab699794ab..ed96434aee 100644
> > --- a/scripts/qemugdb/coroutine.py
> > +++ b/scripts/qemugdb/coroutine.py
> > @@ -77,13 +77,11 @@ def bt_jmpbuf(jmpbuf):
> >   for i in regs:
> >   old[i] = gdb.parse_and_eval('(uint64_t)$%s' % i)
> > -for i in regs:
> > -gdb.execute('set $%s = %s' % (i, regs[i]))
> > +gdb.execute('select-frame %s %s' % (regs['rsp'], regs['rip']))
> >   gdb.execute('bt')
> > -for i in regs:
> > -gdb.execute('set $%s = %s' % (i, old[i]))
> > +gdb.execute('select-frame %s %s' % (old['rsp'], old['rip']))
> >   def coroutine_to_jmpbuf(co):
> >   coroutine_pointer = 
> > co.cast(gdb.lookup_type('CoroutineUContext').pointer())
> 
> strange, but it doesn't work. it prints the same backtrace, as if I just
> call bt.
> (I applied it onto "[PATCH 2/4] scripts/qemugdb: improve "qemu coroutine"
> command")
> 
> also, I can just call select-frame with zeros or any garbage in gdb, with
> same effect:
> (gdb) select-frame 0 0
> (gdb) bt
> 
> and get same backtrace.
> 
> so, bt command not related to selected frame. also, up and down commands
> don't help too, they go to frames in current bt, instead of moving
> relatively to selected frame.

I wonder what the point of select-frame is then...

I have CCed the GDB mailing list.  Maybe someone can help us.  Context:

QEMU implements coroutines using jmpbuf.  We'd like to print coroutine
call stacks in GDB and have a script that works when a process is being
debugged (it sets the registers).

Now we'd like to extend the script to work on core dumps where it's not
possible to set registers (since there is no process being debugged).

Is there a way to backtrace an arbitrary call stack in a core dump?

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH for-2.12 v2] iothread: workaround glib bug which hangs qmp-test

2018-04-09 Thread Stefan Hajnoczi
On Mon, Apr 09, 2018 at 04:39:56PM +0800, Peter Xu wrote:
> Free the AIO context earlier than the GMainContext (if we have) to
> workaround a glib2 bug that GSource context pointer is not cleared even
> if the context has already been destroyed (while it should).
> 
> The patch itself only changed the order to destroy the objects, no
> functional change at all. Without this workaround, we can encounter
> qmp-test hang with oob (and possibly any other use case when iothread is
> used with GMainContexts):
> 
>   #0  0x7f35ffe45334 in __lll_lock_wait () from /lib64/libpthread.so.0
>   #1  0x7f35ffe405d8 in _L_lock_854 () from /lib64/libpthread.so.0
>   #2  0x7f35ffe404a7 in pthread_mutex_lock () from /lib64/libpthread.so.0
>   #3  0x7f35fc5b9c9d in g_source_unref_internal (source=0x24f0600, 
> context=0x7f35f960, have_lock=0) at gmain.c:1685
>   #4  0x00aa6672 in aio_context_unref (ctx=0x24f0600) at 
> /root/qemu/util/async.c:497
>   #5  0x0065851c in iothread_instance_finalize (obj=0x24f0380) at 
> /root/qemu/iothread.c:129
>   #6  0x00962d79 in object_deinit (obj=0x24f0380, type=0x242e960) at 
> /root/qemu/qom/object.c:462
>   #7  0x00962e0d in object_finalize (data=0x24f0380) at 
> /root/qemu/qom/object.c:476
>   #8  0x00964146 in object_unref (obj=0x24f0380) at 
> /root/qemu/qom/object.c:924
>   #9  0x00965880 in object_finalize_child_property (obj=0x24ec640, 
> name=0x24efca0 "mon_iothread", opaque=0x24f0380) at 
> /root/qemu/qom/object.c:1436
>   #10 0x00962c33 in object_property_del_child (obj=0x24ec640, 
> child=0x24f0380, errp=0x0) at /root/qemu/qom/object.c:436
>   #11 0x00962d26 in object_unparent (obj=0x24f0380) at 
> /root/qemu/qom/object.c:455
>   #12 0x00658f00 in iothread_destroy (iothread=0x24f0380) at 
> /root/qemu/iothread.c:365
>   #13 0x004c67a8 in monitor_cleanup () at /root/qemu/monitor.c:4663
>   #14 0x00669e27 in main (argc=16, argv=0x7ffc8b1ae2f8, 
> envp=0x7ffc8b1ae380) at /root/qemu/vl.c:4749
> 
> The glib2 bug is fixed in commit 26056558b ("gmain: allow
> g_source_get_context() on destroyed sources", 2012-07-30), the first
> good version is glib2 2.33.10. So this error will be encountered before
> any glib version older than 2.33.10 (not including). Since we are still
> supporting even older glib versions, we may want this workaround.
> 
> Let's make sure we destroy the GSources first before its owner context
> until we drop support for glibs older than 2.33.10.
> 
> Signed-off-by: Peter Xu 
> ---
> v2:
> - verified the root cause of the bug, and enhance commit message and
>   comments correspondingly
> ---
>  iothread.c | 18 ++
>  1 file changed, 14 insertions(+), 4 deletions(-)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v4] linux-user: fix preadv/pwritev offsets

2018-04-09 Thread Max Filippov
On Mon, Apr 9, 2018 at 5:23 PM, Richard Henderson
 wrote:
> On 04/06/2018 11:36 AM, Max Filippov wrote:
>> +static void target_to_host_low_high(abi_ulong tlow,
>> +abi_ulong thigh,
>> +unsigned long *hlow,
>> +unsigned long *hhigh)
>> +{
>> +unsigned long long off = tlow |
>> +((unsigned long long)thigh << TARGET_LONG_BITS / 2) <<
>> +TARGET_LONG_BITS / 2;
>
> Use uint64_t instead of unsigned long long.

Ok.

>> +
>> +*hlow = (unsigned long)off;
>> +*hhigh = (unsigned long)((off >> HOST_LONG_BITS / 2) >>
>> + HOST_LONG_BITS / 2);
>
> The casts here are unnecessary and are implied by the assignment.

Did that to avoid value truncation warning. Will drop.

> Otherwise,
> Reviewed-by: Richard Henderson 

-- 
Thanks.
-- Max



Re: [Qemu-devel] [PATCH v2 17/17] target/riscv: convert to TranslatorOps

2018-04-09 Thread Richard Henderson
On 04/07/2018 04:20 AM, Emilio G. Cota wrote:
> +next_page = (ctx->base.pc_first & TARGET_PAGE_MASK) + 
> TARGET_PAGE_SIZE;
> +if (ctx->base.pc_next >= next_page) {

This fails for the last page of the address space.
Better is

  page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
  if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE) {


r~



Re: [Qemu-devel] [PATCH] migration: calculate expected_downtime with ram_bytes_remaining()

2018-04-09 Thread David Gibson
On Mon, 9 Apr 2018 19:57:47 +0100
"Dr. David Alan Gilbert"  wrote:

> * Balamuruhan S (bal...@linux.vnet.ibm.com) wrote:
> > On 2018-04-04 13:36, Peter Xu wrote:  
> > > On Wed, Apr 04, 2018 at 11:55:14AM +0530, Balamuruhan S wrote:
[snip]
> > > > > - postcopy: that'll let you start the destination VM even without
> > > > >   transferring all the RAMs before hand  
> > > > 
> > > > I am seeing issue in postcopy migration between POWER8(16M) ->
> > > > POWER9(1G)
> > > > where the hugepage size is different. I am trying to enable it but
> > > > host
> > > > start
> > > > address have to be aligned with 1G page size in
> > > > ram_block_discard_range(),
> > > > which I am debugging further to fix it.  
> > > 
> > > I thought the huge page size needs to be matched on both side
> > > currently for postcopy but I'm not sure.  
> > 
> > you are right! it should be matched, but we need to support
> > POWER8(16M) -> POWER9(1G)
> >   
> > > CC Dave (though I think Dave's still on PTO).  
> 
> There's two problems there:
>   a) Postcopy with really big huge pages is a problem, because it takes
>  a long time to send the whole 1G page over the network and the vCPU
>  is paused during that time;  for example on a 10Gbps link, it takes
>  about 1 second to send a 1G page, so that's a silly time to keep
>  the vCPU paused.
> 
>   b) Mismatched pagesizes are a problem on postcopy; we require that the
>  whole of a hostpage is sent continuously, so that it can be
>  atomically placed in memory, the source knows to do this based on
>  the page sizes that it sees.  There are some other cases as well 
>  (e.g. discards have to be page aligned.)

I'm not entirely clear on what mismatched means here.  Mismatched
between where and where?  I *think* the relevant thing is a mismatch
between host backing page size on source and destination, but I'm not
certain.

> Both of the problems are theoretically fixable; but neither case is
> easy.
> (b) could be fixed by sending the hugepage size back to the source,
> so that it knows to perform alignments on a larger boundary to it's
> own RAM blocks.

Sounds feasible, but like something that will take some thought and
time upstream.

> (a) is a much much harder problem; one *idea* would be a major
> reorganisation of the kernels hugepage + userfault code to somehow
> allow them to temporarily present as normal pages rather than a
> hugepage.

Yeah... for Power specifically, I think doing that would be really
hard, verging on impossible, because of the way the MMU is
virtualized.  Well.. it's probably not too bad for a native POWER9
guest (using the radix MMU), but the issue here is for POWER8 compat
guests which use the hash MMU.

> Does P9 really not have a hugepage that's smaller than 1G?

It does (2M), but we can't use it in this situation.  As hinted above,
POWER9 has two very different MMU modes, hash and radix.  In hash mode
(which is similar to POWER8 and earlier CPUs) the hugepage sizes are
16M and 16G, in radix mode (more like x86) they are 2M and 1G.

POWER9 hosts always run in radix mode.  Or at least, we only support
running them in radix mode.  We support both radix mode and hash mode
guests, the latter including all POWER8 compat mode guests.

The next complication is because the way the hash virtualization works,
any page used by the guest must be HPA-contiguous, not just
GPA-contiguous.  Which means that any pagesize used by the guest must
be smaller or equal than the host pagesizes used to back the guest.
We (sort of) cope with that by only advertising the 16M pagesize to the
guest if all guest RAM is backed by >= 16M pages.

But that advertisement only happens at guest boot.  So if we migrate a
guest from POWER8, backed by 16M pages to POWER9 backed by 2M pages,
the guest still thinks it can use 16M pages and jams up.  (I'm in the
middle of upstream work to make the failure mode less horrible).

So, the only way to run a POWER8 compat mode guest with access to 16M
pages on a POWER9 radix mode host is using 1G hugepages on the host
side.

-- 
David Gibson 
Principal Software Engineer, Virtualization, Red Hat


pgpqGClUf_MwQ.pgp
Description: OpenPGP digital signature


[Qemu-devel] [PATCH for-2.12] make-release: add skiboot .version file

2018-04-09 Thread Michael Roth
This is needed to build skiboot from tarball-distributed sources
since the git data the make_release.sh script relies on to generate
it is not available.

Cc: qemu-sta...@nongnu.org
Reported-by: Michael Tokarev 
Signed-off-by: Michael Roth 
---
 scripts/make-release | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/make-release b/scripts/make-release
index 04fa9defdc..c14f75b12c 100755
--- a/scripts/make-release
+++ b/scripts/make-release
@@ -19,6 +19,7 @@ pushd ${destination}
 git checkout "v${version}"
 git submodule update --init
 (cd roms/seabios && git describe --tags --long --dirty > .version)
+(cd roms/skiboot && ./make_version.sh > .version)
 # FIXME: The following line is a workaround for avoiding filename collisions
 # when unpacking u-boot sources on case-insensitive filesystems. Once we
 # update to something with u-boot commit 610eec7f0 we can drop this line.
-- 
2.11.0




Re: [Qemu-devel] [PATCH v2] scsi-disk: Don't enlarge min_io_size to max_io_size

2018-04-09 Thread David Gibson
On Mon, 9 Apr 2018 11:28:16 +0200
Paolo Bonzini  wrote:

> On 09/04/2018 03:08, David Gibson wrote:
>  [...]  
>  [...]  
>  [...]  
>  [...]  
>  [...]  
> 
> Hi David,
> 
> it's already included in my pull request from Friday.

And I see it in master now.  Thanks!

-- 
David Gibson 
Principal Software Engineer, Virtualization, Red Hat


pgpfrViXE3zQd.pgp
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH] icount: fix cpu_restore_state_from_tb for non-tb-exit cases

2018-04-09 Thread Richard Henderson
On 04/09/2018 07:13 PM, Pavel Dovgalyuk wrote:
> In icount mode instructions, that access io memory spaces in the middle
> of the translation blocks, invoke TB recompilation.
> After recompilation such instructions become last in the TB and are
> allowed to access io memory spaces.
> When the code includes instruction like i386 'xchg eax, 0xd080'
> which accesses APIC, QEMU goes into the infinite loop of the recompilation.
> This instruction includes two memory accesses - one read and one write.
> After first access APIC calls cpu_report_tpr_access, which restores
> the CPU state to get the current eip. But cpu_restore_state_from_tb
> resets cpu->can_do_io flag and makes second memory access invalid.
> Therefore second memory access causes a recompilation of the block.
> Then these operations repeat again and again.
> 
> This patch moves resetting cpu->can_do_io flag from cpu_restore_state_from_tb
> to cpu_loop_exit* functions. It also adds a parameter for cpu_restore_state*()
> which controls restoring icount. There is no need in restoring icount,
> when we only query CPU state without breaking the TB. Restoring it in such
> cases leads to the incorrect flow of the virtual time.
> 
> In most cases new parameter is true (icount should be recalculated).
> But there are two cases in i386 and openrisc when the CPU state is only
> queued without the need to break the TB. This patch fixes both
> of these cases.
> 
> Signed-off-by: Pavel Dovgalyuk 
> ---

Thanks for the patch and the detailed description.  I've applied this (with
some editing of the english in the description) to my tcg branch for 2.12.


r~



[Qemu-devel] [PATCH for-2.12] tcg: Introduce tcg_set_insn_start_param

2018-04-09 Thread Richard Henderson
The parameters for tcg_gen_insn_start are target_ulong, which may be split
into two TCGArg parameters for storage in the opcode on 32-bit hosts.

Fixes the ARM target and its direct use of tcg_set_insn_param, which would
set the wrong argument in the 64-on-32 case.

Cc: qemu-sta...@nongnu.org
Reported-by: alar...@ddci.com
Signed-off-by: Richard Henderson 
---

Peter, I'm not sure what the reproducer is for this reported problem.
I could boot my aa64 images both before and after this patch.  Perhaps
I would have needed to run an aa32 binary within the aa64 kernel?


r~

---
 target/arm/translate.h |  2 +-
 tcg/tcg.h  | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/target/arm/translate.h b/target/arm/translate.h
index c47febf99d..4428c98e2e 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -120,7 +120,7 @@ static inline void disas_set_insn_syndrome(DisasContext *s, 
uint32_t syn)
 
 /* We check and clear insn_start_idx to catch multiple updates.  */
 assert(s->insn_start != NULL);
-tcg_set_insn_param(s->insn_start, 2, syn);
+tcg_set_insn_start_param(s->insn_start, 2, syn);
 s->insn_start = NULL;
 }
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 9e2d909a4a..30896ca304 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -825,6 +825,16 @@ static inline void tcg_set_insn_param(TCGOp *op, int arg, 
TCGArg v)
 op->args[arg] = v;
 }
 
+static inline void tcg_set_insn_start_param(TCGOp *op, int arg, target_ulong v)
+{
+#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+tcg_set_insn_param(op, arg, v);
+#else
+tcg_set_insn_param(op, arg * 2, v);
+tcg_set_insn_param(op, arg * 2 + 1, v >> 32);
+#endif
+}
+
 /* The last op that was emitted.  */
 static inline TCGOp *tcg_last_op(void)
 {
-- 
2.14.3




Re: [Qemu-devel] [PATCH v4] linux-user: fix preadv/pwritev offsets

2018-04-09 Thread Richard Henderson
On 04/06/2018 11:36 AM, Max Filippov wrote:
> +static void target_to_host_low_high(abi_ulong tlow,
> +abi_ulong thigh,
> +unsigned long *hlow,
> +unsigned long *hhigh)
> +{
> +unsigned long long off = tlow |
> +((unsigned long long)thigh << TARGET_LONG_BITS / 2) <<
> +TARGET_LONG_BITS / 2;

Use uint64_t instead of unsigned long long.

> +
> +*hlow = (unsigned long)off;
> +*hhigh = (unsigned long)((off >> HOST_LONG_BITS / 2) >>
> + HOST_LONG_BITS / 2);

The casts here are unnecessary and are implied by the assignment.

Otherwise,
Reviewed-by: Richard Henderson 


r~



Re: [Qemu-devel] qom-test on netbsd can be very slow

2018-04-09 Thread Kamil Rytarowski
On 09.04.2018 22:51, Kamil Rytarowski wrote:
> On 09.04.2018 19:10, Peter Maydell wrote:
>> My NetBSD build system recently seems to have taken a nosedive
>> in how long it takes to finish "make check". This seems to be
>> because qom-test (and probably other things where the test interacts
>> with the QEMU process) can run very slowly.
>>
>> netbsdvm# for i in 1 2 3 4 5 6 7 8 9 10; do
>> (QTEST_QEMU_BINARY=x86_64-softmmu/qemu-system-x86_64 time
>> tests/qom-test -p /x86_64/qom/pc-i440fx-2.0); done
>> /x86_64/qom/pc-i440fx-2.0: OK
>> 8.49 real 1.18 user 7.34 sys
>> /x86_64/qom/pc-i440fx-2.0: OK
>>10.41 real 1.32 user 9.09 sys
>> /x86_64/qom/pc-i440fx-2.0: OK
>> 8.45 real 1.24 user 7.24 sys
>> /x86_64/qom/pc-i440fx-2.0: OK
>> 9.88 real 1.10 user 8.31 sys
>> /x86_64/qom/pc-i440fx-2.0: OK
>>11.60 real 1.47 user 9.90 sys
>> /x86_64/qom/pc-i440fx-2.0: OK
>>10.94 real 1.28 user 9.68 sys
>> /x86_64/qom/pc-i440fx-2.0: OK
>>10.06 real 1.32 user 8.76 sys
>> /x86_64/qom/pc-i440fx-2.0: OK
>>13.38 real 1.37 user12.04 sys
>> /x86_64/qom/pc-i440fx-2.0: OK
>>16.19 real 1.46 user14.29 sys
>> /x86_64/qom/pc-i440fx-2.0: OK
>> 9.70 real 1.17 user 8.51 sys
>>
>> Admittedly this is running in a (KVM) VM, but still, there seems
>> something wrong with how long each of these is taking. On Linux
>> each run is less than a second, so there's an order-of-magnitude
>> slowdown here. Further, I've occasionally seen a run take 100 seconds!
>>
>> Does anybody else see this, and any ideas why it might be running slow?
>> I'm not very familiar with debugging on netbsd; I had a look at
>> ktrace output, and the QEMU process seems to sometimes spend
>> quite a lot of time in a poll() loop not actually doing anything.
>> I couldn't figure out how to get the ktrace logs to annotate
>> which thread was making which syscall though, so they weren't
>> easy to interpret. If anybody's more experienced at debugging
>> things in the BSDs that would also be helpful.
>>
>> On OpenBSD, for comparison, each test run seems to fairly reliably
>> take 5 seconds give-or-take, there's much less run-to-run
>> variation, though it's still much slower than Linux is.
>> The OpenBSD and FreeBSD build VMs seem to complete more
>> reasonably quickly than the NetBSD one.
>>
>> One thing I noticed looking at ktrace output is that we do
>> all our reading of QMP input and output with a read syscall
>> per character. I don't think that's the cause of this slowness,
>> though.
>>
>> thanks
>> -- PMM
>>
> 
> I'm running these tests natively and get apparently expected results:
> 
> /x86_64/qom/pc-i440fx-2.0: OK
> 2,05 real 1,36 user 0,71 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 1,92 real 1,17 user 0,79 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 1,84 real 1,14 user 0,77 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 1,85 real 1,16 user 0,76 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 1,79 real 1,04 user 0,82 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 1,84 real 1,00 user 0,91 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 1,86 real 1,18 user 0,75 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 1,85 real 1,27 user 0,64 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 1,84 real 1,27 user 0,64 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 1,82 real 1,18 user 0,71 sys
> 

There is a registered bug in the NetBSD database:

https://gnats.netbsd.org/52184
"Recent qemu performs badly on NetBSD hosts under load"

Could you please try to check if the following commit is culprit using
the reproducer in gnats or running qom-test?


commit 05e514b1d4d5bd4209e2c8bbc76ff05c85a235f3
Author: Paolo Bonzini 
Date:   Tue Jul 21 16:07:53 2015 +0200

AioContext: optimize clearing the EventNotifier

It is pretty rare for aio_notify to actually set the EventNotifier.  It
can happen with worker threads such as thread-pool.c's, but otherwise it
should never be set thanks to the ctx->notify_me optimization.  The
previous patch, unfortunately, added an unconditional call to
event_notifier_test_and_clear; now add a userspace fast path that
avoids the call.

Note that it is not possible to do the same with event_notifier_set;
it would break, as proved (again) by the included formal model.

This patch survived over 3000 reboots on aarch64 KVM.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Fam Zheng 
Tested-by: Richard W.M. Jones 
Message-id: 1437487673-23740-7-git-send-email-pbonz...@redhat.com
Signed-off-by: Stefan Hajnoczi 



signature.asc

[Qemu-devel] [Bug 1762179] Re: Record and replay replay fails with: "ERROR:replay/replay-time.c:49:replay_read_clock: assertion failed"

2018-04-09 Thread Ciro Santilli 六四事件 法轮功
** Description changed:

- QEMU master at 08e173f29461396575c85510eb41474b993cb1fb Ubuntu 17.10
+ QEMU master at 915d34c5f99b0ab91517c69f54272bfdb6ca2b32 Ubuntu 17.10
  host.
  
  QEMU commands:
  
  ```
  #!/usr/bin/env bash
  cmd="\
  time \
  ./x86_64-softmmu/qemu-system-x86_64 \
  -append 'root=/dev/sda console=ttyS0 nokaslr printk.time=y - 
lkmc_eval=\"/rand_check.out;/sbin/ifup -a;wget -S google.com;/poweroff.out;\"' \
  -kernel 'out/x86_64/buildroot/images/bzImage' \
  -nographic \
  \
  -drive 
file=out/x86_64/buildroot/images/rootfs.ext2.qcow2,if=none,id=img-direct,format=qcow2
 \
  -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \
  -device ide-hd,drive=img-blkreplay \
  \
  -netdev user,id=net1 \
  -device rtl8139,netdev=net1 \
  -object filter-replay,id=replay,netdev=net1 \
  "
  echo "$cmd"
  eval "$cmd -icount 'shift=7,rr=record,rrfile=replay.bin'"
  eval "$cmd -icount 'shift=7,rr=replay,rrfile=replay.bin'"
  ```
  
  This tries to stay as close as possible to the documented commands:
  
https://github.com/qemu/qemu/blob/08e173f29461396575c85510eb41474b993cb1fb/docs/replay.txt#L28
  
  Images uploaded to: https://github.com/cirosantilli/linux-kernel-module-
  cheat/releases/download/test-replay-arm/images4.zip
  
  Images generated with: https://github.com/cirosantilli/linux-kernel-
  module-cheat/tree/9513c162ef57e6cb70006dfe870856f94ee9a133
  
  The replay failed straight out with:
  
  ```
  ERROR:replay/replay-time.c:49:replay_read_clock: assertion failed: 
(replay_file && replay_mutex_locked())
  ```
  
  QEMU configure:
  
  ```
  ./configure --enable-debug --enable-trace-backends=simple 
--target-list="x86_64-softmmu"
  ```

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1762179

Title:
  Record and replay replay fails with: "ERROR:replay/replay-
  time.c:49:replay_read_clock: assertion failed"

Status in QEMU:
  New

Bug description:
  QEMU master at 915d34c5f99b0ab91517c69f54272bfdb6ca2b32 Ubuntu 17.10
  host.

  QEMU commands:

  ```
  #!/usr/bin/env bash
  cmd="\
  time \
  ./x86_64-softmmu/qemu-system-x86_64 \
  -append 'root=/dev/sda console=ttyS0 nokaslr printk.time=y - 
lkmc_eval=\"/rand_check.out;/sbin/ifup -a;wget -S google.com;/poweroff.out;\"' \
  -kernel 'out/x86_64/buildroot/images/bzImage' \
  -nographic \
  \
  -drive 
file=out/x86_64/buildroot/images/rootfs.ext2.qcow2,if=none,id=img-direct,format=qcow2
 \
  -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \
  -device ide-hd,drive=img-blkreplay \
  \
  -netdev user,id=net1 \
  -device rtl8139,netdev=net1 \
  -object filter-replay,id=replay,netdev=net1 \
  "
  echo "$cmd"
  eval "$cmd -icount 'shift=7,rr=record,rrfile=replay.bin'"
  eval "$cmd -icount 'shift=7,rr=replay,rrfile=replay.bin'"
  ```

  This tries to stay as close as possible to the documented commands:
  
https://github.com/qemu/qemu/blob/08e173f29461396575c85510eb41474b993cb1fb/docs/replay.txt#L28

  Images uploaded to: https://github.com/cirosantilli/linux-kernel-
  module-cheat/releases/download/test-replay-arm/images4.zip

  Images generated with: https://github.com/cirosantilli/linux-kernel-
  module-cheat/tree/9513c162ef57e6cb70006dfe870856f94ee9a133

  The replay failed straight out with:

  ```
  ERROR:replay/replay-time.c:49:replay_read_clock: assertion failed: 
(replay_file && replay_mutex_locked())
  ```

  QEMU configure:

  ```
  ./configure --enable-debug --enable-trace-backends=simple 
--target-list="x86_64-softmmu"
  ```

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1762179/+subscriptions



Re: [Qemu-devel] [PATCH for-2.12] target/arm: Don't corrupt insn_start arguments on 32-bit hosts

2018-04-09 Thread Richard Henderson
On 04/10/2018 08:13 AM, Peter Maydell wrote:
> On 9 April 2018 at 23:09, Richard Henderson  wrote:
>> On 04/09/2018 08:38 PM, Peter Maydell wrote:
>>> +#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
>>>  tcg_set_insn_param(s->insn_start, 2, syn);
>>> +#else
>>> +/* tcg_gen_insn_start has split every target_ulong argument to
>>> + * op_insn_start into two 32-bit arguments, so we want the low
>>> + * half of the 3rd argument, which is at index 4.
>>> + */
>>> +tcg_set_insn_param(s->insn_start, 4, syn);
>>> +#endif
>>>
>>
>> Ouch, good catch.
>>
>> I think we should fix this in tcg_set_insn_param instead,
>> as several other targets are also affected by this.
> 
> Are they? My grep didn't find anybody else using
> tcg_set_insn_param() except the gen-icount.h code,
> which isn't using target_long arguments.

Ah, the s390 code I thought I remembered is still sitting on a branch.  I shall
have to revive that for the next devel cycle...

> If we can fix it in the tcg generic code instead that
> would be nicer than an ifdef here, though.

I am working on this now.


r~




Re: [Qemu-devel] [PATCH for-2.12] linux-user/signal.c: Ensure AArch64 signal frame isn't too small

2018-04-09 Thread Peter Maydell
On 9 April 2018 at 23:05, Richard Henderson  wrote:
> On 04/10/2018 12:07 AM, Peter Maydell wrote:
>> In particular the dash shell
>> would segfault if the frame wasn't as big enough.
>
> Ah, that was the critical difference in my failure to replicate -- the fedora
> sysroot doesn't have dash.  As you say, the patch matches the kernel so,
>
> Reviewed-by: Richard Henderson 
>
> That said, what the hell is dash doing that relies on this?

Yeah, I want to look more closely at what's going on here
tomorrow -- this is definitely a bug fix but I'm wondering
if it only masks a different underlying issue.

The spurious SEGV is the result of the call to
lock_user_struct() in target_setup_frame() failing
if we use too small a frame size, resulting in our
calling force_sigsegv().

thanks
-- PMM



Re: [Qemu-devel] [PATCH for-2.12] target/arm: Don't corrupt insn_start arguments on 32-bit hosts

2018-04-09 Thread Peter Maydell
On 9 April 2018 at 23:09, Richard Henderson  wrote:
> On 04/09/2018 08:38 PM, Peter Maydell wrote:
>> +#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
>>  tcg_set_insn_param(s->insn_start, 2, syn);
>> +#else
>> +/* tcg_gen_insn_start has split every target_ulong argument to
>> + * op_insn_start into two 32-bit arguments, so we want the low
>> + * half of the 3rd argument, which is at index 4.
>> + */
>> +tcg_set_insn_param(s->insn_start, 4, syn);
>> +#endif
>>
>
> Ouch, good catch.
>
> I think we should fix this in tcg_set_insn_param instead,
> as several other targets are also affected by this.

Are they? My grep didn't find anybody else using
tcg_set_insn_param() except the gen-icount.h code,
which isn't using target_long arguments.

If we can fix it in the tcg generic code instead that
would be nicer than an ifdef here, though.

thanks
-- PMM



Re: [Qemu-devel] [PATCH for-2.12] target/arm: Don't corrupt insn_start arguments on 32-bit hosts

2018-04-09 Thread Richard Henderson
On 04/09/2018 08:38 PM, Peter Maydell wrote:
> +#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
>  tcg_set_insn_param(s->insn_start, 2, syn);
> +#else
> +/* tcg_gen_insn_start has split every target_ulong argument to
> + * op_insn_start into two 32-bit arguments, so we want the low
> + * half of the 3rd argument, which is at index 4.
> + */
> +tcg_set_insn_param(s->insn_start, 4, syn);
> +#endif
>   

Ouch, good catch.

I think we should fix this in tcg_set_insn_param instead,
as several other targets are also affected by this.


r~



Re: [Qemu-devel] [PATCH for-2.12] linux-user/signal.c: Ensure AArch64 signal frame isn't too small

2018-04-09 Thread Richard Henderson
On 04/10/2018 12:07 AM, Peter Maydell wrote:
> In particular the dash shell
> would segfault if the frame wasn't as big enough.

Ah, that was the critical difference in my failure to replicate -- the fedora
sysroot doesn't have dash.  As you say, the patch matches the kernel so,

Reviewed-by: Richard Henderson 

That said, what the hell is dash doing that relies on this?


r~



[Qemu-devel] [Bug 1762558] [NEW] Many crashes with "memslot_get_virt: slot_id 170 too big"-type errors in 2.12.0 rc2

2018-04-09 Thread Adam Williamson
Public bug reported:

Since qemu 2.12.0 rc2 - qemu-2.12.0-0.6.rc2.fc29 - landed in Fedora
Rawhide, just about all of our openQA-automated tests of Rawhide guests
which run with qxl / SPICE graphics in the guest have died partway in,
always shortly after the test switches from the installer (an X
environment) to a console on a tty. qemu is, I think, hanging. There are
always some errors like this right around the time of the hang:

[2018-04-09T20:13:42.0736 UTC] [debug] QEMU: id 0, group 0, virt start 0, virt 
end , generation 0, delta 0
[2018-04-09T20:13:42.0736 UTC] [debug] QEMU: id 1, group 1, virt start 
7f42dbc0, virt end 7f42dfbfe000, generation 0, delta 7f42dbc0
[2018-04-09T20:13:42.0736 UTC] [debug] QEMU: id 2, group 1, virt start 
7f42d7a0, virt end 7f42dba0, generation 0, delta 7f42d7a0
[2018-04-09T20:13:42.0736 UTC] [debug] QEMU: 
[2018-04-09T20:13:42.0736 UTC] [debug] QEMU: (process:45812): Spice-CRITICAL 
**: memslot.c:111:memslot_get_virt: slot_id 218 too big, addr=da8e21fbda8e21fb

or occasionally like this:

[2018-04-09T20:13:58.0717 UTC] [debug] QEMU: id 0, group 0, virt start 0, virt 
end , generation 0, delta 0
[2018-04-09T20:13:58.0720 UTC] [debug] QEMU: id 1, group 1, virt start 
7ff093c0, virt end 7ff097bfe000, generation 0, delta 7ff093c0
[2018-04-09T20:13:58.0720 UTC] [debug] QEMU: id 2, group 1, virt start 
7ff08fa0, virt end 7ff093a0, generation 0, delta 7ff08fa0
[2018-04-09T20:13:58.0720 UTC] [debug] QEMU: 
[2018-04-09T20:13:58.0720 UTC] [debug] QEMU: (process:25622): Spice-WARNING **: 
memslot.c:68:memslot_validate_virt: virtual address out of range
[2018-04-09T20:13:58.0720 UTC] [debug] QEMU: virt=0x0+0x18 slot_id=0 
group_id=1
[2018-04-09T20:13:58.0721 UTC] [debug] QEMU: slot=0x0-0x0 delta=0x0
[2018-04-09T20:13:58.0721 UTC] [debug] QEMU: 
[2018-04-09T20:13:58.0721 UTC] [debug] QEMU: (process:25622): Spice-WARNING **: 
display-channel.c:2426:display_channel_validate_surface: invalid surface_id 
1048576
[2018-04-09T20:14:14.0728 UTC] [debug] QEMU: id 0, group 0, virt start 0, virt 
end , generation 0, delta 0
[2018-04-09T20:14:14.0728 UTC] [debug] QEMU: id 1, group 1, virt start 
7ff093c0, virt end 7ff097bfe000, generation 0, delta 7ff093c0
[2018-04-09T20:14:14.0728 UTC] [debug] QEMU: id 2, group 1, virt start 
7ff08fa0, virt end 7ff093a0, generation 0, delta 7ff08fa0
[2018-04-09T20:14:14.0728 UTC] [debug] QEMU: 
[2018-04-09T20:14:14.0728 UTC] [debug] QEMU: (process:25622): Spice-CRITICAL 
**: memslot.c:122:memslot_get_virt: address generation is not valid, group_id 
1, slot_id 0, gen 110, slot_gen 0

The same tests running on Fedora 28 guests on the same hosts are not
hanging, and the same tests were not hanging right before the qemu
package got updated, so this seems very strongly tied to the new qemu.

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1762558

Title:
  Many crashes with "memslot_get_virt: slot_id 170 too big"-type errors
  in 2.12.0 rc2

Status in QEMU:
  New

Bug description:
  Since qemu 2.12.0 rc2 - qemu-2.12.0-0.6.rc2.fc29 - landed in Fedora
  Rawhide, just about all of our openQA-automated tests of Rawhide
  guests which run with qxl / SPICE graphics in the guest have died
  partway in, always shortly after the test switches from the installer
  (an X environment) to a console on a tty. qemu is, I think, hanging.
  There are always some errors like this right around the time of the
  hang:

  [2018-04-09T20:13:42.0736 UTC] [debug] QEMU: id 0, group 0, virt start 0, 
virt end , generation 0, delta 0
  [2018-04-09T20:13:42.0736 UTC] [debug] QEMU: id 1, group 1, virt start 
7f42dbc0, virt end 7f42dfbfe000, generation 0, delta 7f42dbc0
  [2018-04-09T20:13:42.0736 UTC] [debug] QEMU: id 2, group 1, virt start 
7f42d7a0, virt end 7f42dba0, generation 0, delta 7f42d7a0
  [2018-04-09T20:13:42.0736 UTC] [debug] QEMU: 
  [2018-04-09T20:13:42.0736 UTC] [debug] QEMU: (process:45812): Spice-CRITICAL 
**: memslot.c:111:memslot_get_virt: slot_id 218 too big, addr=da8e21fbda8e21fb

  or occasionally like this:

  [2018-04-09T20:13:58.0717 UTC] [debug] QEMU: id 0, group 0, virt start 0, 
virt end , generation 0, delta 0
  [2018-04-09T20:13:58.0720 UTC] [debug] QEMU: id 1, group 1, virt start 
7ff093c0, virt end 7ff097bfe000, generation 0, delta 7ff093c0
  [2018-04-09T20:13:58.0720 UTC] [debug] QEMU: id 2, group 1, virt start 
7ff08fa0, virt end 7ff093a0, generation 0, delta 7ff08fa0
  [2018-04-09T20:13:58.0720 UTC] [debug] QEMU: 
  [2018-04-09T20:13:58.0720 UTC] [debug] QEMU: (process:25622): Spice-WARNING 
**: memslot.c:68:memslot_validate_virt: virtual address out of range
  [2018-04-09T20:13:58.0720 UTC] [debug] QEMU: 

Re: [Qemu-devel] qom-test on netbsd can be very slow

2018-04-09 Thread Kamil Rytarowski
On 09.04.2018 19:10, Peter Maydell wrote:
> My NetBSD build system recently seems to have taken a nosedive
> in how long it takes to finish "make check". This seems to be
> because qom-test (and probably other things where the test interacts
> with the QEMU process) can run very slowly.
> 
> netbsdvm# for i in 1 2 3 4 5 6 7 8 9 10; do
> (QTEST_QEMU_BINARY=x86_64-softmmu/qemu-system-x86_64 time
> tests/qom-test -p /x86_64/qom/pc-i440fx-2.0); done
> /x86_64/qom/pc-i440fx-2.0: OK
> 8.49 real 1.18 user 7.34 sys
> /x86_64/qom/pc-i440fx-2.0: OK
>10.41 real 1.32 user 9.09 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 8.45 real 1.24 user 7.24 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 9.88 real 1.10 user 8.31 sys
> /x86_64/qom/pc-i440fx-2.0: OK
>11.60 real 1.47 user 9.90 sys
> /x86_64/qom/pc-i440fx-2.0: OK
>10.94 real 1.28 user 9.68 sys
> /x86_64/qom/pc-i440fx-2.0: OK
>10.06 real 1.32 user 8.76 sys
> /x86_64/qom/pc-i440fx-2.0: OK
>13.38 real 1.37 user12.04 sys
> /x86_64/qom/pc-i440fx-2.0: OK
>16.19 real 1.46 user14.29 sys
> /x86_64/qom/pc-i440fx-2.0: OK
> 9.70 real 1.17 user 8.51 sys
> 
> Admittedly this is running in a (KVM) VM, but still, there seems
> something wrong with how long each of these is taking. On Linux
> each run is less than a second, so there's an order-of-magnitude
> slowdown here. Further, I've occasionally seen a run take 100 seconds!
> 
> Does anybody else see this, and any ideas why it might be running slow?
> I'm not very familiar with debugging on netbsd; I had a look at
> ktrace output, and the QEMU process seems to sometimes spend
> quite a lot of time in a poll() loop not actually doing anything.
> I couldn't figure out how to get the ktrace logs to annotate
> which thread was making which syscall though, so they weren't
> easy to interpret. If anybody's more experienced at debugging
> things in the BSDs that would also be helpful.
> 
> On OpenBSD, for comparison, each test run seems to fairly reliably
> take 5 seconds give-or-take, there's much less run-to-run
> variation, though it's still much slower than Linux is.
> The OpenBSD and FreeBSD build VMs seem to complete more
> reasonably quickly than the NetBSD one.
> 
> One thing I noticed looking at ktrace output is that we do
> all our reading of QMP input and output with a read syscall
> per character. I don't think that's the cause of this slowness,
> though.
> 
> thanks
> -- PMM
> 

I'm running these tests natively and get apparently expected results:

/x86_64/qom/pc-i440fx-2.0: OK
2,05 real 1,36 user 0,71 sys
/x86_64/qom/pc-i440fx-2.0: OK
1,92 real 1,17 user 0,79 sys
/x86_64/qom/pc-i440fx-2.0: OK
1,84 real 1,14 user 0,77 sys
/x86_64/qom/pc-i440fx-2.0: OK
1,85 real 1,16 user 0,76 sys
/x86_64/qom/pc-i440fx-2.0: OK
1,79 real 1,04 user 0,82 sys
/x86_64/qom/pc-i440fx-2.0: OK
1,84 real 1,00 user 0,91 sys
/x86_64/qom/pc-i440fx-2.0: OK
1,86 real 1,18 user 0,75 sys
/x86_64/qom/pc-i440fx-2.0: OK
1,85 real 1,27 user 0,64 sys
/x86_64/qom/pc-i440fx-2.0: OK
1,84 real 1,27 user 0,64 sys
/x86_64/qom/pc-i440fx-2.0: OK
1,82 real 1,18 user 0,71 sys



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v5 4/9] i386: Add new property to control cache info

2018-04-09 Thread Eduardo Habkost
On Mon, Apr 09, 2018 at 07:51:24PM +, Moger, Babu wrote:
[...]
> > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > > index 67faa53..f4fbe3a 100644
> > > --- a/target/i386/cpu.c
> > > +++ b/target/i386/cpu.c
> > > @@ -5132,6 +5132,7 @@ static Property x86_cpu_properties[] = {
> > >   false),
> > >  DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU,
> > vmware_cpuid_freq, true),
> > >  DEFINE_PROP_BOOL("tcg-cpuid", X86CPU, expose_tcg, true),
> > > +DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, false),
> > 
> > I'm wondering the reason why Intel L1 caches aren't shared per threads,
> > L2 not shared per threads/cores etc? I mean, changing that will also
> > require new compat flag with very similar name.
> 
> I am not an expert on this topic. But, yes, If there is any future change in 
> cache topology
> then it would require similar change. 

In the future I really hope we manage to represent cache
information in a more structured way, so other incompatible
changes could be represented by more descriptive options like
"l1-cache-size=N", "l2-sharing=cores", etc.

-- 
Eduardo



Re: [Qemu-devel] [PATCH v5 4/9] i386: Add new property to control cache info

2018-04-09 Thread Moger, Babu
> -Original Message-
> From: Alexandr Iarygin 
> Sent: Monday, April 9, 2018 12:00 PM
> To: Moger, Babu ; m...@redhat.com;
> mar...@redhat.com; pbonz...@redhat.com; r...@twiddle.net;
> ehabk...@redhat.com; mtosa...@redhat.com
> Cc: qemu-devel@nongnu.org; k...@vger.kernel.org; k...@tripleback.net;
> Alexandr Iarygin 
> Subject: Re: [PATCH v5 4/9] i386: Add new property to control cache info
> 
> Hello,
> 
> Babu Moger  writes:
> 
> > This will be used to control the cache information.
> > By default new information will be displayed. If user
> > passes "-cpu legacy-cache" then older information will
> > be displayed even if the hardware supports new information.
> >
> > Signed-off-by: Babu Moger 
> > ---
> >  include/hw/i386/pc.h | 6 +-
> >  target/i386/cpu.c| 1 +
> >  target/i386/cpu.h| 5 +
> >  3 files changed, 11 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > index ffee841..9cda1ab 100644
> > --- a/include/hw/i386/pc.h
> > +++ b/include/hw/i386/pc.h
> > @@ -327,7 +327,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *,
> uint64_t *);
> >  .driver   = "q35-pcihost",\
> >  .property = "x-pci-hole64-fix",\
> >  .value= "off",\
> > -},
> > +},{\
> > +.driver   = TYPE_X86_CPU,\
> > +.property = "legacy-cache",\
> > +.value= "off",\
> > +},\
> 
> Should be "on". Also note that this introduces extra '\' at the end.

I think, I mis-understood Eduardo's earlier comments.  It should be "on" for 
machine type "pc-q35-2.10".
Yes. There is an extra  '\'.  Will remove in next version. I will fix both of 
these in next version. Thanks
 
> 
> >
> >  #define PC_COMPAT_2_9 \
> >  HW_COMPAT_2_9 \
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 67faa53..f4fbe3a 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -5132,6 +5132,7 @@ static Property x86_cpu_properties[] = {
> >   false),
> >  DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU,
> vmware_cpuid_freq, true),
> >  DEFINE_PROP_BOOL("tcg-cpuid", X86CPU, expose_tcg, true),
> > +DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, false),
> 
> I'm wondering the reason why Intel L1 caches aren't shared per threads,
> L2 not shared per threads/cores etc? I mean, changing that will also
> require new compat flag with very similar name.

I am not an expert on this topic. But, yes, If there is any future change in 
cache topology
then it would require similar change. 

> 
> >
> >  /*
> >   * From "Requirements for Implementing the Microsoft
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index 806c34b..bbe13f2 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -1394,6 +1394,11 @@ struct X86CPU {
> >   */
> >  bool enable_l3_cache;
> >
> > +/* Compatibility bits for old machine types.
> > + * If true present the old cache topology information
> > + */
> > +bool legacy_cache;
> > +
> >  /* Compatibility bits for old machine types: */
> >  bool enable_cpuid_0xb;
> >
> > --
> > 1.8.3.1



Re: [Qemu-devel] [PATCH v5 4/9] i386: Add new property to control cache info

2018-04-09 Thread Alexandr Iarygin
Hello,

Babu Moger  writes:

> This will be used to control the cache information.
> By default new information will be displayed. If user
> passes "-cpu legacy-cache" then older information will
> be displayed even if the hardware supports new information.
>
> Signed-off-by: Babu Moger 
> ---
>  include/hw/i386/pc.h | 6 +-
>  target/i386/cpu.c| 1 +
>  target/i386/cpu.h| 5 +
>  3 files changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index ffee841..9cda1ab 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -327,7 +327,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t 
> *);
>  .driver   = "q35-pcihost",\
>  .property = "x-pci-hole64-fix",\
>  .value= "off",\
> -},
> +},{\
> +.driver   = TYPE_X86_CPU,\
> +.property = "legacy-cache",\
> +.value= "off",\
> +},\

Should be "on". Also note that this introduces extra '\' at the end.

>  
>  #define PC_COMPAT_2_9 \
>  HW_COMPAT_2_9 \
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 67faa53..f4fbe3a 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -5132,6 +5132,7 @@ static Property x86_cpu_properties[] = {
>   false),
>  DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU, vmware_cpuid_freq, true),
>  DEFINE_PROP_BOOL("tcg-cpuid", X86CPU, expose_tcg, true),
> +DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, false),

I'm wondering the reason why Intel L1 caches aren't shared per threads,
L2 not shared per threads/cores etc? I mean, changing that will also
require new compat flag with very similar name.

>  
>  /*
>   * From "Requirements for Implementing the Microsoft
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 806c34b..bbe13f2 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1394,6 +1394,11 @@ struct X86CPU {
>   */
>  bool enable_l3_cache;
>  
> +/* Compatibility bits for old machine types.
> + * If true present the old cache topology information
> + */
> +bool legacy_cache;
> +
>  /* Compatibility bits for old machine types: */
>  bool enable_cpuid_0xb;
>  
> -- 
> 1.8.3.1



Re: [Qemu-devel] [PATCH v3 00/10] migration: improve and cleanup compression

2018-04-09 Thread Dr. David Alan Gilbert
* Paolo Bonzini (pbonz...@redhat.com) wrote:
> On 08/04/2018 05:19, Xiao Guangrong wrote:
> > 
> > Hi Paolo, Michael, Stefan and others,
> > 
> > Could anyone merge this patchset if it is okay to you guys?
> 
> Hi Guangrong,
> 
> Dave and Juan will take care of merging it.  However, right now QEMU is
> in freeze so they may wait a week or two.  If they have reviewed it,
> it's certainly on their radar!

Yep, one of us will get it at the start of 2.13.

Dave

> Thanks,
> 
> Paolo
> 
> > On 03/30/2018 03:51 PM, guangrong.x...@gmail.com wrote:
> >> From: Xiao Guangrong 
> >>
> >> Changelog in v3:
> >> Following changes are from Peter's review:
> >> 1) use comp_param[i].file and decomp_param[i].compbuf to indicate if
> >>     the thread is properly init'd or not
> >> 2) save the file which is used by ram loader to the global variable
> >>     instead it is cached per decompression thread
> >>
> >> Changelog in v2:
> >> Thanks to the review from Dave, Peter, Wei and Jiang Biao, the changes
> >> in this version are:
> >> 1) include the performance number in the cover letter
> >> 2)add some comments to explain how to use z_stream->opaque in the
> >>     patchset
> >> 3) allocate a internal buffer for per thread to store the data to
> >>     be compressed
> >> 4) add a new patch that moves some code to ram_save_host_page() so
> >>     that 'goto' can be omitted gracefully
> >> 5) split the optimization of compression and decompress into two
> >>     separated patches
> >> 6) refine and correct code styles
> >>
> >>
> >> This is the first part of our work to improve compression to make it
> >> be more useful in the production.
> >>
> >> The first patch resolves the problem that the migration thread spends
> >> too much CPU resource to compression memory if it jumps to a new block
> >> that causes the network is used very deficient.
> >>
> >> The second patch fixes the performance issue that too many VM-exits
> >> happen during live migration if compression is being used, it is caused
> >> by huge memory returned to kernel frequently as the memory is allocated
> >> and freed for every signal call to compress2()
> >>
> >> The remaining patches clean the code up dramatically
> >>
> >> Performance numbers:
> >> We have tested it on my desktop, i7-4790 + 16G, by locally live migrate
> >> the VM which has 8 vCPUs + 6G memory and the max-bandwidth is limited to
> >> 350. During the migration, a workload which has 8 threads repeatedly
> >> written total 6G memory in the VM.
> >>
> >> Before this patchset, its bandwidth is ~25 mbps, after applying, the
> >> bandwidth is ~50 mbp.
> >>
> >> We also collected the perf data for patch 2 and 3 on our production,
> >> before the patchset:
> >> +  57.88%  kqemu  [kernel.kallsyms]    [k] queued_spin_lock_slowpath
> >> +  10.55%  kqemu  [kernel.kallsyms]    [k] __lock_acquire
> >> +   4.83%  kqemu  [kernel.kallsyms]    [k] flush_tlb_func_common
> >>
> >> -   1.16%  kqemu  [kernel.kallsyms]    [k]
> >> lock_acquire   ▒
> >>     -
> >> lock_acquire   
> >>  
> >> ▒
> >>    - 15.68%
> >> _raw_spin_lock 
> >>    
> >> ▒
> >>   + 29.42%
> >> __schedule 
> >> 
> >> ▒
> >>   + 29.14%
> >> perf_event_context_sched_out   
> >> 
> >> ▒
> >>   + 23.60%
> >> tdp_page_fault 
> >> 
> >> ▒
> >>   + 10.54%
> >> do_anonymous_page  
> >> 
> >> ▒
> >>   + 2.07%
> >> kvm_mmu_notifier_invalidate_range_start
> >>  
> >> ▒
> >>   + 1.83%
> >> zap_pte_range  
> >>  
> >> ▒
> >>   + 1.44% kvm_mmu_notifier_invalidate_range_end
> >>
> >>
> >> apply our work:
> >> +  51.92%  kqemu  [kernel.kallsyms]    [k] queued_spin_lock_slowpath
> >> +  14.82%  kqemu  [kernel.kallsyms]    [k] __lock_acquire
> >> +   1.47%  kqemu  [kernel.kallsyms]    [k] mark_lock.clone.0
> >> +   1.46%  kqemu  [kernel.kallsyms]    [k] native_sched_clock
> >> +   1.31%  kqemu  [kernel.kallsyms]    [k] lock_acquire
> >> +   1.24%  kqemu  libc-2.12.so [.] __memset_sse2
> >>
> >> -  14.82%  kqemu  [kernel.kallsyms]    [k]
> >> __lock_acquire ▒
> >>     -
> >> __lock_acquire 
> >>  
> >> ▒
> >>    - 99.75%
> >> lock_acquire   
> >>    
> >> ▒
> >>   - 18.38%
> >> _raw_spin_lock  

Re: [Qemu-devel] [PATCH] migration: calculate expected_downtime with ram_bytes_remaining()

2018-04-09 Thread Dr. David Alan Gilbert
* Balamuruhan S (bal...@linux.vnet.ibm.com) wrote:
> On 2018-04-04 13:36, Peter Xu wrote:
> > On Wed, Apr 04, 2018 at 11:55:14AM +0530, Balamuruhan S wrote:
> > 
> > [...]
> > 
> > > > too. So still I'll put aside the "which one is better" question.
> > > >
> > > > For your use case, you can have a look on either of below way to
> > > > have a converged migration:
> > > >
> > > > - auto-converge: that's a migration capability that throttles CPU
> > > >   usage of guests
> > > 
> > > I used auto-converge option before hand and still it doesn't help
> > > for migration to complete
> > 
> > Have you digged about why?  AFAIK auto-convergence will at last absort
> > merely the whole vcpu resource (99% of them maximum).  Maybe you are
> > not with the best throttle values?  Or do you think that could be a
> > auto-convergence bug too?
> 
> I am not sure, I will work on it to find why.

> > 
> > > 
> > > >
> > > > - postcopy: that'll let you start the destination VM even without
> > > >   transferring all the RAMs before hand
> > > 
> > > I am seeing issue in postcopy migration between POWER8(16M) ->
> > > POWER9(1G)
> > > where the hugepage size is different. I am trying to enable it but
> > > host
> > > start
> > > address have to be aligned with 1G page size in
> > > ram_block_discard_range(),
> > > which I am debugging further to fix it.
> > 
> > I thought the huge page size needs to be matched on both side
> > currently for postcopy but I'm not sure.
> 
> you are right! it should be matched, but we need to support
> POWER8(16M) -> POWER9(1G)
> 
> > CC Dave (though I think Dave's still on PTO).

There's two problems there:
  a) Postcopy with really big huge pages is a problem, because it takes
 a long time to send the whole 1G page over the network and the vCPU
 is paused during that time;  for example on a 10Gbps link, it takes
 about 1 second to send a 1G page, so that's a silly time to keep
 the vCPU paused.

  b) Mismatched pagesizes are a problem on postcopy; we require that the
 whole of a hostpage is sent continuously, so that it can be
 atomically placed in memory, the source knows to do this based on
 the page sizes that it sees.  There are some other cases as well 
 (e.g. discards have to be page aligned.)

Both of the problems are theoretically fixable; but neither case is
easy.
(b) could be fixed by sending the hugepage size back to the source,
so that it knows to perform alignments on a larger boundary to it's
own RAM blocks.

(a) is a much much harder problem; one *idea* would be a major
reorganisation of the kernels hugepage + userfault code to somehow
allow them to temporarily present as normal pages rather than a
hugepage.

Does P9 really not have a hugepage that's smaller than 1G?

Dave

--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



[Qemu-devel] [Bug 1740219] Re: static linux-user ARM emulation has several-second startup time

2018-04-09 Thread Launchpad Bug Tracker
This bug was fixed in the package qemu - 1:2.11+dfsg-1ubuntu6

---
qemu (1:2.11+dfsg-1ubuntu6) bionic; urgency=medium

  * Remove LP: 1752026 changes to d/p/ubuntu/define-ubuntu-machine-types.patch.
The Kernel fixes are preferred and already committed to the kernel.
Therefore remove the default disabling of the HTM feature (LP: #1761175)
  * d/p/ubuntu/lp1739665-SSE-AVX-AVX512-cpu-features.patch: Enable new
SSE/AVX/AVX512 cpu features (LP: #1739665)
  * d/p/ubuntu/lp1740219-continuous-space-commpage.patch: make Arm
space+commpage continuous which avoids long startup times on
qemu-user-static (LP: #1740219)
  * d/p/ubuntu/lp-1761372-*: provide pseries-bionic-2.11-sxxm type as
convenience with all meltdown/spectre workarounds enabled by default.
This is not the default type following upstream and x86 on that.
(LP: #1761372).
  * d/p/ubuntu/lp-1704312-1-* provide means to manually handle filesystem-dax
with pmem by backporting align and unarmed options (LP: #1704312).
  * d/p/ubuntu/lp-1762315-slirp-Add-domainname.patch: slirp: Add domainname
option to slirp's DHCP server (LP: #1762315)

 -- Christian Ehrhardt   Wed, 04 Apr
2018 15:16:07 +0200

** Changed in: qemu (Ubuntu)
   Status: In Progress => Fix Released

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1740219

Title:
  static linux-user ARM emulation has several-second startup time

Status in QEMU:
  Fix Committed
Status in qemu package in Ubuntu:
  Fix Released

Bug description:
  static linux-user emulation has several-second startup time

  My problem: I'm a Parabola packager, and I'm updating our
  qemu-user-static package from 2.8 to 2.11.  With my new
  statically-linked 2.11, running `qemu-arm /my/arm-chroot/bin/true`
  went from taking 0.006s to 3s!  This does not happen with the normal
  dynamically linked 2.11, or the old static 2.8.

  What happens is it gets stuck in
  `linux-user/elfload.c:init_guest_space()`.  What `init_guest_space`
  does is map 2 parts of the address space: `[base, base+guest_size]`
  and `[base+0x, base+0x+page_size]`; where it must find
  an acceptable `base`.  Its strategy is to `mmap(NULL, guest_size,
  ...)` decide where the first range is, and then check if that
  +0x is also available.  If it isn't, then it starts trying
  `mmap(base, ...)` for the entire address space from low-address to
  high-address.

  "Normally," it finds an accaptable `base` within the first 2 tries.
  With a static 2.11, it's taking thousands of tries.

  

  Now, from my understanding, there are 2 factors working together to
  cause that in static 2.11 but not the other builds:

   - 2.11 increased the default `guest_size` from 0xf700 to 0x
   - PIE (and thus ASLR) is disabled for static builds

  For some reason that I don't understand, with the smaller
  `guest_size` the initial `mmap(NULL, guest_size, ...)` usually
  returns an acceptable address range; but larger `guest_size` makes it
  consistently return a block of memory that butts right up against
  another already mapped chunk of memory.  This isn't just true on the
  older builds, it's true with the 2.11 builds if I use the `-R` flag to
  shrink the `guest_size` back down to 0xf700.  That is with
  linux-hardened 4.13.13 on x86-64.

  So then, it it falls back to crawling the entire address space; so it
  tries base=0x1000.  With ASLR, that probably succeeds.  But with
  ASLR being disabled on static builds, the text segment is at
  0x6000; which is does not leave room for the needed
  0x1000-size block before it.  So then it tries base=0x2000.
  And so on, more than 6000 times until it finally gets to and passes
  the text segment; calling mmap more than 12000 times.

  

  I'm not sure what the fix is.  Perhaps try to mmap a continuous chunk
  of size 0x1000, then munmap it and then mmap the 2 chunks that we
  actually need.  The disadvantage to that is that it does not support
  the sparse address space that the current algorithm supports for
  `guest_size < 0x`.  If `guest_size < 0x` *and* the big
  mmap fails, then it could fall back to a sparse search; though I'm not
  sure the current algorithm is a good choice for it, as we see in this
  bug.  Perhaps it should inspect /proc/self/maps to try to find a
  suitable range before ever calling mmap?

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1740219/+subscriptions



Re: [Qemu-devel] [Qemu-arm] Use of -machine when running via qemu-system-arm

2018-04-09 Thread Ajay Garg
Thanks a ton Peter, that cleared things up a great deal !!!
Thanks again for the quick help !!


Thanks and Regards,
Ajay

On Mon, Apr 9, 2018 at 11:17 PM, Peter Maydell  wrote:
> On 9 April 2018 at 18:41, Ajay Garg  wrote:
>> Hi Peter.
>>
>> The link is pretty good, just that I am a complete noob.
>>
>> Maybe a more concrete example on the following, clearly specifying why
>> things work on x86 without the need of -machine flag, will make things
>> crystal clear :
>>
>> Because ARM systems differ so much and in fundamental ways, typically
>> operating system or firmware images intended to run on one machine
>> will not run at all on any other. This is often surprising for new
>> users who are used to the x86 world where every system looks like a
>> standard PC. (Once the kernel has booted, most userspace software
>> cares much less about the detail of the hardware.)
>
> For instance, on x86 you can always find the UART for serial
> output at IO port 0x3f8, and it's always an 8250 or compatible.
> On Arm, the memory address where the UART is will be likely
> different for every board. And once you know the address of the
> UART, each SoC likely may well have its own implementation,
> which has different registers that do different things.
> And there's no way to probe for what's there -- you have to
> know in advance. So you can't even usefully print bootup
> messages without knowing what you're running on.
>
> thanks
> -- PMM



Re: [Qemu-devel] [qemu RFC] qapi: add "firmware.json"

2018-04-09 Thread Laszlo Ersek
On 04/09/18 10:49, Daniel P. Berrangé wrote:
> On Sat, Apr 07, 2018 at 02:01:17AM +0200, Laszlo Ersek wrote:
>> Add a schema that describes the properties of virtual machine firmware.
>>
>> Each firmware executable installed on a host system should come with a
>> JSON file that conforms to this schema, and informs the management
>> applications about the firmware's properties.
>>
>> In addition, a configuration directory with symlinks to the JSON files
>> should exist, with the symlinks carefully named to reflect a priority
>> order. Management applications can then search this directory in priority
>> order for the first firmware executable that satisfies their search
>> criteria. The found JSON file provides the management layer with domain
>> configuration bits that are required to run the firmware binary.
>>
>> Cc: "Daniel P. Berrange" 
>> Cc: Alexander Graf 
>> Cc: Ard Biesheuvel 
>> Cc: David Gibson 
>> Cc: Eric Blake 
>> Cc: Gary Ching-Pang Lin 
>> Cc: Gerd Hoffmann 
>> Cc: Kashyap Chamarthy 
>> Cc: Markus Armbruster 
>> Cc: Michael Roth 
>> Cc: Michal Privoznik 
>> Cc: Peter Krempa 
>> Cc: Peter Maydell 
>> Cc: Thomas Huth 
>> Signed-off-by: Laszlo Ersek 
>> ---
>>
>> Notes:
>> Folks on the CC list, please try to see if the suggested schema is
>> flexible enough to describe the virtual firmware(s) that you are
>> familiar with. Thanks!
>>
>>  Makefile  |   9 ++
>>  Makefile.objs |   4 +
>>  qapi/firmware.json| 343 
>> ++
>>  qapi/qapi-schema.json |   1 +
>>  qmp.c |   5 +
>>  .gitignore|   4 +
>>  6 files changed, 366 insertions(+)
>>  create mode 100644 qapi/firmware.json
>>
> 
>> diff --git a/qapi/firmware.json b/qapi/firmware.json
>> new file mode 100644
>> index ..f267240f44dd
>> --- /dev/null
>> +++ b/qapi/firmware.json
>> @@ -0,0 +1,343 @@
>> +# -*- Mode: Python -*-
>> +
>> +##
>> +# = Firmware
>> +##
>> +
>> +##
>> +# @FirmwareDevice:
>> +#
>> +# Defines the device types that a firmware file can be mapped into.
>> +#
>> +# @memory: The firmware file is to be mapped into memory.
>> +#
>> +# @kernel: The firmware file is to be loaded like a Linux kernel. This is
>> +#  similar to @memory but may imply additional processing that is
>> +#  specific to the target architecture.
>> +#
>> +# @flash: The firmware file is to be mapped into a pflash chip.
>> +#
>> +# Since: 2.13
>> +##
>> +{ 'enum' : 'FirmwareDevice',
>> +  'data' : [ 'memory', 'kernel', 'flash' ] }
>> +
>> +##
>> +# @FirmwareAccess:
>> +#
>> +# Defines the possible permissions for a given access mode to a device that
>> +# maps a firmware file.
>> +#
>> +# @denied: The access is denied.
>> +#
>> +# @permitted: The access is permitted.
>> +#
>> +# @restricted-to-secure-context: The access is permitted for guest code that
>> +#runs in a secure context; otherwise the 
>> access
>> +#is denied. The definition of "secure 
>> context"
>> +#is specific to the target architecture.
>> +#
>> +# Since: 2.13
>> +##
>> +{ 'enum' : 'FirmwareAccess',
>> +  'data' : [ 'denied', 'permitted', 'restricted-to-secure-context' ] }
> 
> I'm not really understanding the purpose of this - what does it map to
> on the command line ?

That's difficult to answer generally, because -bios and -kernel have
different meanings per board type. So I didn't aim at command line
switches here; instead I tried to capture where and how the firmware
wants to "end up" in the virtual hardware. How that maps to a particular
board is a separate question.

For example, OVMF can be loaded in a multitude of ways:

(1) "OVMF.fd" (a unified image that contains an executable and a live
variable store too) can be loaded with "-bios". This will place the full
image into ROM (that is FirmwareDevice=memory, read and exec: permitted,
write: denied). This will not provide a spec-compatible UEFI variable
service to the guest, but many people use OVMF like this. The libvirt
domain XML can accommodate this case:

  OVMF.fd

(2) "OVMF.fd" can be loaded into a single pflash chip (single pflash
drive, read/write). The command line switch is "-drive
if=pflash,format=raw,file=OVMF.fd,unit=0,readonly=off". This gives the
guest a spec-compliant UEFI variable service; however, the variable
store is inseparable from the firmware binary, and upgrading the latter
without losing the former is not possible, from a packaging perspective.
This maps to FirmwareDevice=flash, with all of
read/write/exec=permitted. Libvirt can describe this too in the domain XML:

  OVMF.fd

(3) 

Re: [Qemu-devel] [Qemu-arm] Use of -machine when running via qemu-system-arm

2018-04-09 Thread Peter Maydell
On 9 April 2018 at 18:41, Ajay Garg  wrote:
> Hi Peter.
>
> The link is pretty good, just that I am a complete noob.
>
> Maybe a more concrete example on the following, clearly specifying why
> things work on x86 without the need of -machine flag, will make things
> crystal clear :
>
> Because ARM systems differ so much and in fundamental ways, typically
> operating system or firmware images intended to run on one machine
> will not run at all on any other. This is often surprising for new
> users who are used to the x86 world where every system looks like a
> standard PC. (Once the kernel has booted, most userspace software
> cares much less about the detail of the hardware.)

For instance, on x86 you can always find the UART for serial
output at IO port 0x3f8, and it's always an 8250 or compatible.
On Arm, the memory address where the UART is will be likely
different for every board. And once you know the address of the
UART, each SoC likely may well have its own implementation,
which has different registers that do different things.
And there's no way to probe for what's there -- you have to
know in advance. So you can't even usefully print bootup
messages without knowing what you're running on.

thanks
-- PMM



Re: [Qemu-devel] [PULL v2 00/20] Miscellaneous patches for QEMU 2.12-rc

2018-04-09 Thread Peter Maydell
On 9 April 2018 at 18:38, Paolo Bonzini  wrote:
> On 09/04/2018 19:21, Peter Maydell wrote:
>> On 9 April 2018 at 15:59, Paolo Bonzini  wrote:
>>> The following changes since commit 0e87fdc966d05f4e5ad868034fcd8ee2a08ca62d:
>>>
>>>   Update version for v2.12.0-rc2 release (2018-04-04 20:37:20 +0100)
>>>
>>> are available in the git repository at:
>>>
>>>   git://github.com/bonzini/qemu.git tags/for-upstream
>>>
>>> for you to fetch changes up to e0014d4b3a955cfd8d517674703bfa87f340290a:
>>>
>>>   Add missing bit for SSE instr in VEX decoding (2018-04-09 16:36:40 +0200)
>>>
>>> 
>>> Miscellaneous bugfixes, including crash fixes from Alexey, Peter M. and
>>> Thomas.
>>>
>>> 
>>
>> SPARC guest boottest, SPARC host:
>>
>> TEST: tests/boot-serial-test... (pid=99658)
>>   /sparc/boot-serial/LX:   **
>> ERROR:/srv/pm215/qemu/tests/boot-serial-test.c:139:check_guest_output:
>> assertion failed: (output_ok)
>> FAIL
>> GTester: last random seed: R02Sec8351c4b01f899a24e5f71f6615a712
>> (pid=100565)
>>   /sparc/boot-serial/SS-4: **
>> ERROR:/srv/pm215/qemu/tests/boot-serial-test.c:139:check_guest_output:
>> assertion failed: (output_ok)
>> FAIL
>> GTester: last random seed: R02Scdc4c1568274a87c76f6e37e7c72f44b
>> (pid=101395)
>>   /sparc/boot-serial/SS-600MP: OK
>> FAIL: tests/boot-serial-test
>>
>> Apparently intermittent, I did a couple of by-hand reruns of
>> that test case and they were fine...
>
> What about before my patch?

Yes, I think we've seen this particular intermittent before;
eg https://lists.gnu.org/archive/html/qemu-devel/2018-03/msg04003.html
though that was on a different host.

thanks
-- PMM



Re: [Qemu-devel] [Qemu-arm] Use of -machine when running via qemu-system-arm

2018-04-09 Thread Ajay Garg
Hi Peter.

The link is pretty good, just that I am a complete noob.

Maybe a more concrete example on the following, clearly specifying why
things work on x86 without the need of -machine flag, will make things
crystal clear :

Because ARM systems differ so much and in fundamental ways, typically
operating system or firmware images intended to run on one machine
will not run at all on any other. This is often surprising for new
users who are used to the x86 world where every system looks like a
standard PC. (Once the kernel has booted, most userspace software
cares much less about the detail of the hardware.)


Sorry again for my zero knowledge on the ARM world.




On Mon, Apr 9, 2018 at 7:31 PM, Peter Maydell  wrote:
> On 9 April 2018 at 14:43, Ajay Garg  wrote:
>> On Mon, Apr 9, 2018 at 7:08 PM, Peter Maydell  
>> wrote:
>>> On 9 April 2018 at 14:35, Ajay Garg  wrote:
 Hi All.

 Not sure if this is purely an ARM-related question, but I will be
 grateful if someone could point to some literature that explains what
 difference choosing a  machine makes (when we don't need to have any
 such flag for qemu-system-i386 or qemu-system-x86_64).
>>>
>>> Hi; this is a pretty common question, which we talk about on our
>>> wiki page here:
>>>  https://wiki.qemu.org/Documentation/Platforms/ARM
>>
>> Yep, had already been through it :)
>>
>>>
>>> The underlying reason why you need this on Arm but not on x86
>>> is that for x86 every single piece of x86 hardware is pretty
>>> much identical. For Arm (as part of its history in the embedded
>>> space) the general rule is that every board is different.
>>
>> Yep, perhaps I am needing a more layman-kinda example-cum-explanation
>> for this, so would be grateful for some help in this regard.
>
> Well, the wiki text is the best explanation I have; perhaps
> you could suggest what part of it is confusing, and we could
> improve it?
>
> thanks
> -- PMM



-- 
Regards,
Ajay



Re: [Qemu-devel] [PULL v2 00/20] Miscellaneous patches for QEMU 2.12-rc

2018-04-09 Thread Paolo Bonzini
On 09/04/2018 19:21, Peter Maydell wrote:
> On 9 April 2018 at 15:59, Paolo Bonzini  wrote:
>> The following changes since commit 0e87fdc966d05f4e5ad868034fcd8ee2a08ca62d:
>>
>>   Update version for v2.12.0-rc2 release (2018-04-04 20:37:20 +0100)
>>
>> are available in the git repository at:
>>
>>   git://github.com/bonzini/qemu.git tags/for-upstream
>>
>> for you to fetch changes up to e0014d4b3a955cfd8d517674703bfa87f340290a:
>>
>>   Add missing bit for SSE instr in VEX decoding (2018-04-09 16:36:40 +0200)
>>
>> 
>> Miscellaneous bugfixes, including crash fixes from Alexey, Peter M. and
>> Thomas.
>>
>> 
> 
> SPARC guest boottest, SPARC host:
> 
> TEST: tests/boot-serial-test... (pid=99658)
>   /sparc/boot-serial/LX:   **
> ERROR:/srv/pm215/qemu/tests/boot-serial-test.c:139:check_guest_output:
> assertion failed: (output_ok)
> FAIL
> GTester: last random seed: R02Sec8351c4b01f899a24e5f71f6615a712
> (pid=100565)
>   /sparc/boot-serial/SS-4: **
> ERROR:/srv/pm215/qemu/tests/boot-serial-test.c:139:check_guest_output:
> assertion failed: (output_ok)
> FAIL
> GTester: last random seed: R02Scdc4c1568274a87c76f6e37e7c72f44b
> (pid=101395)
>   /sparc/boot-serial/SS-600MP: OK
> FAIL: tests/boot-serial-test
> 
> Apparently intermittent, I did a couple of by-hand reruns of
> that test case and they were fine...

What about before my patch?

Paolo





Re: [Qemu-devel] [PATCH] configure: don't warn GTK if disabled

2018-04-09 Thread Paolo Bonzini
On 09/04/2018 11:24, Peter Maydell wrote:
> On 9 April 2018 at 09:31, Daniel P. Berrangé  wrote:
>> Feels to me that since we've deprecated 2.0, we could just *never* auto
>> detect - just do test -z "$gtkabi" && gtkabi=3.0
>>
>> Anyone who wants gtk2 should have to use an explicit --with-gtkabi=2.0
>
> I think if we still work with gtk2 then we should go ahead and
> warn-but-use it.

Both are valid choices of course.  Since we have Peter (Xu)'s patch, I
think we should go for that and revisit in 2.13 whether to remove GTK+
2.0 support altogether, require an explicit configure argument, or leave
things as they are.

Paolo



Re: [Qemu-devel] [PATCH V4] migration: add capability to bypass the shared memory

2018-04-09 Thread Dr. David Alan Gilbert
Hi,

* Lai Jiangshan (jiangshan...@gmail.com) wrote:
> 1) What's this
> 
> When the migration capability 'bypass-shared-memory'
> is set, the shared memory will be bypassed when migration.
> 
> It is the key feature to enable several excellent features for
> the qemu, such as qemu-local-migration, qemu-live-update,
> extremely-fast-save-restore, vm-template, vm-fast-live-clone,
> yet-another-post-copy-migration, etc..
> 
> The philosophy behind this key feature, including the resulting
> advanced key features, is that a part of the memory management
> is separated out from the qemu, and let the other toolkits
> such as libvirt, kata-containers (https://github.com/kata-containers)
> runv(https://github.com/hyperhq/runv/) or some multiple cooperative
> qemu commands directly access to it, manage it, provide features on it.
> 
> 2) Status in real world
> 
> The hyperhq(http://hyper.sh  http://hypercontainer.io/)
> introduced the feature vm-template(vm-fast-live-clone)
> to the hyper container for several years, it works perfect.
> (see https://github.com/hyperhq/runv/pull/297).
> 
> The feature vm-template makes the containers(VMs) can
> be started in 130ms and save 80M memory for every
> container(VM). So that the hyper containers are fast
> and high-density as normal containers.
> 
> kata-containers project (https://github.com/kata-containers)
> which was launched by hyper, intel and friends and which descended
> from runv (and clear-container) should have this feature enabled.
> Unfortunately, due to the code confliction between runv,
> this feature was temporary disabled and it is being brought
> back by hyper and intel team.
> 
> 3) How to use and bring up advanced features.
> 
> In current qemu command line, shared memory has
> to be configured via memory-object.
> 
> a) feature: qemu-local-migration, qemu-live-update
> Set the mem-path on the tmpfs and set share=on for it when
> start the vm. example:
> -object \
> memory-backend-file,id=mem,size=128M,mem-path=/dev/shm/memory,share=on \
> -numa node,nodeid=0,cpus=0-7,memdev=mem
> 
> when you want to migrate the vm locally (after fixed a security bug
> of the qemu-binary, or other reason), you can start a new qemu with
> the same command line and -incoming, then you can migrate the
> vm from the old qemu to the new qemu with the migration capability
> 'bypass-shared-memory' set. The migration will migrate the device-state
> *ONLY*, the memory is the origin memory backed by tmpfs file.
> 
> b) feature: extremely-fast-save-restore
> the same above, but the mem-path is on the persistent file system.
> 
> c)  feature: vm-template, vm-fast-live-clone
> the template vm is started as 1), and paused when the guest reaches
> the template point(example: the guest app is ready), then the template
> vm is saved. (the qemu process of the template can be killed now, because
> we need only the memory and the device state files (in tmpfs)).
> 
> Then we can launch one or multiple VMs base on the template vm states,
> the new VMs are started without the “share=on”, all the new VMs share
> the initial memory from the memory file, they save a lot of memory.
> all the new VMs start from the template point, the guest app can go to
> work quickly.

How do you handle the storage in this case, or giving each VM it's own
MAC address?

> The new VM booted from template vm can’t become template again,
> if you need this unusual chained-template feature, you can write
> a cloneable-tmpfs kernel module for it.
> 
> The libvirt toolkit can’t manage vm-template currently, in the
> hyperhq/runv, we use qemu wrapper script to do it. I hope someone add
> “libvrit managed template” feature to libvirt.

> d) feature: yet-another-post-copy-migration
> It is a possible feature, no toolkit can do it well now.
> Using nbd server/client on the memory file is reluctantly Ok but
> inconvenient. A special feature for tmpfs might be needed to
> fully complete this feature.
> No one need yet another post copy migration method,
> but it is possible when some crazy man need it.

As the crazy person who did the existing postcopy; one is enough!

Some minor fix requests below, but this looks nice and simple.

Shared memory is interesting because tehre are lots of different uses;
e.g. your uses, but also vhost-user which is sharing for a completely
different reason.

> Cc: Samuel Ortiz 
> Cc: Sebastien Boeuf 
> Cc: James O. D. Hunt 
> Cc: Xu Wang 
> Cc: Peng Tao 
> Cc: Xiao Guangrong 
> Cc: Xiao Guangrong 
> Signed-off-by: Lai Jiangshan 
> ---
> 
> Changes in V4:
>  fixes checkpatch.pl errors
> 
> Changes in V3:
>  rebased on upstream master
>  update the available version of the capability to
>  v2.13
> 
> Changes in V2:
>  rebased on 2.11.1
> 
>  migration/migration.c | 14 ++
>  

[Qemu-devel] tests/Makefile.include gtester rules defeat attempts to parallelize

2018-04-09 Thread Peter Maydell
I noticed that the rules for gtester in tests/Makefile.include
work against attempts to parallelize "make check" with make's
-j argument, because from Make's point of view we run a
single command which looks like

gtester tests/endianness-test tests/fdc-test tests/ide-test
tests/ahci-test tests/hd-geo-test tests/boot-order-test
tests/bios-tables-test tests/boot-serial-test tests/pxe-test
tests/rtc-test tests/ipmi-kcs-test tests/ipmi-bt-test
tests/i440fx-test tests/fw_cfg-test tests/drive_del-test
tests/wdt_ib700-test tests/tco-test tests/e1000-test tests/e1000e-test
tests/rtl8139-test tests/pcnet-test tests/eepro100-test
tests/ne2000-test tests/nvme-test tests/ac97-test tests/es1370-test
tests/virtio-net-test tests/virtio-balloon-test tests/virtio-blk-test
tests/virtio-rng-test tests/virtio-scsi-test tests/virtio-serial-test
tests/virtio-console-test tests/tpci200-test tests/ipoctal232-test
tests/display-vga-test tests/intel-hda-test tests/megasas-test
tests/vmxnet3-test tests/pvpanic-test tests/i82801b11-test
tests/ioh3420-test tests/usb-hcd-ohci-test tests/usb-hcd-uhci-test
tests/usb-hcd-ehci-test tests/usb-hcd-xhci-test tests/cpu-plug-test
tests/q35-test tests/vmgenid-test tests/tpm-crb-test
tests/tpm-tis-test tests/test-netfilter tests/test-filter-mirror
tests/test-filter-redirector tests/migration-test
tests/test-x86-cpuid-compat tests/numa-test tests/qmp-test
tests/device-introspect-test tests/machine-none-test tests/qom-test
tests/test-hmp

and then all of those individual tests/foo-test get run in series
by gtester, rather than being possibly parallelized by make.

Does anybody feel like playing around with the makefile
rules to see if they can make that a bit less serialized?

thanks
-- PMM



Re: [Qemu-devel] [PULL v2 00/20] Miscellaneous patches for QEMU 2.12-rc

2018-04-09 Thread Peter Maydell
On 9 April 2018 at 15:59, Paolo Bonzini  wrote:
> The following changes since commit 0e87fdc966d05f4e5ad868034fcd8ee2a08ca62d:
>
>   Update version for v2.12.0-rc2 release (2018-04-04 20:37:20 +0100)
>
> are available in the git repository at:
>
>   git://github.com/bonzini/qemu.git tags/for-upstream
>
> for you to fetch changes up to e0014d4b3a955cfd8d517674703bfa87f340290a:
>
>   Add missing bit for SSE instr in VEX decoding (2018-04-09 16:36:40 +0200)
>
> 
> Miscellaneous bugfixes, including crash fixes from Alexey, Peter M. and
> Thomas.
>
> 

SPARC guest boottest, SPARC host:

TEST: tests/boot-serial-test... (pid=99658)
  /sparc/boot-serial/LX:   **
ERROR:/srv/pm215/qemu/tests/boot-serial-test.c:139:check_guest_output:
assertion failed: (output_ok)
FAIL
GTester: last random seed: R02Sec8351c4b01f899a24e5f71f6615a712
(pid=100565)
  /sparc/boot-serial/SS-4: **
ERROR:/srv/pm215/qemu/tests/boot-serial-test.c:139:check_guest_output:
assertion failed: (output_ok)
FAIL
GTester: last random seed: R02Scdc4c1568274a87c76f6e37e7c72f44b
(pid=101395)
  /sparc/boot-serial/SS-600MP: OK
FAIL: tests/boot-serial-test

Apparently intermittent, I did a couple of by-hand reruns of
that test case and they were fine...

Applied to master.

thanks
-- PMM



[Qemu-devel] qom-test on netbsd can be very slow

2018-04-09 Thread Peter Maydell
My NetBSD build system recently seems to have taken a nosedive
in how long it takes to finish "make check". This seems to be
because qom-test (and probably other things where the test interacts
with the QEMU process) can run very slowly.

netbsdvm# for i in 1 2 3 4 5 6 7 8 9 10; do
(QTEST_QEMU_BINARY=x86_64-softmmu/qemu-system-x86_64 time
tests/qom-test -p /x86_64/qom/pc-i440fx-2.0); done
/x86_64/qom/pc-i440fx-2.0: OK
8.49 real 1.18 user 7.34 sys
/x86_64/qom/pc-i440fx-2.0: OK
   10.41 real 1.32 user 9.09 sys
/x86_64/qom/pc-i440fx-2.0: OK
8.45 real 1.24 user 7.24 sys
/x86_64/qom/pc-i440fx-2.0: OK
9.88 real 1.10 user 8.31 sys
/x86_64/qom/pc-i440fx-2.0: OK
   11.60 real 1.47 user 9.90 sys
/x86_64/qom/pc-i440fx-2.0: OK
   10.94 real 1.28 user 9.68 sys
/x86_64/qom/pc-i440fx-2.0: OK
   10.06 real 1.32 user 8.76 sys
/x86_64/qom/pc-i440fx-2.0: OK
   13.38 real 1.37 user12.04 sys
/x86_64/qom/pc-i440fx-2.0: OK
   16.19 real 1.46 user14.29 sys
/x86_64/qom/pc-i440fx-2.0: OK
9.70 real 1.17 user 8.51 sys

Admittedly this is running in a (KVM) VM, but still, there seems
something wrong with how long each of these is taking. On Linux
each run is less than a second, so there's an order-of-magnitude
slowdown here. Further, I've occasionally seen a run take 100 seconds!

Does anybody else see this, and any ideas why it might be running slow?
I'm not very familiar with debugging on netbsd; I had a look at
ktrace output, and the QEMU process seems to sometimes spend
quite a lot of time in a poll() loop not actually doing anything.
I couldn't figure out how to get the ktrace logs to annotate
which thread was making which syscall though, so they weren't
easy to interpret. If anybody's more experienced at debugging
things in the BSDs that would also be helpful.

On OpenBSD, for comparison, each test run seems to fairly reliably
take 5 seconds give-or-take, there's much less run-to-run
variation, though it's still much slower than Linux is.
The OpenBSD and FreeBSD build VMs seem to complete more
reasonably quickly than the NetBSD one.

One thing I noticed looking at ktrace output is that we do
all our reading of QMP input and output with a read syscall
per character. I don't think that's the cause of this slowness,
though.

thanks
-- PMM



Re: [Qemu-devel] [qemu RFC] qapi: add "firmware.json"

2018-04-09 Thread Laszlo Ersek
On 04/09/18 10:26, Gerd Hoffmann wrote:
>> +# {
>> +# "executable": {
>> +# "pathname": "/usr/share/OVMF/OVMF_CODE.secboot.fd",
>> +# "description": "OVMF with Secure Boot and SMM-protected varstore",
>> +# "tags": [
>> +# "FD_SIZE_4MB",
>> +# "IA32X64",
>> +# "SECURE_BOOT_ENABLE",
>> +# "SMM_REQUIRE"
>> +# ]
>> +# },
>> +# "type": "uefi",
>> +# "targets": [
>> +# "x86_64"
>> +# ],
>> +# "sysfw-map": {
>> +# "device": "flash",
>> +# "write": "denied"
>> +# },
>> +# "nvram-slots": [
>> +# {
>> +# "slot-id": 1,
>> +# "nvram-map" : {
>> +# "device": "flash",
>> +# "write": "restricted-to-secure-context"
>> +# },
> 
> What is "slot-id"?  The pflash index?

Yes, it might be defined like that, for the i440fx and q35 machine
types. This correspondence would be implemented in libvirtd, I suppose.

However, I don't think such a correspondence is mandatory. At first
approach, slot-id is just the key that tells the nvramslots apart.

> shouldn't we also specify the
> index for the executable somewhere?

Maybe :)

> Maybe the field should be moved
> into FirmwareMapping?

I couldn't come up with a good use case where you wouldn't map the
*system* firmware in a predefined pflash unit (or other device unit). So
I thought that needed no slot-id.

Thanks
Laszlo



Re: [Qemu-devel] [qemu RFC] qapi: add "firmware.json"

2018-04-09 Thread Laszlo Ersek
On 04/09/18 10:19, Gerd Hoffmann wrote:
>>> +{ 'enum' : 'SystemFirmwareType',
>>> +  'data' : [ 'bios', 'slof', 'uboot', 'uefi' ] }
>>
>> The naming here is quite a bad mixture between firmware interface
>> ('bios', 'uefi') and firmware implementations ('slof', 'uboot'). There
>> could be other implementations of BIOS or UEFI than SeaBIOS and EDK2 ...
>> so I'd suggest to rather name them 'seabios' and 'edk2' here instead.
> 
> uboot for example implements uefi unterfaces too (dunno how complete,
> but reportly recent versions can run uefi shell and grub just fine).

Indeed: when I was struggling with this enum type and tried to look for
more firmware types to add, my googling turned up the "UEFI on Top of
U-Boot" whitepaper, from Alex and Andreas :)

Again, this reaches to the root of the problem: when a user creates a
new domain, using high-level tools, they just want to tick "UEFI". (Dan
has emphasized this to me several times, so I think I get the idea by
now, if not the full environment.) We cannot ask the user, "please be
more specific, do you want UEFI from edk2, or UEFI on top of U-Boot?"

Instead, each of those firmware images will have to come with a JSON
document that states "uefi" in SystemFirmware.type, and the host admin
will be responsible for establishing a priority order between them.
Then, when the user asks for "UEFI" (and no more details), they'll get
(compatibly with the target architecture) whichever firmware the host
admin marked as "higher priority".

(Please be aware that with this argument, I'm trying to put myself into
Dan's shoes. It doesn't come *naturally* to me; in fact I'm viscerally
screaming inside at this amount of "fuzz". Personally I'd say, "I can
give you what you *say*, not what you *mean*, so *say* what you mean".
Apparently, that cannot work here, because what users mean is "UEFI" and
nothing more. I have to accept that.)

Thanks
Laszlo



Re: [Qemu-devel] [PATCH 1/1] mach-virt: Change default cpu and gic-version setting to "max"

2018-04-09 Thread Wei Huang


On 04/09/2018 10:55 AM, Peter Maydell wrote:
> On 9 April 2018 at 16:49, Wei Huang  wrote:
>> Running mach-virt machine types (i.e. "-M virt") on different systems can
>> result in various misleading warnings if -cpu and/or gic-version not 
>> specified.
>> For KVM, this can be solved mostly by using "host" type. But the "host" type
>> doesn't work for TCG. Compared with "host", the "max" type not only supports
>> auto detection under KVM mode, but also works with TCG.
>> So this patch set "max" as the default types for both -cpu and gic-version.
> 
> Using "max" will make the VM not migratable by default, and also
> means that the behaviour might differ between host machines and
> between different QEMU versions. I'm not sure we want that for the
> default ?

Understood. If you think the -cpu shouldn't be changed due to the
reasons list above, we should at lease give a better warning message
when cpu doesn't match. Right now, the error msg is something like
"kvm_init_vcpu failed, ...".

Secondly, how about gic-version part? Should we use "max" as the default
gic-version?

> 
>> Signed-off-by: Wei Huang 
>> ---
>>  hw/arm/virt.c | 10 +-
>>  1 file changed, 5 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 94dcb125d3..1a9d68b8d5 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -1555,7 +1555,7 @@ static void virt_machine_class_init(ObjectClass *oc, 
>> void *data)
>>  mc->minimum_page_bits = 12;
>>  mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
>>  mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
>> -mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
>> +mc->default_cpu_type = ARM_CPU_TYPE_NAME("max");
>>  mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
>>  }
>>
>> @@ -1609,13 +1609,13 @@ static void virt_2_12_instance_init(Object *obj)
>>  "Set on/off to enable/disable using "
>>  "physical address space above 32 bits",
>>  NULL);
>> -/* Default GIC type is v2 */
>> -vms->gic_version = 2;
>> +/* Default GIC type is max */
>> +vms->gic_version = -1;
>>  object_property_add_str(obj, "gic-version", virt_get_gic_version,
>>  virt_set_gic_version, NULL);
>>  object_property_set_description(obj, "gic-version",
>> -"Set GIC version. "
>> -"Valid values are 2, 3 and host", NULL);
>> +"Set GIC version. Valid values are 2, 
>> 3, "
>> +"host, and max", NULL);
> 
> This change in the help string is a bug fix not related to the
> rest of the patch. You should send that as a separate patch,
> because we want that regardless of what we do about the default
> value.
> 
> thanks
> -- PMM
> 



Re: [Qemu-devel] [qemu RFC] qapi: add "firmware.json"

2018-04-09 Thread Laszlo Ersek
On 04/09/18 10:08, Thomas Huth wrote:
> On 07.04.2018 02:01, Laszlo Ersek wrote:
>> Add a schema that describes the properties of virtual machine firmware.
> [...]
>> +##
>> +# @SystemFirmware:
>> +#
>> +# Describes a system firmware binary and any NVRAM slots that it requires.
>> +#
>> +# @executable: Identifies the platform firmware executable.
>> +#
>> +# @type: The type by which the system firmware is commonly known. This is 
>> the
>> +#main search key by which management software looks up a system
>> +#firmware image for a new domain.
>> +#
>> +# @targets: a non-empty list of target architectures that are capable of
>> +#   executing the system firmware
>> +#
>> +# @sysfw-map: the mapping requirements of the system firmware binary
>> +#
>> +# @nvram-slots: A list of NVRAM slots that are required by the system 
>> firmware.
>> +#   The @slot-id field must be unique across the list. 
>> Importantly,
>> +#   if any @FirmwareAccess is @restricted-to-secure-context in
>> +#   @sysfw-map or in any @nvram-map in @nvram-slots, then (a) 
>> the
>> +#   virtual machine configuration is required to emulate the 
>> secure
>> +#   code execution context (as defined for @targets), and (b) 
>> the
>> +#   virtual machine configuration is required to actually 
>> restrict
>> +#   the access in question to the secure execution context.
>> +#
>> +# @supports-uefi-secure-boot: Whether the system firmware implements the
>> +# software interfaces for UEFI Secure Boot, as
>> +# defined in the UEFI specification. If the 
>> field
>> +# is missing, its assumed value is 'false'.
>> +#
>> +# @supports-amd-sev: Whether the system firmware supports running under AMD
>> +#Secure Encrypted Virtualization, as specified in the 
>> AMD64
>> +#Architecture Programmer's Manual. If the field is 
>> missing,
>> +#its assumed value is 'false'.
>> +#
>> +# @supports-acpi-s3: Whether the system firmware supports S3 sleep (suspend 
>> to
>> +#RAM), as defined in the ACPI specification. If the 
>> field
>> +#is missing, its assumed value is 'false'.
>> +#
>> +# @supports-acpi-s4: Whether the system firmware supports S4 hibernation
>> +#(suspend to disk), as defined in the ACPI 
>> specification.
>> +#If the field is missing, its assumed value is 'false'.
>> +#
>> +# Since: 2.13
>> +#
>> +# Examples:
>> +#
>> +# {
>> +# "executable": {
>> +# "pathname": "/usr/share/seabios/bios-256k.bin",
>> +# "description": "SeaBIOS",
>> +# "tags": [
>> +# "CONFIG_ROM_SIZE=256"
>> +# ]
>> +# },
>> +# "type": "bios",
>> +# "targets": [
>> +# "i386",
>> +# "x86_64"
>> +# ],
>> +# "sysfw-map": {
>> +# "device": "memory",
>> +# "write": "denied"
>> +# },
>> +# "supports-acpi-s3": true,
>> +# "supports-acpi-s4": true
>> +# }
>> +#
>> +# {
>> +# "executable": {
>> +# "pathname": "/usr/share/OVMF/OVMF_CODE.secboot.fd",
>> +# "description": "OVMF with Secure Boot and SMM-protected varstore",
>> +# "tags": [
>> +# "FD_SIZE_4MB",
>> +# "IA32X64",
>> +# "SECURE_BOOT_ENABLE",
>> +# "SMM_REQUIRE"
>> +# ]
>> +# },
>> +# "type": "uefi",
>> +# "targets": [
>> +# "x86_64"
>> +# ],
>> +# "sysfw-map": {
>> +# "device": "flash",
>> +# "write": "denied"
>> +# },
>> +# "nvram-slots": [
>> +# {
>> +# "slot-id": 1,
>> +# "nvram-map" : {
>> +# "device": "flash",
>> +# "write": "restricted-to-secure-context"
>> +# },
>> +# "templates": [
>> +# {
>> +# "pathname": "/usr/share/OVMF/OVMF_VARS.fd",
>> +# "description": "empty varstore template"
>> +# },
>> +# {
>> +# "pathname": "/usr/share/OVMF/OVMF_VARS.secboot.fd",
>> +# "description": "varstore template with the Microsoft 
>> certificates enrolled for Secure Boot",
>> +# "tags": [
>> +# "mscerts"
>> +# ]
>> +# }
>> +# ]
>> +# }
>> +# ],
>> +# "supports-uefi-secure-boot": true,
>> +# "supports-amd-sev": true,
>> +# "supports-acpi-s3": true
>> +# }
>> +#
>> +# {
>> +# "executable": {
>> +# "pathname": "/usr/share/AAVMF/AAVMF_CODE.fd",
>> +# "description": "ARM64 UEFI firmware",
>> +# "tags": [
>> +# "AARCH64"
>> +# ]
>> +# },
>> +# "type": "uefi",
>> +# "targets": [
>> +# 

Re: [Qemu-devel] [qemu RFC] qapi: add "firmware.json"

2018-04-09 Thread Laszlo Ersek
On 04/09/18 09:26, Thomas Huth wrote:
>  Hi Laszlo,
> 
> On 07.04.2018 02:01, Laszlo Ersek wrote:
>> Add a schema that describes the properties of virtual machine firmware.
>>
>> Each firmware executable installed on a host system should come with a
>> JSON file that conforms to this schema, and informs the management
>> applications about the firmware's properties.
>>
>> In addition, a configuration directory with symlinks to the JSON files
>> should exist, with the symlinks carefully named to reflect a priority
>> order. Management applications can then search this directory in priority
>> order for the first firmware executable that satisfies their search
>> criteria. The found JSON file provides the management layer with domain
>> configuration bits that are required to run the firmware binary.
> [...]
>> +##
>> +# @FirmwareDevice:
>> +#
>> +# Defines the device types that a firmware file can be mapped into.
>> +#
>> +# @memory: The firmware file is to be mapped into memory.
>> +#
>> +# @kernel: The firmware file is to be loaded like a Linux kernel. This is
>> +#  similar to @memory but may imply additional processing that is
>> +#  specific to the target architecture.
>> +#
>> +# @flash: The firmware file is to be mapped into a pflash chip.
>> +#
>> +# Since: 2.13
>> +##
>> +{ 'enum' : 'FirmwareDevice',
>> +  'data' : [ 'memory', 'kernel', 'flash' ] }
> 
> This is not fully clear to me... what is this exactly good for? Is this
> a way to say how the firmware should be loaded, i.e. via "-bios",
> "-kernel" or "-pflash" parameter? If so, the term "memory" is quite
> misleading since files that are loaded via -bios can also end up in an
> emulated ROM chip.

I threw in "-kernel" because, although it also (usually?) means
"memory", I expected people would want it separate.

Regarding memory vs. pflash, I thought that these two, combined with the
access permissions, could cover all of RAM, ROM, and read-only and
read-write pflash too.

So, "-bios" (-> ROM) boils down to "memory", with write access denied --
please see the SeaBIOS example near the end.

> 
> [...]
>> +##
>> +# @NVRAMSlot:
>> +#
>> +# Defines the mapping properties of an NVRAM slot, and associates compatible
>> +# NVRAM templates with the NVRAM slot.
>> +#
>> +# @slot-id: The numeric identifier of the NVRAM slot. The interpretation of
>> +#   @slot-id is specific to the target architecture and the chosen
>> +#   system firmware.
>> +#
>> +# @nvram-map: the mapping requirements of this NVRAM slot
>> +#
>> +# @templates: A non-empty list of @FirmwareFile elements. Any @FirmwareFile
>> +# identified by this list as an NVRAM template can be copied to
>> +# create an actual NVRAM file, and the NVRAM file can be mapped
>> +# into the NVRAM slot identified by @slot-id, subject to the
>> +# mapping requirements in @nvram-map.
>> +#
>> +# Since: 2.13
>> +##
>> +{ 'struct' : 'NVRAMSlot',
>> +  'data'   : { 'slot-id'   : 'uint64',
> 
> Not sure whether I've got the idea here right, so take this with a grain
> of salt: Maybe 'uint64' is not flexible enough here. PAPR uses both, an
> ID and a name (string), see chapter 8.3 in
> https://members.openpowerfoundation.org/document/dl/469 ... but I guess
> we could start with the 'slot-id' here and add a name field later if
> necessary.

I only added "slot-id" as an initial place holder for telling apart the
individual NVRAMSlot elements in "SystemFirmware.nvram-slots". I
expected a scalar wouldn't be expressive enough for all arches, but, as
you say, it can be extended later.

> 
>> +   'nvram-map' : 'FirmwareMapping',
>> +   'templates' : [ 'FirmwareFile' ] } }
>> +
>> +##
>> +# @SystemFirmwareType:
>> +#
>> +# Lists system firmware types commonly used with QEMU virtual machines.
>> +#
>> +# @bios: The system firmware was built from the SeaBIOS project.
>> +#
>> +# @slof: The system firmware was built from the Slimline Open Firmware 
>> project.
>> +#
>> +# @uboot: The system firmware was built from the U-Boot project.
>> +#
>> +# @uefi: The system firmware was built from the edk2 (EFI Development Kit 
>> II)
>> +#project.
>> +#
>> +# Since: 2.13
>> +##
>> +{ 'enum' : 'SystemFirmwareType',
>> +  'data' : [ 'bios', 'slof', 'uboot', 'uefi' ] }
> 
> The naming here is quite a bad mixture between firmware interface
> ('bios', 'uefi') and firmware implementations ('slof', 'uboot'). There
> could be other implementations of BIOS or UEFI than SeaBIOS and EDK2 ...
> so I'd suggest to rather name them 'seabios' and 'edk2' here instead.

Sure, I'm totally ready to follow community advice here (too).

In fact this is the one element I dislike the most about the schema --
it's the fuzziest part, yet it is the most important element for
libvirt. Because users and higher level apps just want to say "give me
UEFI". If I have to ask "OK, but UEFI built from edk2 or something
else?", then it's a lost cause 

Re: [Qemu-devel] [PATCH 1/1] mach-virt: Change default cpu and gic-version setting to "max"

2018-04-09 Thread Wei Huang


On 04/09/2018 10:56 AM, Daniel P. Berrangé wrote:
> On Mon, Apr 09, 2018 at 10:49:21AM -0500, Wei Huang wrote:
>> Running mach-virt machine types (i.e. "-M virt") on different systems can
>> result in various misleading warnings if -cpu and/or gic-version not 
>> specified.
>> For KVM, this can be solved mostly by using "host" type. But the "host" type
>> doesn't work for TCG. Compared with "host", the "max" type not only supports
>> auto detection under KVM mode, but also works with TCG. So this patch set
>> "max" as the default types for both -cpu and gic-version.
> 
> Hmm, generally we aim for the config provided by a machine type to be stable
> across QEMU versions.

I understand this principle. But in reality, under KVM mode, the default
config most time doesn't work. If end users specify cpu type manually,
it still doesn't work because the host CPU is vendor-specific design
(e.g. "cortex-a57" doesn't work on QCOM's machine). So we end up with
using "-cpu host" all the time. My argument for this patch is that "-cpu
max" isn't worse than "-cpu host".

> 
> By specifying "max", the machine type will potentially change if new QEMU
> turns on new features in the "max" CPU model, as well as also varying
> depending on what the host OS supports.
> 
> This is a general conceptual problem with the "host" CPU model for all
> target arches and is unfixable by design. This is fine if you accept
> the caveats with using "-cpu host" and opt-in to using it knowing the
> tradeoffs. I'm just not convinced it is reasonable to make "-cpu host"
> be the default value for a machine type out of the box.
> 
> 
>> Signed-off-by: Wei Huang 
>> ---
>>  hw/arm/virt.c | 10 +-
>>  1 file changed, 5 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 94dcb125d3..1a9d68b8d5 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -1555,7 +1555,7 @@ static void virt_machine_class_init(ObjectClass *oc, 
>> void *data)
>>  mc->minimum_page_bits = 12;
>>  mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
>>  mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
>> -mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
>> +mc->default_cpu_type = ARM_CPU_TYPE_NAME("max");
>>  mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
>>  }
>>  
>> @@ -1609,13 +1609,13 @@ static void virt_2_12_instance_init(Object *obj)
>>  "Set on/off to enable/disable using "
>>  "physical address space above 32 bits",
>>  NULL);
>> -/* Default GIC type is v2 */
>> -vms->gic_version = 2;
>> +/* Default GIC type is max */
>> +vms->gic_version = -1;
>>  object_property_add_str(obj, "gic-version", virt_get_gic_version,
>>  virt_set_gic_version, NULL);
>>  object_property_set_description(obj, "gic-version",
>> -"Set GIC version. "
>> -"Valid values are 2, 3 and host", NULL);
>> +"Set GIC version. Valid values are 2, 
>> 3, "
>> +"host, and max", NULL);
>>  
>>  if (vmc->no_its) {
>>  vms->its = false;
>> -- 
>> 2.14.3
>>
>>
> 
> Regards,
> Daniel
> 



Re: [Qemu-devel] [PULL 0/5] virtio,vhost: fixes

2018-04-09 Thread Peter Maydell
On 9 April 2018 at 15:42, Michael S. Tsirkin  wrote:
> The following changes since commit 0e87fdc966d05f4e5ad868034fcd8ee2a08ca62d:
>
>   Update version for v2.12.0-rc2 release (2018-04-04 20:37:20 +0100)
>
> are available in the git repository at:
>
>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
>
> for you to fetch changes up to d434e5ac5d70e9da7d20e50246af9251a125bdad:
>
>   virtio-serial: fix heap-over-flow (2018-04-09 17:35:46 +0300)
>
> 
> virtio,vhost: fixes
>
> Add a feature flag for new protocol messages.
> Misc fixes.
>
> Signed-off-by: Michael S. Tsirkin 
>
> 

Applied, thanks.

-- PMM



Re: [Qemu-devel] [PATCH v2 00/17] Translation loop conversion for sh4/sparc/mips/s390x/openrisc/riscv targets

2018-04-09 Thread Emilio G. Cota
On Mon, Apr 09, 2018 at 16:01:36 +0200, Bastian Koppelmann wrote:
> Thanks for doing this grunt work. Me and a colleague were planning to do
> this as well after converting the RISC-V frontend to decodetree. Do you
> have any plans to do this for the TriCore frontend as well? I have the
> same plan for Tricore: Convert it to decodetree, then to translation loop.

I won't do any further conversions in the near future, so please go
ahead with your plans :-)

Thanks,

Emilio




Re: [Qemu-devel] [PATCH v2 16/17] target/riscv: convert to DisasContextBase

2018-04-09 Thread Emilio G. Cota
On Mon, Apr 09, 2018 at 16:22:53 +0200, Bastian Koppelmann wrote:
> On 04/06/2018 08:19 PM, Emilio G. Cota wrote:
> > Notes:
> > 
> > - Did not convert {num,max}_insns, since the corresponding code
> >   will go away in the next patch.
> > 
> > - ctx->pc becomes ctx->base.pc_next, and ctx->next_pc becomes ctx->pc_tmp.
> 
> Please call pc_tmp something meaningful, like pc_succ_insn, or pc_next_next.

For v3 I've renamed it to ctx->pc_succ_insn, as you suggested.

Thanks,

E.



Re: [Qemu-devel] [PATCH 1/1] mach-virt: Change default cpu and gic-version setting to "max"

2018-04-09 Thread Daniel P . Berrangé
On Mon, Apr 09, 2018 at 10:49:21AM -0500, Wei Huang wrote:
> Running mach-virt machine types (i.e. "-M virt") on different systems can
> result in various misleading warnings if -cpu and/or gic-version not 
> specified.
> For KVM, this can be solved mostly by using "host" type. But the "host" type
> doesn't work for TCG. Compared with "host", the "max" type not only supports
> auto detection under KVM mode, but also works with TCG. So this patch set
> "max" as the default types for both -cpu and gic-version.

Hmm, generally we aim for the config provided by a machine type to be stable
across QEMU versions.

By specifying "max", the machine type will potentially change if new QEMU
turns on new features in the "max" CPU model, as well as also varying
depending on what the host OS supports.

This is a general conceptual problem with the "host" CPU model for all
target arches and is unfixable by design. This is fine if you accept
the caveats with using "-cpu host" and opt-in to using it knowing the
tradeoffs. I'm just not convinced it is reasonable to make "-cpu host"
be the default value for a machine type out of the box.


> Signed-off-by: Wei Huang 
> ---
>  hw/arm/virt.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 94dcb125d3..1a9d68b8d5 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -1555,7 +1555,7 @@ static void virt_machine_class_init(ObjectClass *oc, 
> void *data)
>  mc->minimum_page_bits = 12;
>  mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
>  mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
> -mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
> +mc->default_cpu_type = ARM_CPU_TYPE_NAME("max");
>  mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
>  }
>  
> @@ -1609,13 +1609,13 @@ static void virt_2_12_instance_init(Object *obj)
>  "Set on/off to enable/disable using "
>  "physical address space above 32 bits",
>  NULL);
> -/* Default GIC type is v2 */
> -vms->gic_version = 2;
> +/* Default GIC type is max */
> +vms->gic_version = -1;
>  object_property_add_str(obj, "gic-version", virt_get_gic_version,
>  virt_set_gic_version, NULL);
>  object_property_set_description(obj, "gic-version",
> -"Set GIC version. "
> -"Valid values are 2, 3 and host", NULL);
> +"Set GIC version. Valid values are 2, 3, 
> "
> +"host, and max", NULL);
>  
>  if (vmc->no_its) {
>  vms->its = false;
> -- 
> 2.14.3
> 
> 

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|



Re: [Qemu-devel] [PATCH 1/1] mach-virt: Change default cpu and gic-version setting to "max"

2018-04-09 Thread Peter Maydell
On 9 April 2018 at 16:49, Wei Huang  wrote:
> Running mach-virt machine types (i.e. "-M virt") on different systems can
> result in various misleading warnings if -cpu and/or gic-version not 
> specified.
> For KVM, this can be solved mostly by using "host" type. But the "host" type
> doesn't work for TCG. Compared with "host", the "max" type not only supports
> auto detection under KVM mode, but also works with TCG.
> So this patch set "max" as the default types for both -cpu and gic-version.

Using "max" will make the VM not migratable by default, and also
means that the behaviour might differ between host machines and
between different QEMU versions. I'm not sure we want that for the
default ?

> Signed-off-by: Wei Huang 
> ---
>  hw/arm/virt.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 94dcb125d3..1a9d68b8d5 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -1555,7 +1555,7 @@ static void virt_machine_class_init(ObjectClass *oc, 
> void *data)
>  mc->minimum_page_bits = 12;
>  mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
>  mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
> -mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
> +mc->default_cpu_type = ARM_CPU_TYPE_NAME("max");
>  mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
>  }
>
> @@ -1609,13 +1609,13 @@ static void virt_2_12_instance_init(Object *obj)
>  "Set on/off to enable/disable using "
>  "physical address space above 32 bits",
>  NULL);
> -/* Default GIC type is v2 */
> -vms->gic_version = 2;
> +/* Default GIC type is max */
> +vms->gic_version = -1;
>  object_property_add_str(obj, "gic-version", virt_get_gic_version,
>  virt_set_gic_version, NULL);
>  object_property_set_description(obj, "gic-version",
> -"Set GIC version. "
> -"Valid values are 2, 3 and host", NULL);
> +"Set GIC version. Valid values are 2, 3, 
> "
> +"host, and max", NULL);

This change in the help string is a bug fix not related to the
rest of the patch. You should send that as a separate patch,
because we want that regardless of what we do about the default
value.

thanks
-- PMM



[Qemu-devel] [PATCH 1/1] mach-virt: Change default cpu and gic-version setting to "max"

2018-04-09 Thread Wei Huang
Running mach-virt machine types (i.e. "-M virt") on different systems can
result in various misleading warnings if -cpu and/or gic-version not specified.
For KVM, this can be solved mostly by using "host" type. But the "host" type
doesn't work for TCG. Compared with "host", the "max" type not only supports
auto detection under KVM mode, but also works with TCG. So this patch set
"max" as the default types for both -cpu and gic-version.

Signed-off-by: Wei Huang 
---
 hw/arm/virt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 94dcb125d3..1a9d68b8d5 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1555,7 +1555,7 @@ static void virt_machine_class_init(ObjectClass *oc, void 
*data)
 mc->minimum_page_bits = 12;
 mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
 mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
-mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
+mc->default_cpu_type = ARM_CPU_TYPE_NAME("max");
 mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
 }
 
@@ -1609,13 +1609,13 @@ static void virt_2_12_instance_init(Object *obj)
 "Set on/off to enable/disable using "
 "physical address space above 32 bits",
 NULL);
-/* Default GIC type is v2 */
-vms->gic_version = 2;
+/* Default GIC type is max */
+vms->gic_version = -1;
 object_property_add_str(obj, "gic-version", virt_get_gic_version,
 virt_set_gic_version, NULL);
 object_property_set_description(obj, "gic-version",
-"Set GIC version. "
-"Valid values are 2, 3 and host", NULL);
+"Set GIC version. Valid values are 2, 3, "
+"host, and max", NULL);
 
 if (vmc->no_its) {
 vms->its = false;
-- 
2.14.3




Re: [Qemu-devel] [PATCH] migration: Don't activate block devices if using -S

2018-04-09 Thread Dr. David Alan Gilbert
* Kevin Wolf (kw...@redhat.com) wrote:
> Am 09.04.2018 um 16:04 hat Dr. David Alan Gilbert geschrieben:
> > * Kevin Wolf (kw...@redhat.com) wrote:
> > > Am 09.04.2018 um 12:27 hat Dr. David Alan Gilbert geschrieben:
> > > > * Kevin Wolf (kw...@redhat.com) wrote:
> > > > > Am 03.04.2018 um 22:52 hat Dr. David Alan Gilbert geschrieben:
> > > > > > * Kevin Wolf (kw...@redhat.com) wrote:
> > > > > > > Am 28.03.2018 um 19:02 hat Dr. David Alan Gilbert (git) 
> > > > > > > geschrieben:
> > > > > > > > From: "Dr. David Alan Gilbert" 
> > > > > > > > 
> > > > > > > > Activating the block devices causes the locks to be taken on
> > > > > > > > the backing file.  If we're running with -S and the destination 
> > > > > > > > libvirt
> > > > > > > > hasn't started the destination with 'cont', it's expecting the 
> > > > > > > > locks are
> > > > > > > > still untaken.
> > > > > > > > 
> > > > > > > > Don't activate the block devices if we're not going to 
> > > > > > > > autostart the VM;
> > > > > > > > 'cont' already will do that anyway.
> > > > > > > > 
> > > > > > > > bz: https://bugzilla.redhat.com/show_bug.cgi?id=1560854
> > > > > > > > Signed-off-by: Dr. David Alan Gilbert 
> > > > > > > 
> > > > > > > I'm not sure that this is a good idea. Going back to my old 
> > > > > > > writeup of
> > > > > > > the migration phases...
> > > > > > > 
> > > > > > > https://lists.gnu.org/archive/html/qemu-devel/2017-09/msg07917.html
> > > > > > > 
> > > > > > > ...the phase between migration completion and 'cont' is described 
> > > > > > > like
> > > > > > > this:
> > > > > > > 
> > > > > > > b) Migration converges:
> > > > > > >Both VMs are stopped (assuming -S is given on the 
> > > > > > > destination,
> > > > > > >otherwise this phase is skipped), the destination is in 
> > > > > > > control of
> > > > > > >the resources
> > > > > > > 
> > > > > > > This patch changes the definition of the phase so that neither 
> > > > > > > side is
> > > > > > > in control of the resources. We lose the phase where the 
> > > > > > > destination is
> > > > > > > in control, but the VM isn't running yet. This feels like a 
> > > > > > > problem to
> > > > > > > me.
> > > > > > 
> > > > > > But see Jiri's writeup on that bz;  libvirt is hitting the opposite
> > > > > > problem;   in this corner case they can't have the destination 
> > > > > > taking
> > > > > > control yet.
> > > > > 
> > > > > I wonder if they can't already grant the destination QEMU the 
> > > > > necessary
> > > > > permission in the pre-switchover phase. Just a thought, I don't know 
> > > > > how
> > > > > this works in detail, so it might not possible after all.
> > > > 
> > > > It's a fairly hairy failure case they had; if I remember correctly it's:
> > > >   a) Start migration
> > > >   b) Migration gets to completion point
> > > >   c) Destination is still paused
> > > >   d) Libvirt is restarted on the source
> > > >   e) Since libvirt was restarted it fails the migration (and hence knows
> > > >  the destination won't be started)
> > > >   f) It now tries to resume the qemu on the source
> > > > 
> > > > (f) fails because (b) caused the locks to be taken on the destination;
> > > > hence this patch stops doing that.  It's a case we don't really think
> > > > about - i.e. that the migration has actually completed and all the data
> > > > is on the destination, but libvirt decides for some other reason to
> > > > abandon migration.
> > > 
> > > If you do remember correctly, that scenario doesn't feel tricky at all.
> > > libvirt needs to quit the destination qemu, which will inactivate the
> > > images on the destination and release the lock, and then it can continue
> > > the source.
> > > 
> > > In fact, this is so straightforward that I wonder what else libvirt is
> > > doing. Is the destination qemu only shut down after trying to continue
> > > the source? That would be libvirt using the wrong order of steps.
> > 
> > I'll leave Jiri to reply to this; I think this is a case of the source
> > realising libvirt has restarted, then trying to recover all of it's VMs
> > without being in the position of being able to check on the destination.
> > 
> > > > > > > Consider a case where the management tool keeps a mirror job with
> > > > > > > sync=none running to expose all I/O requests to some external 
> > > > > > > process.
> > > > > > > It needs to shut down the old block job on the source in the
> > > > > > > 'pre-switchover' state, and start a new block job on the 
> > > > > > > destination
> > > > > > > when the destination controls the images, but the VM doesn't run 
> > > > > > > yet (so
> > > > > > > that it doesn't miss an I/O request). This patch removes the 
> > > > > > > migration
> > > > > > > phase that the management tool needs to implement this correctly.
> > > > > > > 
> > > > > > > If we need a "neither side has control" phase, we might need to
> > > > > > > introduce it 

Re: [Qemu-devel] [RFC] Defining firmware (OVMF, et al) metadata format & file

2018-04-09 Thread Laszlo Ersek
On 04/09/18 11:02, Kashyap Chamarthy wrote:

>   - It also provides a persistent command history in a convenient file:
> '~/.qmp-shell_history'

I noticed it used readline, but I didn't know about the dedicated
history file. Nice! Thanks!

Laszlo



Re: [Qemu-devel] [PATCH] migration: Don't activate block devices if using -S

2018-04-09 Thread Jiri Denemark
On Wed, Apr 04, 2018 at 12:03:03 +0200, Kevin Wolf wrote:
> Am 03.04.2018 um 22:52 hat Dr. David Alan Gilbert geschrieben:
> > * Kevin Wolf (kw...@redhat.com) wrote:
> > > Consider a case where the management tool keeps a mirror job with
> > > sync=none running to expose all I/O requests to some external process.
> > > It needs to shut down the old block job on the source in the
> > > 'pre-switchover' state, and start a new block job on the destination
> > > when the destination controls the images, but the VM doesn't run yet (so
> > > that it doesn't miss an I/O request). This patch removes the migration
> > > phase that the management tool needs to implement this correctly.
> > > 
> > > If we need a "neither side has control" phase, we might need to
> > > introduce it in addition to the existing phases rather than replacing a
> > > phase that is still needed in other cases.
> > 
> > This is yet another phase to be added.
> > IMHO this needs the managment tool to explicitly take control in the
> > case you're talking about.
> 
> What kind of mechanism do you have in mind there?
> 
> Maybe what could work would be separate QMP commands to inactivate (and
> possibly for symmetry activate) all block nodes. Then the management
> tool could use the pre-switchover phase to shut down its block jobs
> etc., inactivate all block nodes, transfer its own locks and then call
> migrate-continue.

Libvirt's migration protocol consists of several phases, the ones
relevant to QEMU are:

1. destination libvirtd starts a new QEMU process with -incoming
2. source libvirtd calls migrate QMP command and waits until migration
   completes; in this step we actually wait for the pre-switchover, shut
   down all block jobs, and call migrate-continue
3. destination libvirtd calls cont QMP command

The daemons only communicated between these steps, i.e., the result of
each steps is transferred to the other side of migration. In other
words, transferring of locks and other state

IIUC what you suggested would require step 2. to be split so that some
work can be done on the destination between "migrate" command and
completed migration. This would be pretty complicated and I don't think
the problem we're trying to solve would be worth it. Not to mention that
it would multiply the number of possible code paths in the migration
code.

However, I don't think the extra step is even necessary. We could just
have a separate QMP command to activate block nodes. Thus the source
would wait for pre-switchover, shut down all block jobs and call
migrate-continue. Once migration completes, the control would be
transferred to the destination which would call the new command to
activate block nodes followed by cont. That is, the "cont" command would
just be replaced with two commands. And this similarly to the
pre-switchover state, this could be controlled by a new migration
capability to make sure all block nodes are activated automatically with
old libvirt which doesn't know anything about the new command. This
would solve compatibility with non-libvirt use cases too.

Jirka



Re: [Qemu-devel] [PATCH for-2.12] gdbstub: fix off-by-one in gdb_handle_packet()

2018-04-09 Thread Peter Maydell
On 9 April 2018 at 10:39, Paolo Bonzini  wrote:
> On 09/04/2018 07:58, Stefan Hajnoczi wrote:
>> On Sun, Apr 08, 2018 at 11:59:33AM -0300, Philippe Mathieu-Daudé wrote:
>>> memtohex() adds an extra trailing NUL character.
>>>
>>> Reported-by: AddressSanitizer
>>> Signed-off-by: Philippe Mathieu-Daudé 
>>> ---
>>> (gdb) dump binary memory /tmp/dram.bin 0x9400 0x9410
>>> Remote connection closed
>>>
>>> =
>>> ==22732==ERROR: AddressSanitizer: stack-buffer-overflow on address 
>>> 0x7ffe43018340 at pc 0x55f2655fde81 bp 0x7ffe43017210 sp 0x7ffe43017208
>>> WRITE of size 1 at 0x7ffe43018340 thread T0
>>>
>>> ---
>>>  gdbstub.c | 4 +++-
>>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> Reviewed-by: Stefan Hajnoczi 
>
> Peter, can you apply this directly to master?

Applied, thanks. (patchwork and patches made a pig's ear of this
for some reason, I think they got confused about where the
commit message stopped and the patch started, so I had to
hand-edit the files.)

-- PMM



Re: [Qemu-devel] [PATCH] migration: Don't activate block devices if using -S

2018-04-09 Thread Kevin Wolf
Am 09.04.2018 um 16:04 hat Dr. David Alan Gilbert geschrieben:
> * Kevin Wolf (kw...@redhat.com) wrote:
> > Am 09.04.2018 um 12:27 hat Dr. David Alan Gilbert geschrieben:
> > > * Kevin Wolf (kw...@redhat.com) wrote:
> > > > Am 03.04.2018 um 22:52 hat Dr. David Alan Gilbert geschrieben:
> > > > > * Kevin Wolf (kw...@redhat.com) wrote:
> > > > > > Am 28.03.2018 um 19:02 hat Dr. David Alan Gilbert (git) geschrieben:
> > > > > > > From: "Dr. David Alan Gilbert" 
> > > > > > > 
> > > > > > > Activating the block devices causes the locks to be taken on
> > > > > > > the backing file.  If we're running with -S and the destination 
> > > > > > > libvirt
> > > > > > > hasn't started the destination with 'cont', it's expecting the 
> > > > > > > locks are
> > > > > > > still untaken.
> > > > > > > 
> > > > > > > Don't activate the block devices if we're not going to autostart 
> > > > > > > the VM;
> > > > > > > 'cont' already will do that anyway.
> > > > > > > 
> > > > > > > bz: https://bugzilla.redhat.com/show_bug.cgi?id=1560854
> > > > > > > Signed-off-by: Dr. David Alan Gilbert 
> > > > > > 
> > > > > > I'm not sure that this is a good idea. Going back to my old writeup 
> > > > > > of
> > > > > > the migration phases...
> > > > > > 
> > > > > > https://lists.gnu.org/archive/html/qemu-devel/2017-09/msg07917.html
> > > > > > 
> > > > > > ...the phase between migration completion and 'cont' is described 
> > > > > > like
> > > > > > this:
> > > > > > 
> > > > > > b) Migration converges:
> > > > > >Both VMs are stopped (assuming -S is given on the 
> > > > > > destination,
> > > > > >otherwise this phase is skipped), the destination is in 
> > > > > > control of
> > > > > >the resources
> > > > > > 
> > > > > > This patch changes the definition of the phase so that neither side 
> > > > > > is
> > > > > > in control of the resources. We lose the phase where the 
> > > > > > destination is
> > > > > > in control, but the VM isn't running yet. This feels like a problem 
> > > > > > to
> > > > > > me.
> > > > > 
> > > > > But see Jiri's writeup on that bz;  libvirt is hitting the opposite
> > > > > problem;   in this corner case they can't have the destination taking
> > > > > control yet.
> > > > 
> > > > I wonder if they can't already grant the destination QEMU the necessary
> > > > permission in the pre-switchover phase. Just a thought, I don't know how
> > > > this works in detail, so it might not possible after all.
> > > 
> > > It's a fairly hairy failure case they had; if I remember correctly it's:
> > >   a) Start migration
> > >   b) Migration gets to completion point
> > >   c) Destination is still paused
> > >   d) Libvirt is restarted on the source
> > >   e) Since libvirt was restarted it fails the migration (and hence knows
> > >  the destination won't be started)
> > >   f) It now tries to resume the qemu on the source
> > > 
> > > (f) fails because (b) caused the locks to be taken on the destination;
> > > hence this patch stops doing that.  It's a case we don't really think
> > > about - i.e. that the migration has actually completed and all the data
> > > is on the destination, but libvirt decides for some other reason to
> > > abandon migration.
> > 
> > If you do remember correctly, that scenario doesn't feel tricky at all.
> > libvirt needs to quit the destination qemu, which will inactivate the
> > images on the destination and release the lock, and then it can continue
> > the source.
> > 
> > In fact, this is so straightforward that I wonder what else libvirt is
> > doing. Is the destination qemu only shut down after trying to continue
> > the source? That would be libvirt using the wrong order of steps.
> 
> I'll leave Jiri to reply to this; I think this is a case of the source
> realising libvirt has restarted, then trying to recover all of it's VMs
> without being in the position of being able to check on the destination.
> 
> > > > > > Consider a case where the management tool keeps a mirror job with
> > > > > > sync=none running to expose all I/O requests to some external 
> > > > > > process.
> > > > > > It needs to shut down the old block job on the source in the
> > > > > > 'pre-switchover' state, and start a new block job on the destination
> > > > > > when the destination controls the images, but the VM doesn't run 
> > > > > > yet (so
> > > > > > that it doesn't miss an I/O request). This patch removes the 
> > > > > > migration
> > > > > > phase that the management tool needs to implement this correctly.
> > > > > > 
> > > > > > If we need a "neither side has control" phase, we might need to
> > > > > > introduce it in addition to the existing phases rather than 
> > > > > > replacing a
> > > > > > phase that is still needed in other cases.
> > > > > 
> > > > > This is yet another phase to be added.
> > > > > IMHO this needs the managment tool to explicitly take control in the
> > > > > case you're talking 

[Qemu-devel] [PULL for-2.12 5/8] pc-bios/s390: update images

2018-04-09 Thread Cornelia Huck
Contains the following commits:
- s390: Do not pass inofficial IPL type to the guest

For s390-netboot.img, this also contains the following commits (update
was forgotten last time):
- pc-bios/s390-ccw: Move string arrays from bootmap header to .c file
- pc-bios/s390-ccw: Increase virtio timeout to 30 seconds

Signed-off-by: Cornelia Huck 
---
 pc-bios/s390-ccw.img | Bin 30568 -> 30520 bytes
 pc-bios/s390-netboot.img | Bin 83776 -> 83856 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
index 
d17e85995db582b871f937cf74338b2eaf65a2ef..fdd6809c707274be47b21fac102c41942f1379d7
 100644
GIT binary patch
delta 2564
zcmZ8jdsLLy5x?`XvLFcGK4f=UmR)uimWQ~Dh`dC>kO*>AjK?El6Oba(8j{B1WPZ
z8_i>7)SC7v%27XKZMyMmb3m)?Y0}1|YORky;_*SnSnDgRqM;3CJNM(H>FNG)XYQT3
z^Sk#qGxyuOkD%ufcqa<0hIV)kk8Mb1%C8}LM}MDY&*-G;zY=|LMBkhee`~R4!C%6p
zmhIE`V&|>DRZhd#gw;@s{lZ2Xt3$S?U-FBLCB1b~ACN@m|C%w)N4+Z6&9#_$ej8
zj?nzLVi|*al?$%pR8^PhB3}`=agW6Fir+6pb$HYU9^Ra+-@1a~Rc%hE&!
zQU{Bvu(c~Vwjf`(4r%SQ+RpWHCzNPyT!(r^uTb_1_+(72kQj{>q08J|(qOQe8=a@Q
zkv&!PD_oC$SJ59R`c|$d|44omIvvJcQ&4n@p%e4Mw!kmZALbNnD!dbxv(f)`Fc>_=
znaJNXU?ZU#@Yo`mSDMCxQWJ~p6l{^if*_d~lakq(%|b{R_g=rS*T7D4pw!`u5wyVk
zB1Jk=?qgx)B2#g^r~=vrn>Dn9{2ZU&?+Xl(Wsv;5YW4l!1j-q%2;UyN>5-JeM7|?V
zL#J(GhmGBSFJqf-p?2(MLzuBfds6BUe7!L)6#J_mFKD9vkb1D|l^ey1LHqqm|Vt
zMr34kzfk03Mf1FHs;Hb%4qEQ73UO1Bzu=;!MI-5e2XueG!v{E
z5$@<*DQEP;{zAsK+(3Ke>u>;fMrN9CR*2qie>G#J-Tp=zHl!(oR%

  1   2   3   >