date:20170519

[Qemu-devel] [PULL 00/20] Misc patches for 2017-05-19

2017-05-19 Thread Paolo Bonzini

The following changes since commit 56821559f0ba682fe6b367815572e6f974d329ab:

  Merge remote-tracking branch 'dgilbert/tags/pull-hmp-20170517' into staging 
(2017-05-18 13:36:15 +0100)

are available in the git repository at:


  git://github.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to e10dc0ca6854c4f47cc5e9d47e20c62aa875f518:

  target/i386: use multiple CPU AddressSpaces (2017-05-19 13:01:32 +0200)


* virtio-scsi use-after-free fix (Fam)
* vhost-user-scsi support (Felipe)
* SMM fixes and improvements for TCG (myself)
* irqchip and AddressSpaceDispatch cleanups and fixes (Peter)
* Coverity fix (Stefano)
* NBD cleanups (Vladimir)
* RTC accuracy improvements and code cleanups (Guangrong+Yunfang)


Fam Zheng (1):
  virtio-scsi: Unset hotplug handler when unrealize

Felipe Franciosi (2):
  vhost-user-scsi: Introduce vhost-user-scsi host device
  vhost-user-scsi: Introduce a vhost-user-scsi sample application

Paolo Bonzini (2):
  target/i386: enable A20 automatically in system management mode
  target/i386: use multiple CPU AddressSpaces

Peter Xu (4):
  kvm: irqchip: trace changes on msi add/remove
  msix: trace control bit write op
  kvm: irqchip: skip update msi when disabled
  exec: simplify phys_page_find() params

Stefano Stabellini (1):
  Check the return value of fcntl in qemu_set_cloexec

Tai Yunfang (1):
  mc146818rtc: precisely count the clock for periodic timer

Vladimir Sementsov-Ogievskiy (5):
  nbd: strict nbd_wr_syncv
  nbd: read_sync and friends: return 0 on success
  nbd: add errp parameter to nbd_wr_syncv()
  nbd: add errp to read_sync, write_sync and drop_sync
  nbd/client.c: use errp instead of LOG

Xiao Guangrong (4):
  mc146818rtc: update periodic timer only if it is needed
  mc146818rtc: ensure LOST_TICK_POLICY_SLEW is only enabled on TARGET_I386
  mc146818rtc: drop unnecessary '#ifdef TARGET_I386'
  mc146818rtc: embrace all x86 specific code

 .gitignore|   1 +
 Makefile  |   3 +
 Makefile.objs |   4 +
 block/nbd-client.c|  11 +-
 contrib/vhost-user-scsi/Makefile.objs |   1 +
 contrib/vhost-user-scsi/vhost-user-scsi.c | 886 ++
 default-configs/pci.mak   |   1 +
 default-configs/s390x-softmmu.mak |   1 +
 exec.c|  13 +-
 hw/pci/msix.c |  11 +-
 hw/pci/trace-events   |   3 +
 hw/scsi/Makefile.objs |   1 +
 hw/scsi/vhost-user-scsi.c | 215 
 hw/scsi/virtio-scsi.c |   3 +
 hw/timer/mc146818rtc.c| 206 ---
 hw/virtio/virtio-pci.c|  54 ++
 hw/virtio/virtio-pci.h|  11 +
 include/block/nbd.h   |   8 +-
 include/hw/virtio/vhost-user-scsi.h   |  35 ++
 include/hw/virtio/virtio-scsi.h   |   3 +
 kvm-all.c |   4 +-
 nbd/client.c  | 125 ++---
 nbd/common.c  |  23 +-
 nbd/nbd-internal.h|  40 +-
 nbd/server.c  |  92 ++--
 qemu-nbd.c|   3 +-
 target/i386/arch_memory_mapping.c |  18 +-
 target/i386/cpu.c |  15 +-
 target/i386/cpu.h |  20 +-
 target/i386/helper.c  |  96 ++--
 target/i386/kvm.c |  12 +-
 target/i386/machine.c |   4 -
 target/i386/smm_helper.c  |  18 -
 trace-events  |   3 +-
 util/oslib-posix.c|   4 +-
 35 files changed, 1642 insertions(+), 306 deletions(-)
 create mode 100644 contrib/vhost-user-scsi/Makefile.objs
 create mode 100644 contrib/vhost-user-scsi/vhost-user-scsi.c
 create mode 100644 hw/scsi/vhost-user-scsi.c
 create mode 100644 include/hw/virtio/vhost-user-scsi.h
-- 
1.8.3.1

[Qemu-devel] [PULL 01/20] mc146818rtc: update periodic timer only if it is needed

2017-05-19 Thread Paolo Bonzini

From: Xiao Guangrong 

Currently, the timer is updated whenever RegA or RegB is written,
even if the periodic-timer-related configuration has not changed.

This patch optimizes it slightly so that the update happens only
if the period or enable status has changed. Later patches also
depend on this optimization.

Signed-off-by: Xiao Guangrong 
Message-Id: <20170510083259.3900-2-xiaoguangr...@tencent.com>
Signed-off-by: Paolo Bonzini 
---
 hw/timer/mc146818rtc.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index 93de3e1..7d78391 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -391,6 +391,7 @@ static void cmos_ioport_write(void *opaque, hwaddr addr,
   uint64_t data, unsigned size)
 {
 RTCState *s = opaque;
+bool update_periodic_timer;
 
 if ((addr & 1) == 0) {
 s->cmos_index = data & 0x7f;
@@ -423,6 +424,8 @@ static void cmos_ioport_write(void *opaque, hwaddr addr,
 }
 break;
 case RTC_REG_A:
+update_periodic_timer = (s->cmos_data[RTC_REG_A] ^ data) & 0x0f;
+
 if ((data & 0x60) == 0x60) {
 if (rtc_running(s)) {
 rtc_update_time(s);
@@ -445,10 +448,17 @@ static void cmos_ioport_write(void *opaque, hwaddr addr,
 /* UIP bit is read only */
 s->cmos_data[RTC_REG_A] = (data & ~REG_A_UIP) |
 (s->cmos_data[RTC_REG_A] & REG_A_UIP);
-periodic_timer_update(s, qemu_clock_get_ns(rtc_clock));
+
+if (update_periodic_timer) {
+periodic_timer_update(s, qemu_clock_get_ns(rtc_clock));
+}
+
 check_update_timer(s);
 break;
 case RTC_REG_B:
+update_periodic_timer = (s->cmos_data[RTC_REG_B] ^ data)
+   & REG_B_PIE;
+
 if (data & REG_B_SET) {
 /* update cmos to when the rtc was stopping */
 if (rtc_running(s)) {
@@ -475,7 +485,11 @@ static void cmos_ioport_write(void *opaque, hwaddr addr,
 qemu_irq_lower(s->irq);
 }
 s->cmos_data[RTC_REG_B] = data;
-periodic_timer_update(s, qemu_clock_get_ns(rtc_clock));
+
+if (update_periodic_timer) {
+periodic_timer_update(s, qemu_clock_get_ns(rtc_clock));
+}
+
 check_update_timer(s);
 break;
 case RTC_REG_C:
-- 
1.8.3.1

[Qemu-devel] [PULL 07/20] msix: trace control bit write op

2017-05-19 Thread Paolo Bonzini

From: Peter Xu 

Meanwhile, factor out a function to detect the msix masked bit.

Signed-off-by: Peter Xu 
Message-Id: <1494309644-18743-3-git-send-email-pet...@redhat.com>
Acked-by: Michael S. Tsirkin 
Reviewed-by: Michael S. Tsirkin 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Paolo Bonzini 
---
 hw/pci/msix.c   | 11 +--
 hw/pci/trace-events |  3 +++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index bb54e8b..fc5fe51 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -22,6 +22,7 @@
 #include "hw/xen/xen.h"
 #include "qemu/range.h"
 #include "qapi/error.h"
+#include "trace.h"
 
 #define MSIX_CAP_LENGTH 12
 
@@ -130,10 +131,14 @@ static void msix_handle_mask_update(PCIDevice *dev, int 
vector, bool was_masked)
 }
 }
 
+static bool msix_masked(PCIDevice *dev)
+{
+return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & 
MSIX_MASKALL_MASK;
+}
+
 static void msix_update_function_masked(PCIDevice *dev)
 {
-dev->msix_function_masked = !msix_enabled(dev) ||
-(dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK);
+dev->msix_function_masked = !msix_enabled(dev) || msix_masked(dev);
 }
 
 /* Handle MSI-X capability config write. */
@@ -148,6 +153,8 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
 return;
 }
 
+trace_msix_write_config(dev->name, msix_enabled(dev), msix_masked(dev));
+
 was_masked = dev->msix_function_masked;
 msix_update_function_masked(dev);
 
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index 2b9cf24..83c8f5a 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -7,3 +7,6 @@ pci_update_mappings_add(void *d, uint32_t bus, uint32_t slot, 
uint32_t func, int
 # hw/pci/pci_host.c
 pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, 
unsigned val) "%s %02u:%u @0x%x -> 0x%x"
 pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, 
unsigned val) "%s %02u:%u @0x%x <- 0x%x"
+
+# hw/pci/msix.c
+msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d 
masked %d"
-- 
1.8.3.1

[Qemu-devel] [PULL 12/20] nbd: add errp parameter to nbd_wr_syncv()

2017-05-19 Thread Paolo Bonzini

From: Vladimir Sementsov-Ogievskiy 

This will be used in the following patch to provide an actual error
message in some cases.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20170516094533.6160-4-vsement...@virtuozzo.com>
Signed-off-by: Paolo Bonzini 
---
 block/nbd-client.c  |  4 ++--
 include/block/nbd.h |  3 ++-
 nbd/common.c| 12 +---
 nbd/nbd-internal.h  |  4 ++--
 4 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 1e2952f..538d95e 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -136,7 +136,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
 rc = nbd_send_request(s->ioc, request);
 if (rc >= 0) {
 ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
-   false);
+   false, NULL);
 if (ret != request->len) {
 rc = -EIO;
 }
@@ -165,7 +165,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
 } else {
 if (qiov && reply->error == 0) {
 ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
-   true);
+   true, NULL);
 if (ret != request->len) {
 reply->error = EIO;
 }
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 0ed0775..9d385ea 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -127,7 +127,8 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
  struct iovec *iov,
  size_t niov,
  size_t length,
- bool do_read);
+ bool do_read,
+ Error **errp);
 int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
   QCryptoTLSCreds *tlscreds, const char *hostname,
   QIOChannel **outioc,
diff --git a/nbd/common.c b/nbd/common.c
index 4db45b3..bd81637 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -28,10 +28,10 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
  struct iovec *iov,
  size_t niov,
  size_t length,
- bool do_read)
+ bool do_read,
+ Error **errp)
 {
 ssize_t done = 0;
-Error *local_err = NULL;
 struct iovec *local_iov = g_new(struct iovec, niov);
 struct iovec *local_iov_head = local_iov;
 unsigned int nlocal_iov = niov;
@@ -41,19 +41,17 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
 while (nlocal_iov > 0) {
 ssize_t len;
 if (do_read) {
-len = qio_channel_readv(ioc, local_iov, nlocal_iov, &local_err);
+len = qio_channel_readv(ioc, local_iov, nlocal_iov, errp);
 } else {
-len = qio_channel_writev(ioc, local_iov, nlocal_iov, &local_err);
+len = qio_channel_writev(ioc, local_iov, nlocal_iov, errp);
 }
 if (len == QIO_CHANNEL_ERR_BLOCK) {
+/* errp should not be set */
 assert(qemu_in_coroutine());
 qio_channel_yield(ioc, do_read ? G_IO_IN : G_IO_OUT);
 continue;
 }
 if (len < 0) {
-TRACE("I/O error: %s", error_get_pretty(local_err));
-error_free(local_err);
-/* XXX handle Error objects */
 done = -EIO;
 goto cleanup;
 }
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index e6bbc7c..1d479fe 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -108,7 +108,7 @@ static inline ssize_t read_sync_eof(QIOChannel *ioc, void 
*buffer, size_t size)
  * our request/reply.  Synchronization is done with recv_coroutine, so
  * that this is coroutine-safe.
  */
-return nbd_wr_syncv(ioc, &iov, 1, size, true);
+return nbd_wr_syncv(ioc, &iov, 1, size, true, NULL);
 }
 
 /* read_sync
@@ -132,7 +132,7 @@ static inline int write_sync(QIOChannel *ioc, const void 
*buffer, size_t size)
 {
 struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size };
 
-ssize_t ret = nbd_wr_syncv(ioc, &iov, 1, size, false);
+ssize_t ret = nbd_wr_syncv(ioc, &iov, 1, size, false, NULL);
 
 assert(ret < 0 || ret == size);
 
-- 
1.8.3.1

[Qemu-devel] [PULL 04/20] mc146818rtc: drop unnecessary '#ifdef TARGET_I386'

2017-05-19 Thread Paolo Bonzini

From: Xiao Guangrong 

If the code depends purely on LOST_TICK_POLICY_SLEW, we can simply
drop '#ifdef TARGET_I386', as only x86 can enable this tick policy.

Signed-off-by: Xiao Guangrong 
Message-Id: <20170510083259.3900-5-xiaoguangr...@tencent.com>
Signed-off-by: Paolo Bonzini 
---
 hw/timer/mc146818rtc.c | 16 +++-
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index 4870a72..f9d6181 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -112,7 +112,6 @@ static uint64_t get_guest_rtc_ns(RTCState *s)
 guest_clock - s->last_update + s->offset;
 }
 
-#ifdef TARGET_I386
 static void rtc_coalesced_timer_update(RTCState *s)
 {
 if (s->irq_coalesced == 0) {
@@ -126,6 +125,7 @@ static void rtc_coalesced_timer_update(RTCState *s)
 }
 }
 
+#ifdef TARGET_I386
 static void rtc_coalesced_timer(void *opaque)
 {
 RTCState *s = opaque;
@@ -198,7 +198,6 @@ periodic_timer_update(RTCState *s, int64_t current_time, 
uint32_t old_period)
 assert(lost_clock >= 0);
 }
 
-#ifdef TARGET_I386
 /*
  * s->irq_coalesced can change for two reasons:
  *
@@ -227,9 +226,7 @@ periodic_timer_update(RTCState *s, int64_t current_time, 
uint32_t old_period)
   s->irq_coalesced, old_period, s->period);
 rtc_coalesced_timer_update(s);
 }
-} else
-#endif
-{
+} else {
/*
  * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
  * is not used, we should make the time progress anyway.
@@ -244,9 +241,7 @@ periodic_timer_update(RTCState *s, int64_t current_time, 
uint32_t old_period)
  RTC_CLOCK_RATE) + 1;
 timer_mod(s->periodic_timer, s->next_periodic_time);
 } else {
-#ifdef TARGET_I386
 s->irq_coalesced = 0;
-#endif
 timer_del(s->periodic_timer);
 }
 }
@@ -835,13 +830,11 @@ static int rtc_post_load(void *opaque, int version_id)
 }
 }
 
-#ifdef TARGET_I386
 if (version_id >= 2) {
 if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
 rtc_coalesced_timer_update(s);
 }
 }
-#endif
 return 0;
 }
 
@@ -898,11 +891,10 @@ static void rtc_notify_clock_reset(Notifier *notifier, 
void *data)
 rtc_set_date_from_host(ISA_DEVICE(s));
 periodic_timer_update(s, now, 0);
 check_update_timer(s);
-#ifdef TARGET_I386
+
 if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
 rtc_coalesced_timer_update(s);
 }
-#endif
 }
 
 /* set CMOS shutdown status register (index 0xF) as S3_resume(0xFE)
@@ -923,12 +915,10 @@ static void rtc_reset(void *opaque)
 
 qemu_irq_lower(s->irq);
 
-#ifdef TARGET_I386
 if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
 s->irq_coalesced = 0;
 s->irq_reinject_on_ack_count = 0;  
 }
-#endif
 }
 
 static const MemoryRegionOps cmos_ops = {
-- 
1.8.3.1

[Qemu-devel] [PULL 11/20] nbd: read_sync and friends: return 0 on success

2017-05-19 Thread Paolo Bonzini

From: Vladimir Sementsov-Ogievskiy 

The functions read_sync, drop_sync, write_sync, and also
nbd_negotiate_write, nbd_negotiate_read, nbd_negotiate_drop_sync
return the number of processed bytes. But what can this number be,
other than the requested number of bytes?

Actually, the underlying nbd_wr_syncv function returns a value >= 0 and
!= requested_bytes only on EOF during a read operation. So, firstly, it
is impossible on write (let's add an assert), and on read it actually
means that communication is broken (except for nbd_receive_reply, see
below).

Most callers operate like this:
   if (func(..., size) != size) {
   /* error path */
   }
, i.e.:
  1. They are not interested in partial success
  2. Extra duplications in code (especially bad are duplications of
 magic numbers)
  3. User doesn't see actual error message, as return code is lost.
 (this patch doesn't fix this point, but it makes fixing easier)

Several callers handle ret >= 0 and != requested-size separately, by
just returning EINVAL in this case. This patch makes read_sync and
friends return EINVAL in this case, so the final behavior is the same.

Only one caller, nbd_receive_reply(), does something less obvious.
It returns EINVAL for ret > 0 and != requested-size, like the
previous group, but for ret == 0 it returns 0. The only caller of
nbd_receive_reply(), nbd_read_reply_entry(), handles ret == 0 in the
same way as ret < 0, so for now it doesn't matter. However, in the
following commits error path handling will be improved and we'll need
to distinguish success from failure in this case too. So, this patch
adds a separate helper for this case: read_sync_eof.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20170516094533.6160-3-vsement...@virtuozzo.com>
Signed-off-by: Paolo Bonzini 
---
 nbd/client.c   | 63 
 nbd/nbd-internal.h | 34 +++---
 nbd/server.c   | 84 +-
 3 files changed, 88 insertions(+), 93 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index a58fb02..6b74a62 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -86,9 +86,9 @@ static QTAILQ_HEAD(, NBDExport) exports = 
QTAILQ_HEAD_INITIALIZER(exports);
 
 */
 
-/* Discard length bytes from channel.  Return -errno on failure, or
- * the amount of bytes consumed. */
-static ssize_t drop_sync(QIOChannel *ioc, size_t size)
+/* Discard length bytes from channel.  Return -errno on failure and 0 on
+ * success*/
+static int drop_sync(QIOChannel *ioc, size_t size)
 {
 ssize_t ret = 0;
 char small[1024];
@@ -96,14 +96,13 @@ static ssize_t drop_sync(QIOChannel *ioc, size_t size)
 
 buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size));
 while (size > 0) {
-ssize_t count = read_sync(ioc, buffer, MIN(65536, size));
+ssize_t count = MIN(65536, size);
+ret = read_sync(ioc, buffer, MIN(65536, size));
 
-if (count <= 0) {
+if (ret < 0) {
 goto cleanup;
 }
-assert(count <= size);
 size -= count;
-ret += count;
 }
 
  cleanup:
@@ -136,12 +135,12 @@ static int nbd_send_option_request(QIOChannel *ioc, 
uint32_t opt,
 stl_be_p(&req.option, opt);
 stl_be_p(&req.length, len);
 
-if (write_sync(ioc, &req, sizeof(req)) != sizeof(req)) {
+if (write_sync(ioc, &req, sizeof(req)) < 0) {
 error_setg(errp, "Failed to send option request header");
 return -1;
 }
 
-if (len && write_sync(ioc, (char *) data, len) != len) {
+if (len && write_sync(ioc, (char *) data, len) < 0) {
 error_setg(errp, "Failed to send option request data");
 return -1;
 }
@@ -170,7 +169,7 @@ static int nbd_receive_option_reply(QIOChannel *ioc, 
uint32_t opt,
 nbd_opt_reply *reply, Error **errp)
 {
 QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
-if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) {
+if (read_sync(ioc, reply, sizeof(*reply)) < 0) {
 error_setg(errp, "failed to read option reply");
 nbd_send_opt_abort(ioc);
 return -1;
@@ -219,7 +218,7 @@ static int nbd_handle_reply_err(QIOChannel *ioc, 
nbd_opt_reply *reply,
 goto cleanup;
 }
 msg = g_malloc(reply->length + 1);
-if (read_sync(ioc, msg, reply->length) != reply->length) {
+if (read_sync(ioc, msg, reply->length) < 0) {
 error_setg(errp, "failed to read option error message");
 goto cleanup;
 }
@@ -321,7 +320,7 @@ static int nbd_receive_list(QIOChannel *ioc, const char 
*want, bool *match,
 nbd_send_opt_abort(ioc);
 return -1;
 }
-if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) {
+if (read_sync(ioc, &namelen, sizeof(namelen)) < 0) {
 error_setg(errp, "failed to read option name length");
 nbd_send_opt_abort(ioc);
 return -1;
@@ -334,7 +333,7 @@ static int nbd_receive_list

[Qemu-devel] [PULL 08/20] kvm: irqchip: skip update msi when disabled

2017-05-19 Thread Paolo Bonzini

From: Peter Xu 

It's possible that one device kept its irqfd/virq there even when
MSI/MSIX was disabled globally for that device. One example is
virtio-net-pci (see commit f1d0f15a6 and virtio_pci_vq_vector_mask()).
It is used as a fast path to avoid allocating/releasing irqfd/virq
frequently when the guest enables/disables MSIX.

However, this fast path causes a problem for msi_route_list: the
device's MSIRouteEntry is still dangling there even if MSIX is disabled.
Then we cannot know which message to fetch and, even if we could, the
messages would be meaningless. In this case, we can simply ignore the entry.

It's safe, since when MSIX is enabled again, we'll rebuild them no
matter what.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1448813

Signed-off-by: Peter Xu 
Message-Id: <1494309644-18743-4-git-send-email-pet...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 target/i386/kvm.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 011d4a5..82c72d2 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -43,6 +43,7 @@
 #include "standard-headers/asm-x86/hyperv.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
 #include "migration/blocker.h"
 #include "exec/memattrs.h"
 #include "trace.h"
@@ -3510,12 +3511,17 @@ static void kvm_update_msi_routes_all(void *private, 
bool global,
 int cnt = 0;
 MSIRouteEntry *entry;
 MSIMessage msg;
+PCIDevice *dev;
+
 /* TODO: explicit route update */
 QLIST_FOREACH(entry, &msi_route_list, list) {
 cnt++;
-msg = pci_get_msi_message(entry->dev, entry->vector);
-kvm_irqchip_update_msi_route(kvm_state, entry->virq,
- msg, entry->dev);
+dev = entry->dev;
+if (!msix_enabled(dev) && !msi_enabled(dev)) {
+continue;
+}
+msg = pci_get_msi_message(dev, entry->vector);
+kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, dev);
 }
 kvm_irqchip_commit_routes(kvm_state);
 trace_kvm_x86_update_msi_routes(cnt);
-- 
1.8.3.1

[Qemu-devel] [PULL 13/20] nbd: add errp to read_sync, write_sync and drop_sync

2017-05-19 Thread Paolo Bonzini

From: Vladimir Sementsov-Ogievskiy 

There are a lot of callers of these functions that already have an errp,
which they fill themselves. On the other hand, nbd_wr_syncv has an errp
parameter too, so it would be great to connect them.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20170516094533.6160-5-vsement...@virtuozzo.com>
Signed-off-by: Paolo Bonzini 
---
 nbd/client.c   | 76 +++---
 nbd/nbd-internal.h | 16 +++-
 nbd/server.c   | 12 -
 3 files changed, 54 insertions(+), 50 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index 6b74a62..f102375 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -88,7 +88,7 @@ static QTAILQ_HEAD(, NBDExport) exports = 
QTAILQ_HEAD_INITIALIZER(exports);
 
 /* Discard length bytes from channel.  Return -errno on failure and 0 on
  * success*/
-static int drop_sync(QIOChannel *ioc, size_t size)
+static int drop_sync(QIOChannel *ioc, size_t size, Error **errp)
 {
 ssize_t ret = 0;
 char small[1024];
@@ -97,7 +97,7 @@ static int drop_sync(QIOChannel *ioc, size_t size)
 buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size));
 while (size > 0) {
 ssize_t count = MIN(65536, size);
-ret = read_sync(ioc, buffer, MIN(65536, size));
+ret = read_sync(ioc, buffer, MIN(65536, size), errp);
 
 if (ret < 0) {
 goto cleanup;
@@ -135,13 +135,13 @@ static int nbd_send_option_request(QIOChannel *ioc, 
uint32_t opt,
 stl_be_p(&req.option, opt);
 stl_be_p(&req.length, len);
 
-if (write_sync(ioc, &req, sizeof(req)) < 0) {
-error_setg(errp, "Failed to send option request header");
+if (write_sync(ioc, &req, sizeof(req), errp) < 0) {
+error_prepend(errp, "Failed to send option request header");
 return -1;
 }
 
-if (len && write_sync(ioc, (char *) data, len) < 0) {
-error_setg(errp, "Failed to send option request data");
+if (len && write_sync(ioc, (char *) data, len, errp) < 0) {
+error_prepend(errp, "Failed to send option request data");
 return -1;
 }
 
@@ -169,8 +169,8 @@ static int nbd_receive_option_reply(QIOChannel *ioc, 
uint32_t opt,
 nbd_opt_reply *reply, Error **errp)
 {
 QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
-if (read_sync(ioc, reply, sizeof(*reply)) < 0) {
-error_setg(errp, "failed to read option reply");
+if (read_sync(ioc, reply, sizeof(*reply), errp) < 0) {
+error_prepend(errp, "failed to read option reply");
 nbd_send_opt_abort(ioc);
 return -1;
 }
@@ -218,8 +218,8 @@ static int nbd_handle_reply_err(QIOChannel *ioc, 
nbd_opt_reply *reply,
 goto cleanup;
 }
 msg = g_malloc(reply->length + 1);
-if (read_sync(ioc, msg, reply->length) < 0) {
-error_setg(errp, "failed to read option error message");
+if (read_sync(ioc, msg, reply->length, errp) < 0) {
+error_prepend(errp, "failed to read option error message");
 goto cleanup;
 }
 msg[reply->length] = '\0';
@@ -320,8 +320,8 @@ static int nbd_receive_list(QIOChannel *ioc, const char 
*want, bool *match,
 nbd_send_opt_abort(ioc);
 return -1;
 }
-if (read_sync(ioc, &namelen, sizeof(namelen)) < 0) {
-error_setg(errp, "failed to read option name length");
+if (read_sync(ioc, &namelen, sizeof(namelen), errp) < 0) {
+error_prepend(errp, "failed to read option name length");
 nbd_send_opt_abort(ioc);
 return -1;
 }
@@ -333,8 +333,8 @@ static int nbd_receive_list(QIOChannel *ioc, const char 
*want, bool *match,
 return -1;
 }
 if (namelen != strlen(want)) {
-if (drop_sync(ioc, len) < 0) {
-error_setg(errp, "failed to skip export name with wrong length");
+if (drop_sync(ioc, len, errp) < 0) {
+error_prepend(errp, "failed to skip export name with wrong 
length");
 nbd_send_opt_abort(ioc);
 return -1;
 }
@@ -342,15 +342,15 @@ static int nbd_receive_list(QIOChannel *ioc, const char 
*want, bool *match,
 }
 
 assert(namelen < sizeof(name));
-if (read_sync(ioc, name, namelen) < 0) {
-error_setg(errp, "failed to read export name");
+if (read_sync(ioc, name, namelen, errp) < 0) {
+error_prepend(errp, "failed to read export name");
 nbd_send_opt_abort(ioc);
 return -1;
 }
 name[namelen] = '\0';
 len -= namelen;
-if (drop_sync(ioc, len) < 0) {
-error_setg(errp, "failed to read export description");
+if (drop_sync(ioc, len, errp) < 0) {
+error_prepend(errp, "failed to read export description");
 nbd_send_opt_abort(ioc);
 return -1;
 }
@@ -476,8 +476,8 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint16_t *flags,
 goto fail;
 }
 
-if (read_sync(ioc, buf,

[Qemu-devel] [PULL 05/20] mc146818rtc: embrace all x86 specific code

2017-05-19 Thread Paolo Bonzini

From: Xiao Guangrong 

Introduce a function, rtc_policy_slew_deliver_irq(), which delivers
the irq if LOST_TICK_POLICY_SLEW is used. As that policy is only
supported on x86, calling it on other platforms triggers an assert.

After that, we can move the x86-specific code to the common place.

Signed-off-by: Xiao Guangrong 
Message-Id: <20170510083259.3900-6-xiaoguangr...@tencent.com>
Signed-off-by: Paolo Bonzini 
---
 hw/timer/mc146818rtc.c | 60 ++
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index f9d6181..542cd09 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -125,17 +125,34 @@ static void rtc_coalesced_timer_update(RTCState *s)
 }
 }
 
+static QLIST_HEAD(, RTCState) rtc_devices =
+QLIST_HEAD_INITIALIZER(rtc_devices);
+
 #ifdef TARGET_I386
+void qmp_rtc_reset_reinjection(Error **errp)
+{
+RTCState *s;
+
+QLIST_FOREACH(s, &rtc_devices, link) {
+s->irq_coalesced = 0;
+}
+}
+
+static bool rtc_policy_slew_deliver_irq(RTCState *s)
+{
+apic_reset_irq_delivered();
+qemu_irq_raise(s->irq);
+return apic_get_irq_delivered();
+}
+
 static void rtc_coalesced_timer(void *opaque)
 {
 RTCState *s = opaque;
 
 if (s->irq_coalesced != 0) {
-apic_reset_irq_delivered();
 s->cmos_data[RTC_REG_C] |= 0xc0;
 DPRINTF_C("cmos: injecting from timer\n");
-qemu_irq_raise(s->irq);
-if (apic_get_irq_delivered()) {
+if (rtc_policy_slew_deliver_irq(s)) {
 s->irq_coalesced--;
 DPRINTF_C("cmos: coalesced irqs decreased to %d\n",
   s->irq_coalesced);
@@ -144,6 +161,12 @@ static void rtc_coalesced_timer(void *opaque)
 
 rtc_coalesced_timer_update(s);
 }
+#else
+static bool rtc_policy_slew_deliver_irq(RTCState *s)
+{
+assert(0);
+return false;
+}
 #endif
 
 static uint32_t rtc_periodic_clock_ticks(RTCState *s)
@@ -254,21 +277,17 @@ static void rtc_periodic_timer(void *opaque)
 s->cmos_data[RTC_REG_C] |= REG_C_PF;
 if (s->cmos_data[RTC_REG_B] & REG_B_PIE) {
 s->cmos_data[RTC_REG_C] |= REG_C_IRQF;
-#ifdef TARGET_I386
 if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
 if (s->irq_reinject_on_ack_count >= RTC_REINJECT_ON_ACK_COUNT)
-s->irq_reinject_on_ack_count = 0;  
-apic_reset_irq_delivered();
-qemu_irq_raise(s->irq);
-if (!apic_get_irq_delivered()) {
+s->irq_reinject_on_ack_count = 0;
+if (!rtc_policy_slew_deliver_irq(s)) {
 s->irq_coalesced++;
 rtc_coalesced_timer_update(s);
 DPRINTF_C("cmos: coalesced irqs increased to %d\n",
   s->irq_coalesced);
 }
 } else
-#endif
-qemu_irq_raise(s->irq);
+qemu_irq_raise(s->irq);
 }
 }
 
@@ -612,20 +631,6 @@ static void rtc_get_time(RTCState *s, struct tm *tm)
 rtc_from_bcd(s, s->cmos_data[RTC_CENTURY]) * 100 - 1900;
 }
 
-static QLIST_HEAD(, RTCState) rtc_devices =
-QLIST_HEAD_INITIALIZER(rtc_devices);
-
-#ifdef TARGET_I386
-void qmp_rtc_reset_reinjection(Error **errp)
-{
-RTCState *s;
-
-QLIST_FOREACH(s, &rtc_devices, link) {
-s->irq_coalesced = 0;
-}
-}
-#endif
-
 static void rtc_set_time(RTCState *s)
 {
 struct tm tm;
@@ -745,22 +750,19 @@ static uint64_t cmos_ioport_read(void *opaque, hwaddr 
addr,
 if (ret & (REG_C_UF | REG_C_AF)) {
 check_update_timer(s);
 }
-#ifdef TARGET_I386
+
 if(s->irq_coalesced &&
 (s->cmos_data[RTC_REG_B] & REG_B_PIE) &&
 s->irq_reinject_on_ack_count < RTC_REINJECT_ON_ACK_COUNT) {
 s->irq_reinject_on_ack_count++;
 s->cmos_data[RTC_REG_C] |= REG_C_IRQF | REG_C_PF;
-apic_reset_irq_delivered();
 DPRINTF_C("cmos: injecting on ack\n");
-qemu_irq_raise(s->irq);
-if (apic_get_irq_delivered()) {
+if (rtc_policy_slew_deliver_irq(s)) {
 s->irq_coalesced--;
 DPRINTF_C("cmos: coalesced irqs decreased to %d\n",
   s->irq_coalesced);
 }
 }
-#endif
 break;
 default:
 ret = s->cmos_data[s->cmos_index];
-- 
1.8.3.1

[Qemu-devel] [PULL 15/20] exec: simplify phys_page_find() params

2017-05-19 Thread Paolo Bonzini

From: Peter Xu 

It really only plays with the dispatchers, so the parameter list does
not need that complexity. This helps for readability at least.

Signed-off-by: Peter Xu 
Message-Id: <1494838260-30439-2-git-send-email-pet...@redhat.com>
Reviewed-by: David Gibson 
Signed-off-by: Paolo Bonzini 
---
 exec.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/exec.c b/exec.c
index 96e3ac9..29633cd 100644
--- a/exec.c
+++ b/exec.c
@@ -373,10 +373,11 @@ static inline bool section_covers_addr(const 
MemoryRegionSection *section,
  int128_getlo(section->size), addr);
 }
 
-static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
-   Node *nodes, MemoryRegionSection 
*sections)
+static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr 
addr)
 {
-PhysPageEntry *p;
+PhysPageEntry lp = d->phys_map, *p;
+Node *nodes = d->map.nodes;
+MemoryRegionSection *sections = d->map.sections;
 hwaddr index = addr >> TARGET_PAGE_BITS;
 int i;
 
@@ -414,8 +415,7 @@ static MemoryRegionSection 
*address_space_lookup_region(AddressSpaceDispatch *d,
 section_covers_addr(section, addr)) {
 update = false;
 } else {
-section = phys_page_find(d->phys_map, addr, d->map.nodes,
- d->map.sections);
+section = phys_page_find(d, addr);
 update = true;
 }
 if (resolve_subpage && section->mr->subpage) {
@@ -1283,8 +1283,7 @@ static void register_subpage(AddressSpaceDispatch *d, 
MemoryRegionSection *secti
 subpage_t *subpage;
 hwaddr base = section->offset_within_address_space
 & TARGET_PAGE_MASK;
-MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
-   d->map.nodes, 
d->map.sections);
+MemoryRegionSection *existing = phys_page_find(d, base);
 MemoryRegionSection subsection = {
 .offset_within_address_space = base,
 .size = int128_make64(TARGET_PAGE_SIZE),
-- 
1.8.3.1

[Qemu-devel] [PULL 06/20] kvm: irqchip: trace changes on msi add/remove

2017-05-19 Thread Paolo Bonzini

From: Peter Xu 

It would be nice to know which virq belongs to which device/vector when
adding msi routes, so add two more parameters to the add trace.

Meanwhile, releasing a virq had no tracing before. Add one for it.

Signed-off-by: Peter Xu 
Message-Id: <1494309644-18743-2-git-send-email-pet...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Paolo Bonzini 
---
 kvm-all.c| 4 +++-
 trace-events | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index 90b8573..2598b1f 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1144,6 +1144,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
 }
 clear_gsi(s, virq);
 kvm_arch_release_virq_post(virq);
+trace_kvm_irqchip_release_virq(virq);
 }
 
 static unsigned int kvm_hash_msi(uint32_t data)
@@ -1287,7 +1288,8 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, 
PCIDevice *dev)
 return -EINVAL;
 }
 
-trace_kvm_irqchip_add_msi_route(virq);
+trace_kvm_irqchip_add_msi_route(dev ? dev->name : (char *)"N/A",
+vector, virq);
 
 kvm_add_routing_entry(s, &kroute);
 kvm_arch_add_msi_route_post(&kroute, vector, dev);
diff --git a/trace-events b/trace-events
index e582d63..f01ec05 100644
--- a/trace-events
+++ b/trace-events
@@ -69,8 +69,9 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, 
type 0x%x, arg %p"
 kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve 
ONEREG %" PRIu64 " from KVM: %s"
 kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set 
ONEREG %" PRIu64 " to KVM: %s"
 kvm_irqchip_commit_routes(void) ""
-kvm_irqchip_add_msi_route(int virq) "Adding MSI route virq=%d"
+kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d 
virq %d"
 kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
+kvm_irqchip_release_virq(int virq) "virq %d"
 
 # TCG related tracing (mostly disabled by default)
 # cpu-exec.c
-- 
1.8.3.1

[Qemu-devel] [PULL 10/20] nbd: strict nbd_wr_syncv

2017-05-19 Thread Paolo Bonzini

From: Vladimir Sementsov-Ogievskiy 

nbd_wr_syncv is called either from a coroutine or from client negotiation
code, when the socket is in blocking mode. So, -EAGAIN is impossible.

Furthermore, EAGAIN is confusing: it is unclear what should be read or
written again. With EAGAIN as a return code we don't know how much data
has already been read or written by the function, so in case of EAGAIN
the whole communication is broken.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20170516094533.6160-2-vsement...@virtuozzo.com>
Signed-off-by: Paolo Bonzini 
---
 nbd/common.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/nbd/common.c b/nbd/common.c
index dccbb8e..4db45b3 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -20,6 +20,10 @@
 #include "qapi/error.h"
 #include "nbd-internal.h"
 
+/* nbd_wr_syncv
+ * The function may be called from coroutine or from non-coroutine context.
+ * When called from non-coroutine context @ioc must be in blocking mode.
+ */
 ssize_t nbd_wr_syncv(QIOChannel *ioc,
  struct iovec *iov,
  size_t niov,
@@ -42,11 +46,8 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
 len = qio_channel_writev(ioc, local_iov, nlocal_iov, &local_err);
 }
 if (len == QIO_CHANNEL_ERR_BLOCK) {
-if (qemu_in_coroutine()) {
-qio_channel_yield(ioc, do_read ? G_IO_IN : G_IO_OUT);
-} else {
-return -EAGAIN;
-}
+assert(qemu_in_coroutine());
+qio_channel_yield(ioc, do_read ? G_IO_IN : G_IO_OUT);
 continue;
 }
 if (len < 0) {
-- 
1.8.3.1

[Qemu-devel] [PULL 19/20] target/i386: enable A20 automatically in system management mode

2017-05-19 Thread Paolo Bonzini

Ignore env->a20_mask when running in system management mode.

Reported-by: Anthony Xu 
Signed-off-by: Paolo Bonzini 
Message-Id: <1494502528-12670-1-git-send-email-pbonz...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 target/i386/arch_memory_mapping.c | 18 +
 target/i386/cpu.h |  9 +
 target/i386/helper.c  | 42 +--
 3 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/target/i386/arch_memory_mapping.c 
b/target/i386/arch_memory_mapping.c
index 826aee5..647cff2 100644
--- a/target/i386/arch_memory_mapping.c
+++ b/target/i386/arch_memory_mapping.c
@@ -272,25 +272,27 @@ void x86_cpu_get_memory_mapping(CPUState *cs, 
MemoryMappingList *list,
 {
 X86CPU *cpu = X86_CPU(cs);
 CPUX86State *env = &cpu->env;
+int32_t a20_mask;
 
 if (!cpu_paging_enabled(cs)) {
 /* paging is disabled */
 return;
 }
 
+a20_mask = x86_get_a20_mask(env);
 if (env->cr[4] & CR4_PAE_MASK) {
 #ifdef TARGET_X86_64
 if (env->hflags & HF_LMA_MASK) {
 if (env->cr[4] & CR4_LA57_MASK) {
 hwaddr pml5e_addr;
 
-pml5e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
-walk_pml5e(list, cs->as, pml5e_addr, env->a20_mask);
+pml5e_addr = (env->cr[3] & PLM4_ADDR_MASK) & a20_mask;
+walk_pml5e(list, cs->as, pml5e_addr, a20_mask);
 } else {
 hwaddr pml4e_addr;
 
-pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
-walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask,
+pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & a20_mask;
+walk_pml4e(list, cs->as, pml4e_addr, a20_mask,
 0xffffULL << 48);
 }
 } else
@@ -298,16 +300,16 @@ void x86_cpu_get_memory_mapping(CPUState *cs, 
MemoryMappingList *list,
 {
 hwaddr pdpe_addr;
 
-pdpe_addr = (env->cr[3] & ~0x1f) & env->a20_mask;
-walk_pdpe2(list, cs->as, pdpe_addr, env->a20_mask);
+pdpe_addr = (env->cr[3] & ~0x1f) & a20_mask;
+walk_pdpe2(list, cs->as, pdpe_addr, a20_mask);
 }
 } else {
 hwaddr pde_addr;
 bool pse;
 
-pde_addr = (env->cr[3] & ~0xfff) & env->a20_mask;
+pde_addr = (env->cr[3] & ~0xfff) & a20_mask;
 pse = !!(env->cr[4] & CR4_PSE_MASK);
-walk_pde2(list, cs->as, pde_addr, env->a20_mask, pse);
+walk_pde2(list, cs->as, pde_addr, a20_mask, pse);
 }
 }
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c4602ca..32a3a0c 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1624,6 +1624,15 @@ static inline MemTxAttrs cpu_get_mem_attrs(CPUX86State 
*env)
 return ((MemTxAttrs) { .secure = (env->hflags & HF_SMM_MASK) != 0 });
 }
 
+static inline int32_t x86_get_a20_mask(CPUX86State *env)
+{
+if (env->hflags & HF_SMM_MASK) {
+return -1;
+} else {
+return env->a20_mask;
+}
+}
+
 /* fpu_helper.c */
 void cpu_set_mxcsr(CPUX86State *env, uint32_t val);
 void cpu_set_fpuc(CPUX86State *env, uint16_t val);
diff --git a/target/i386/helper.c b/target/i386/helper.c
index f11cac6..6c16e7c 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -724,6 +724,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
 X86CPU *cpu = X86_CPU(cs);
 CPUX86State *env = &cpu->env;
 uint64_t ptep, pte;
+int32_t a20_mask;
 target_ulong pde_addr, pte_addr;
 int error_code = 0;
 int is_dirty, prot, page_size, is_write, is_user;
@@ -739,6 +740,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
 #endif
 is_write = is_write1 & 1;
 
+a20_mask = x86_get_a20_mask(env);
 if (!(env->cr[0] & CR0_PG_MASK)) {
 pte = addr;
 #ifdef TARGET_X86_64
@@ -777,7 +779,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
 
 if (la57) {
 pml5e_addr = ((env->cr[3] & ~0xfff) +
-(((addr >> 48) & 0x1ff) << 3)) & env->a20_mask;
+(((addr >> 48) & 0x1ff) << 3)) & a20_mask;
 pml5e = x86_ldq_phys(cs, pml5e_addr);
 if (!(pml5e & PG_PRESENT_MASK)) {
 goto do_fault;
@@ -796,7 +798,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
 }
 
 pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
-(((addr >> 39) & 0x1ff) << 3)) & env->a20_mask;
+(((addr >> 39) & 0x1ff) << 3)) & a20_mask;
 pml4e = x86_ldq_phys(cs, pml4e_addr);
 if (!(pml4e & PG_PRESENT_MASK)) {
 goto do_fault;
@@ -810,7 +812,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
 }
 ptep &= pml4e ^ PG_NX_MASK;
 pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) 
<< 3)) &
-env->a20_mask;
+

[Qemu-devel] [PULL 16/20] virtio-scsi: Unset hotplug handler when unrealize

2017-05-19 Thread Paolo Bonzini

From: Fam Zheng 

This matches the qbus_set_hotplug_handler in realize, and it releases
the final reference to the embedded VirtIODevice so that it is
properly finalized.

A use-after-free is fixed with this patch, indirectly:
virtio_device_instance_finalize wasn't called at hot-unplug, and the
vdev->listener would be a dangling pointer in the global and the per
address space listener list. See also RHBZ 1449031.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Fam Zheng 
Message-Id: <20170518102808.30046-1-f...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 hw/scsi/virtio-scsi.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 46a3e3f..f46f06d 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -918,6 +918,9 @@ void virtio_scsi_common_unrealize(DeviceState *dev, Error 
**errp)
 
 static void virtio_scsi_device_unrealize(DeviceState *dev, Error **errp)
 {
+VirtIOSCSI *s = VIRTIO_SCSI(dev);
+
+qbus_set_hotplug_handler(BUS(&s->bus), NULL, &error_abort);
 virtio_scsi_common_unrealize(dev, errp);
 }
 
-- 
1.8.3.1

[Qemu-devel] [PULL 09/20] Check the return value of fcntl in qemu_set_cloexec

2017-05-19 Thread Paolo Bonzini

From: Stefano Stabellini 

Assert that the return value is not an error. This issue was found by
Coverity.

CID: 1374831

Signed-off-by: Stefano Stabellini 
CC: gr...@kaod.org
CC: pbonz...@redhat.com
CC: Eric Blake 
Message-Id: <1494356693-13190-2-git-send-email-sstabell...@kernel.org>
Signed-off-by: Paolo Bonzini 
---
 util/oslib-posix.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 4d9189e..16894ad 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -182,7 +182,9 @@ void qemu_set_cloexec(int fd)
 {
 int f;
 f = fcntl(fd, F_GETFD);
-fcntl(fd, F_SETFD, f | FD_CLOEXEC);
+assert(f != -1);
+f = fcntl(fd, F_SETFD, f | FD_CLOEXEC);
+assert(f != -1);
 }
 
 /*
-- 
1.8.3.1

[Qemu-devel] [PULL 14/20] nbd/client.c: use errp instead of LOG

2017-05-19 Thread Paolo Bonzini

From: Vladimir Sementsov-Ogievskiy 

Move from just LOGging errors to the modern errp scheme.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20170516094533.6160-6-vsement...@virtuozzo.com>
Signed-off-by: Paolo Bonzini 
---
 block/nbd-client.c  |  7 ++-
 include/block/nbd.h |  5 +++--
 nbd/client.c| 30 +-
 qemu-nbd.c  |  3 ++-
 4 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 538d95e..073032b 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -28,6 +28,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "nbd-client.h"
 
 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
@@ -70,10 +71,14 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 NBDClientSession *s = opaque;
 uint64_t i;
 int ret;
+Error *local_err;
 
 for (;;) {
 assert(s->reply.handle == 0);
-ret = nbd_receive_reply(s->ioc, &s->reply);
+ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
+if (ret < 0) {
+error_report_err(local_err);
+}
 if (ret <= 0) {
 break;
 }
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 9d385ea..416257a 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -133,9 +133,10 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint16_t *flags,
   QCryptoTLSCreds *tlscreds, const char *hostname,
   QIOChannel **outioc,
   off_t *size, Error **errp);
-int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size);
+int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size,
+ Error **errp);
 ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request);
-ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply);
+ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp);
 int nbd_client(int fd);
 int nbd_disconnect(int fd);
 
diff --git a/nbd/client.c b/nbd/client.c
index f102375..595d99e 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -627,11 +627,13 @@ fail:
 }
 
 #ifdef __linux__
-int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size)
+int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size,
+ Error **errp)
 {
 unsigned long sectors = size / BDRV_SECTOR_SIZE;
 if (size / BDRV_SECTOR_SIZE != sectors) {
-LOG("Export size %lld too large for 32-bit kernel", (long long) size);
+error_setg(errp, "Export size %lld too large for 32-bit kernel",
+   (long long) size);
 return -E2BIG;
 }
 
@@ -639,7 +641,7 @@ int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t 
flags, off_t size)
 
 if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) {
 int serrno = errno;
-LOG("Failed to set NBD socket");
+error_setg(errp, "Failed to set NBD socket");
 return -serrno;
 }
 
@@ -647,7 +649,7 @@ int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t 
flags, off_t size)
 
 if (ioctl(fd, NBD_SET_BLKSIZE, (unsigned long)BDRV_SECTOR_SIZE) < 0) {
 int serrno = errno;
-LOG("Failed setting NBD block size");
+error_setg(errp, "Failed setting NBD block size");
 return -serrno;
 }
 
@@ -659,7 +661,7 @@ int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t 
flags, off_t size)
 
 if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
 int serrno = errno;
-LOG("Failed setting size (in blocks)");
+error_setg(errp, "Failed setting size (in blocks)");
 return -serrno;
 }
 
@@ -670,12 +672,12 @@ int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t 
flags, off_t size)
 
 if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
 int serrno = errno;
-LOG("Failed setting read-only attribute");
+error_setg(errp, "Failed setting read-only attribute");
 return -serrno;
 }
 } else {
 int serrno = errno;
-LOG("Failed setting flags");
+error_setg(errp, "Failed setting flags");
 return -serrno;
 }
 }
@@ -723,8 +725,10 @@ int nbd_disconnect(int fd)
 }
 
 #else
-int nbd_init(int fd, QIOChannelSocket *ioc, uint16_t flags, off_t size)
+int nbd_init(int fd, QIOChannelSocket *ioc, uint16_t flags, off_t size,
+Error **errp)
 {
+error_setg(errp, "nbd_init is only supported on Linux");
 return -ENOTSUP;
 }
 
@@ -758,19 +762,19 @@ ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest 
*request)
 return write_sync(ioc, buf, sizeof(buf), NULL);
 }
 
-ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply)
+ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
 {
 uint8_t buf[NBD_REPLY_SIZE];
 uint32_t magic;
 ssize_t ret;

[Qemu-devel] [PULL 17/20] vhost-user-scsi: Introduce vhost-user-scsi host device

2017-05-19 Thread Paolo Bonzini

From: Felipe Franciosi 

This commit introduces a vhost-user device for SCSI. This is based
on the existing vhost-scsi implementation, but done over vhost-user
instead. It also uses a chardev to connect to the backend. Unlike
vhost-scsi (today), VMs using vhost-user-scsi can be live migrated.

To use it, start Qemu with a command line equivalent to:

qemu-system-x86_64 \
   -chardev socket,id=vus0,path=/tmp/vus.sock \
   -device vhost-user-scsi-pci,chardev=vus0,bus=pci.0,addr=...

A separate commit presents a sample application linked with libiscsi to
provide a backend for vhost-user-scsi.

Signed-off-by: Felipe Franciosi 
Message-Id: <1488479153-21203-4-git-send-email-fel...@nutanix.com>
---
 .gitignore  |   1 +
 default-configs/pci.mak |   1 +
 default-configs/s390x-softmmu.mak   |   1 +
 hw/scsi/Makefile.objs   |   1 +
 hw/scsi/vhost-user-scsi.c   | 215 
 hw/virtio/virtio-pci.c  |  54 +
 hw/virtio/virtio-pci.h  |  11 ++
 include/hw/virtio/vhost-user-scsi.h |  35 ++
 include/hw/virtio/virtio-scsi.h |   3 +
 9 files changed, 322 insertions(+)
 create mode 100644 hw/scsi/vhost-user-scsi.c
 create mode 100644 include/hw/virtio/vhost-user-scsi.h

diff --git a/.gitignore b/.gitignore
index 55a001e..fa96bd2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,6 +50,7 @@
 /qemu-version.h.tmp
 /module_block.h
 /vscclient
+/vhost-user-scsi
 /fsdev/virtfs-proxy-helper
 *.[1-9]
 *.a
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 60dc651..ada9c6f 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -42,3 +42,4 @@ CONFIG_VGA=y
 CONFIG_VGA_PCI=y
 CONFIG_IVSHMEM=$(CONFIG_EVENTFD)
 CONFIG_ROCKER=y
+CONFIG_VHOST_USER_SCSI=$(CONFIG_POSIX)
diff --git a/default-configs/s390x-softmmu.mak 
b/default-configs/s390x-softmmu.mak
index 9615a48..9a0b6d9 100644
--- a/default-configs/s390x-softmmu.mak
+++ b/default-configs/s390x-softmmu.mak
@@ -1,5 +1,6 @@
 CONFIG_PCI=y
 CONFIG_VIRTIO_PCI=y
+CONFIG_VHOST_USER_SCSI=y
 CONFIG_VIRTIO=y
 CONFIG_SCLPCONSOLE=y
 CONFIG_TERMINAL3270=y
diff --git a/hw/scsi/Makefile.objs b/hw/scsi/Makefile.objs
index 54d8754..b188f72 100644
--- a/hw/scsi/Makefile.objs
+++ b/hw/scsi/Makefile.objs
@@ -11,4 +11,5 @@ obj-$(CONFIG_PSERIES) += spapr_vscsi.o
 ifeq ($(CONFIG_VIRTIO),y)
 obj-y += virtio-scsi.o virtio-scsi-dataplane.o
 obj-$(CONFIG_VHOST_SCSI) += vhost-scsi-common.o vhost-scsi.o
+obj-$(CONFIG_VHOST_USER_SCSI) += vhost-scsi-common.o vhost-user-scsi.o
 endif
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
new file mode 100644
index 000..694a637
--- /dev/null
+++ b/hw/scsi/vhost-user-scsi.c
@@ -0,0 +1,215 @@
+/*
+ * vhost-user-scsi host device
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ *  Felipe Franciosi 
+ *
+ * This work is largely based on the "vhost-scsi" implementation by:
+ *  Stefan Hajnoczi
+ *  Nicholas Bellinger 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/typedefs.h"
+#include "qom/object.h"
+#include "hw/fw-path-provider.h"
+#include "hw/qdev-core.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "hw/virtio/vhost-user-scsi.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-access.h"
+#include "sysemu/char.h"
+
+/* Features supported by the host application */
+static const int user_feature_bits[] = {
+VIRTIO_F_NOTIFY_ON_EMPTY,
+VIRTIO_RING_F_INDIRECT_DESC,
+VIRTIO_RING_F_EVENT_IDX,
+VIRTIO_SCSI_F_HOTPLUG,
+VHOST_INVALID_FEATURE_BIT
+};
+
+static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
+{
+VHostUserSCSI *s = (VHostUserSCSI *)vdev;
+VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running;
+
+if (vsc->dev.started == start) {
+return;
+}
+
+if (start) {
+int ret;
+
+ret = vhost_scsi_common_start(vsc);
+if (ret < 0) {
+error_report("unable to start vhost-user-scsi: %s", 
strerror(-ret));
+exit(1);
+}
+} else {
+vhost_scsi_common_stop(vsc);
+}
+}
+
+static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+}
+
+static void vhost_user_scsi_save(QEMUFile *f, void *opaque)
+{
+VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+virtio_save(vdev, f);
+}
+
+static int vhost_user_scsi_load(QEMUFile *f, void *opaque, int version_id)
+{
+VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+return virtio_load(vdev, f, version_id);
+}
+
+static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
+{
+VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
+VHostUserSC

[Qemu-devel] [PULL 18/20] vhost-user-scsi: Introduce a vhost-user-scsi sample application

2017-05-19 Thread Paolo Bonzini

From: Felipe Franciosi 

This commit introduces a vhost-user-scsi backend sample application. It
must be linked with libiscsi and libvhost-user.

To use it, compile with:
  $ make vhost-user-scsi

And run as follows:
  $ ./vhost-user-scsi -u vus.sock -i iscsi://uri_to_target/
  $ qemu-system-x86_64 --enable-kvm -m 512 \
  -object memory-backend-file,id=mem,size=512m,share=on,mem-path=guestmem \
  -numa node,memdev=mem \
  -chardev socket,id=vhost-user-scsi,path=vus.sock \
  -device vhost-user-scsi-pci,chardev=vhost-user-scsi

The application is currently limited to one LUN only and it processes
requests synchronously (therefore only achieving QD1). The purpose of
the code is to show how a backend can be implemented and to test the
vhost-user-scsi Qemu implementation.

If a different instance of this vhost-user-scsi application is executed
at a remote host, a VM can be live migrated to such a host.

Signed-off-by: Felipe Franciosi 
Message-Id: <1488479153-21203-5-git-send-email-fel...@nutanix.com>
---
 Makefile  |   3 +
 Makefile.objs |   4 +
 contrib/vhost-user-scsi/Makefile.objs |   1 +
 contrib/vhost-user-scsi/vhost-user-scsi.c | 886 ++
 4 files changed, 894 insertions(+)
 create mode 100644 contrib/vhost-user-scsi/Makefile.objs
 create mode 100644 contrib/vhost-user-scsi/vhost-user-scsi.c

diff --git a/Makefile b/Makefile
index c830d7a..e14988d 100644
--- a/Makefile
+++ b/Makefile
@@ -269,6 +269,7 @@ dummy := $(call unnest-vars,, \
 ivshmem-client-obj-y \
 ivshmem-server-obj-y \
 libvhost-user-obj-y \
+vhost-user-scsi-obj-y \
 qga-vss-dll-obj-y \
 block-obj-y \
 block-obj-m \
@@ -473,6 +474,8 @@ ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) 
$(COMMON_LDADDS)
$(call LINK, $^)
 ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
+vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y)
+   $(call LINK, $^)
 
 module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
$(call quiet-command,$(PYTHON) $< $@ \
diff --git a/Makefile.objs b/Makefile.objs
index 2100845..1fa9450 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -112,6 +112,10 @@ qga-vss-dll-obj-y = qga/
 ivshmem-client-obj-y = contrib/ivshmem-client/
 ivshmem-server-obj-y = contrib/ivshmem-server/
 libvhost-user-obj-y = contrib/libvhost-user/
+vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
+vhost-user-scsi.o-libs := $(LIBISCSI_LIBS)
+vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
+vhost-user-scsi-obj-y += contrib/libvhost-user/libvhost-user.o
 
 ##
 trace-events-subdirs =
diff --git a/contrib/vhost-user-scsi/Makefile.objs 
b/contrib/vhost-user-scsi/Makefile.objs
new file mode 100644
index 000..e83a38a
--- /dev/null
+++ b/contrib/vhost-user-scsi/Makefile.objs
@@ -0,0 +1 @@
+vhost-user-scsi-obj-y = vhost-user-scsi.o
diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c 
b/contrib/vhost-user-scsi/vhost-user-scsi.c
new file mode 100644
index 000..e41bad0
--- /dev/null
+++ b/contrib/vhost-user-scsi/vhost-user-scsi.c
@@ -0,0 +1,886 @@
+/*
+ * vhost-user-scsi sample application
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ *  Felipe Franciosi 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 only.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "contrib/libvhost-user/libvhost-user.h"
+#include "hw/virtio/virtio-scsi.h"
+#include "iscsi/iscsi.h"
+
+#include 
+
+/* Small compat shim from glib 2.32 */
+#ifndef G_SOURCE_CONTINUE
+#define G_SOURCE_CONTINUE TRUE
+#endif
+#ifndef G_SOURCE_REMOVE
+#define G_SOURCE_REMOVE FALSE
+#endif
+
+//#define VUS_DEBUG 1
+
+/** Log helpers **/
+
+#define PPRE  \
+struct timespec ts;   \
+char   timebuf[64];   \
+struct tm tm; \
+(void)clock_gettime(CLOCK_REALTIME, &ts); \
+(void)strftime(timebuf, 64, "%Y%m%d %T", gmtime_r(&ts.tv_sec, &tm))
+
+#define PEXT(lvl, msg, ...) do {  \
+PPRE; \
+fprintf(stderr, "%s.%06ld " lvl ": %s:%s():%d: " msg "\n",\
+timebuf, ts.tv_nsec/1000, \
+__FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__);\
+} while(0)
+
+#define PNOR(lvl, msg, ...) do {  \
+PPRE; \
+fprintf(stderr, "%s.%06ld " lvl ": " msg "\n",

[Qemu-devel] [PULL 20/20] target/i386: use multiple CPU AddressSpaces

2017-05-19 Thread Paolo Bonzini

This speeds up SMM switches.  Later on it may remove the need to take
the BQL, and it may also allow reusing code between TCG and KVM.

Signed-off-by: Paolo Bonzini 
---
 target/i386/cpu.c| 15 +-
 target/i386/cpu.h| 11 +-
 target/i386/helper.c | 54 
 target/i386/machine.c|  4 
 target/i386/smm_helper.c | 18 
 5 files changed, 47 insertions(+), 55 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index a41d595..a638832 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3239,7 +3239,7 @@ static void x86_cpu_machine_done(Notifier *n, void 
*unused)
 cpu->smram = g_new(MemoryRegion, 1);
 memory_region_init_alias(cpu->smram, OBJECT(cpu), "smram",
  smram, 0, 1ull << 32);
-memory_region_set_enabled(cpu->smram, false);
+memory_region_set_enabled(cpu->smram, true);
 memory_region_add_subregion_overlap(cpu->cpu_as_root, 0, cpu->smram, 
1);
 }
 }
@@ -3619,7 +3619,9 @@ static void x86_cpu_realizefn(DeviceState *dev, Error 
**errp)
 
 #ifndef CONFIG_USER_ONLY
 if (tcg_enabled()) {
-AddressSpace *newas = g_new(AddressSpace, 1);
+AddressSpace *as_normal = address_space_init_shareable(cs->memory,
+   "cpu-memory");
+AddressSpace *as_smm = g_new(AddressSpace, 1);
 
 cpu->cpu_as_mem = g_new(MemoryRegion, 1);
 cpu->cpu_as_root = g_new(MemoryRegion, 1);
@@ -3635,9 +3637,11 @@ static void x86_cpu_realizefn(DeviceState *dev, Error 
**errp)
  get_system_memory(), 0, ~0ull);
 memory_region_add_subregion_overlap(cpu->cpu_as_root, 0, 
cpu->cpu_as_mem, 0);
 memory_region_set_enabled(cpu->cpu_as_mem, true);
-address_space_init(newas, cpu->cpu_as_root, "CPU");
-cs->num_ases = 1;
-cpu_address_space_init(cs, newas, 0);
+address_space_init(as_smm, cpu->cpu_as_root, "CPU");
+
+cs->num_ases = 2;
+cpu_address_space_init(cs, as_normal, 0);
+cpu_address_space_init(cs, as_smm, 1);
 
 /* ... SMRAM with higher priority, linked from /machine/smram.  */
 cpu->machine_done.notify = x86_cpu_machine_done;
@@ -4053,6 +4057,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, 
void *data)
 #ifdef CONFIG_USER_ONLY
 cc->handle_mmu_fault = x86_cpu_handle_mmu_fault;
 #else
+cc->asidx_from_attrs = x86_asidx_from_attrs;
 cc->get_memory_mapping = x86_cpu_get_memory_mapping;
 cc->get_phys_page_debug = x86_cpu_get_phys_page_debug;
 cc->write_elf64_note = x86_cpu_write_elf64_note;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 32a3a0c..c2e081c 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1450,6 +1450,16 @@ int x86_cpu_handle_mmu_fault(CPUState *cpu, vaddr addr,
 void x86_cpu_set_a20(X86CPU *cpu, int a20_state);
 
 #ifndef CONFIG_USER_ONLY
+static inline int x86_asidx_from_attrs(CPUState *cs, MemTxAttrs attrs)
+{
+return !!attrs.secure;
+}
+
+static inline AddressSpace *cpu_addressspace(CPUState *cs, MemTxAttrs attrs)
+{
+return cpu_get_address_space(cs, cpu_asidx_from_attrs(cs, attrs));
+}
+
 uint8_t x86_ldub_phys(CPUState *cs, hwaddr addr);
 uint32_t x86_lduw_phys(CPUState *cs, hwaddr addr);
 uint32_t x86_ldl_phys(CPUState *cs, hwaddr addr);
@@ -1652,7 +1662,6 @@ void do_interrupt_x86_hardirq(CPUX86State *env, int 
intno, int is_hw);
 
 /* smm_helper.c */
 void do_smm_enter(X86CPU *cpu);
-void cpu_smm_update(X86CPU *cpu);
 
 /* apic.c */
 void cpu_report_tpr_access(CPUX86State *env, TPRAccess access);
diff --git a/target/i386/helper.c b/target/i386/helper.c
index 6c16e7c..d0daa1f 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -1403,89 +1403,89 @@ uint8_t x86_ldub_phys(CPUState *cs, hwaddr addr)
 {
 X86CPU *cpu = X86_CPU(cs);
 CPUX86State *env = &cpu->env;
+MemTxAttrs attrs = cpu_get_mem_attrs(env);
+AddressSpace *as = cpu_addressspace(cs, attrs);
 
-return address_space_ldub(cs->as, addr,
-  cpu_get_mem_attrs(env),
-  NULL);
+return address_space_ldub(as, addr, attrs, NULL);
 }
 
 uint32_t x86_lduw_phys(CPUState *cs, hwaddr addr)
 {
 X86CPU *cpu = X86_CPU(cs);
 CPUX86State *env = &cpu->env;
+MemTxAttrs attrs = cpu_get_mem_attrs(env);
+AddressSpace *as = cpu_addressspace(cs, attrs);
 
-return address_space_lduw(cs->as, addr,
-  cpu_get_mem_attrs(env),
-  NULL);
+return address_space_lduw(as, addr, attrs, NULL);
 }
 
 uint32_t x86_ldl_phys(CPUState *cs, hwaddr addr)
 {
 X86CPU *cpu = X86_CPU(cs);
 CPUX86State *env = &cpu->env;
+MemTxAttrs attrs = cpu_get_mem_attrs(env);
+AddressSpace *as = cpu_addressspace(cs, attrs);
 
-return address_space_ldl(cs->as, addr,
-

[Qemu-devel] [PULL 2/3] audio: Rename audio_init() to soundhw_init()

2017-05-19 Thread Gerd Hoffmann

From: Eduardo Habkost 

To make it consistent with the remaining soundhw.c functions and
avoid confusion with the audio_init() function in audio/audio.c,
rename audio_init() to soundhw_init().

Signed-off-by: Eduardo Habkost 
Reviewed-by: David Gibson 
Message-id: 20170508205735.23444-3-ehabk...@redhat.com
Signed-off-by: Gerd Hoffmann 
---
 include/hw/audio/audio.h | 2 +-
 hw/audio/soundhw.c   | 2 +-
 hw/ppc/prep.c| 2 +-
 vl.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/hw/audio/audio.h b/include/hw/audio/audio.h
index 259bb2cf96..119f7d78d5 100644
--- a/include/hw/audio/audio.h
+++ b/include/hw/audio/audio.h
@@ -7,7 +7,7 @@ void isa_register_soundhw(const char *name, const char *descr,
 void pci_register_soundhw(const char *name, const char *descr,
   int (*init_pci)(PCIBus *bus));
 
-void audio_init(void);
+void soundhw_init(void);
 void select_soundhw(const char *optarg);
 
 #endif
diff --git a/hw/audio/soundhw.c b/hw/audio/soundhw.c
index 5e96b73c81..29565da93d 100644
--- a/hw/audio/soundhw.c
+++ b/hw/audio/soundhw.c
@@ -129,7 +129,7 @@ void select_soundhw(const char *optarg)
 }
 }
 
-void audio_init(void)
+void soundhw_init(void)
 {
 struct soundhw *c;
 ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, 
NULL);
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 96a4813b3f..4a7d2cfbe0 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -783,7 +783,7 @@ static void ibm_40p_init(MachineState *machine)
&cmos_checksum);
 
 /* initialize audio subsystem */
-audio_init();
+soundhw_init();
 
 /* add some more devices */
 if (defaults_enabled()) {
diff --git a/vl.c b/vl.c
index 6e46889cde..8f08f422a7 100644
--- a/vl.c
+++ b/vl.c
@@ -4575,7 +4575,7 @@ int main(int argc, char **argv, char **envp)
 
 realtime_init();
 
-audio_init();
+soundhw_init();
 
 if (hax_enabled()) {
 hax_sync_vcpus();
-- 
2.9.3

Re: [Qemu-devel] [Bug 1034423] Re: Guests running OpenIndiana (and relatives) fail to boot on AMD hardware

2017-05-19 Thread Owen Tuz

This is an old ticket! I had completely forgotten about it, but will test
when I get a chance and let you know.

Cheers,

Owen

On Fri, May 19, 2017 at 11:25 AM, Thomas Huth <1034...@bugs.launchpad.net>
wrote:

> Triaging old bug tickets ... can you still reproduce this issue with the
> latest version of QEMU (currently v2.9)?
>
> ** Changed in: qemu
>Status: New => Incomplete
>
> --
> You received this bug notification because you are subscribed to the bug
> report.
> https://bugs.launchpad.net/bugs/1034423
>
> Title:
>   Guests running OpenIndiana (and relatives) fail to boot on AMD
>   hardware
>
> Status in QEMU:
>   Incomplete
>
> Bug description:
>   First observed with OpenSolaris 2009.06, and also applies to the
>   latest OpenIndiana release.
>
>   Version: qemu-kvm 1.1.1
>
>   Hardware:
>
>   2 x AMD Opteron 6128 8-core processors, 64GB RAM.
>
>   These guests boot on equivalent Intel hardware.
>
>   To reproduce:
>
>   qemu-kvm -nodefaults -m 512 -cpu host -vga cirrus -usbdevice tablet
>   -vnc :99 -monitor stdio -hda drive.img -cdrom oi-dev-
>   151a5-live-x86.iso -boot order=dc
>
>   I've tested with "-vga std" and various different emulated CPU types,
>   to no effect.
>
>   What happens:
>
>   GRUB loads, and offers multiple boot options, but none work. Some kind
>   of kernel panic flies by very fast before restarting the VM, and
>   careful use of the screenshot button reveals that it reads as follows:
>
>   panic[cpu0]/thread=fec22de0: BAD TRAP: type=8 (#df Double fault)
>   rp=fec2b48c add r=0
>
>   #df Double fault
>   pid=0, pc=0xault
>   pid=0, pc=0xfe800377, sp=0xfec40090, eflags=0x202
>   cr0: 80050011 cr4:b8
>   cr2: 0cr3: ae2f000
> gs:1b0fs:  0   es:
>  160   ds:  160
>edi:0  esi:  0 ebp:
>  0 esp: fec2b4c4
>ebx: c0010015 edx:  0 ecx: 0 eax:
> fec40400
>trp: 8  err:  0 eip: fe800377
> cs:   158
>efl: 202 usp: fec40090  ss:   160
>   tss.tss_link: 0x0
>   tss.tss_esp0:   0x0
>   tss.tss_ss0: 0x160
>   tss.tss_esp1:   0x0
>   tss.tss_ss1:  0x0
>   tss.tss esp2: 0x0
>   tss.tss_ss2:  0x0
>   tss.tss_cr3:   0xae2f000
>   tss.tss_eip:   0xfec40400
>   tss.tss_eflags:  0x202
>   tss.tss_eax:  0xfec40400
>   tss.tss_ebx:  0xc0010015
>   tss.tss_ecx:  0xc001
>   tss.tss_edx:  0x0
>   tss.tss_esp:  0xfec40090
>
>   Warning - stack not written to the dumpbuf
>   fec2b3c8 unix:due+e4 (8, fec2b48c, 0, 0)
>   fec2b478 unix:trap+12fa (fec2b48c, 0, 0)
>   fec2b48c unix:_cmntrap+7c (1b0, 0, 160, 160, 0)
>
>   If there's any more, I haven't managed to catch it.
>
>   Solaris 11 does not seem to suffer from the same issue, although the
>   first message that appears at boot (after the version info) is "trap:
>   Unkown trap type 8 in user mode". Could be related?
>
>   As always, thanks in advance and please let me know if I can help to
>   test, or provide any more information.
>
> To manage notifications about this bug go to:
> https://bugs.launchpad.net/qemu/+bug/1034423/+subscriptions
>

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1034423

Title:
  Guests running OpenIndiana (and relatives) fail to boot on AMD
  hardware

Status in QEMU:
  Incomplete

Bug description:
  First observed with OpenSolaris 2009.06, and also applies to the
  latest OpenIndiana release.

  Version: qemu-kvm 1.1.1

  Hardware:

  2 x AMD Opteron 6128 8-core processors, 64GB RAM.

  These guests boot on equivalent Intel hardware.

  To reproduce:

  qemu-kvm -nodefaults -m 512 -cpu host -vga cirrus -usbdevice tablet
  -vnc :99 -monitor stdio -hda drive.img -cdrom oi-dev-
  151a5-live-x86.iso -boot order=dc

  I've tested with "-vga std" and various different emulated CPU types,
  to no effect.

  What happens:

  GRUB loads, and offers multiple boot options, but none work. Some kind
  of kernel panic flies by very fast before restarting the VM, and
  careful use of the screenshot button reveals that it reads as follows:

  panic[cpu0]/thread=fec22de0: BAD TRAP: type=8 (#df Double fault)
  rp=fec2b48c add r=0

  #df Double fault
  pid=0, pc=0xault
  pid=0, pc=0xfe800377, sp=0xfec40090, eflags=0x202
  cr0: 80050011 cr4:b8
  cr2: 0cr3: ae2f000
gs:1b0fs:  0   es: 160   
ds:  160
   edi:0  esi:  0 ebp: 0 
esp: fec2b4c4
   ebx: c0010015 edx:  0 ecx: 0 eax: 
fec40400
   trp: 8  err:  0 eip: fe800377  cs:   
158
   efl: 202 usp: fec40090  ss:   160
  tss.tss_link: 0x0
  tss.tss_esp0:   0x0
  tss.tss_ss0: 0x160
  tss.tss

[Qemu-devel] [PULL 0/3] audio patch queue.

2017-05-19 Thread Gerd Hoffmann

  Hi,

Smallish audio patch queue, moving and renaming the soundhw init code.

please pull,
  Gerd

The following changes since commit 56821559f0ba682fe6b367815572e6f974d329ab:

  Merge remote-tracking branch 'dgilbert/tags/pull-hmp-20170517' into staging 
(2017-05-18 13:36:15 +0100)

are available in the git repository at:

  git://git.kraxel.org/qemu tags/pull-audio-20170519-1

for you to fetch changes up to 8a824e4d74213a2da39323304f949c5b4243e1fb:

  audio: Rename hw/audio/audio.h to hw/audio/soundhw.h (2017-05-19 10:48:54 
+0200)


audio: move & rename soundhw init code.


Eduardo Habkost (3):
  audio: Move arch_init audio code to hw/audio/soundhw.c
  audio: Rename audio_init() to soundhw_init()
  audio: Rename hw/audio/audio.h to hw/audio/soundhw.h

 include/hw/audio/{audio.h => soundhw.h} |   3 +
 include/sysemu/arch_init.h  |   2 -
 arch_init.c | 126 +-
 hw/audio/ac97.c |   2 +-
 hw/audio/adlib.c|   2 +-
 hw/audio/cs4231a.c  |   2 +-
 hw/audio/es1370.c   |   2 +-
 hw/audio/gus.c  |   2 +-
 hw/audio/intel-hda.c|   2 +-
 hw/audio/pcspk.c|   2 +-
 hw/audio/sb16.c |   2 +-
 hw/audio/soundhw.c  | 156 
 hw/ppc/prep.c   |   3 +-
 vl.c|   3 +-
 hw/audio/Makefile.objs  |   2 +
 15 files changed, 174 insertions(+), 137 deletions(-)
 rename include/hw/audio/{audio.h => soundhw.h} (81%)
 create mode 100644 hw/audio/soundhw.c

Re: [Qemu-devel] [PATCH 1/1] s390x/css: catch section mismatch on load

2017-05-19 Thread Halil Pasic



On 05/18/2017 07:47 PM, Dr. David Alan Gilbert wrote:
>> Hi!
>>
>> I also wonder what is the best way to do this with vmstate.  I know there
>> are VMSTATE_*_EQUAL macros for integers, and I have partially modelled my
>> patch after that, but there we only get a != b as error message, which is
>> satisfactory for detecting bugs which are supposed to get fixed. In this
>> particular case having a verbose error message should be really helpful
>> and thus important.
>>
>> I'm asking because I'm currently working on a vmstate conversion of the
>> s390x css and virtio-ccw  stuff (find my latest patch set here
>> https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg01364.html).
> I think the way to solve that problem will probably be adding a 'hint'
> parameter to the VMSTATE_*_EQUAL macros that is a constant string,
> stuff a pointer to that into a possibly new field in VMStateField,
> and then make the get_*_equal functions include that string in the
> message like you do.  There's a lot of copy and paste but it's
> not too bad now that Jianjun's patch from a few months ago passed
> the VMStateField* to the .get/.put.
> 
> Dave
> 
> 

Thanks Dave! I read your reply as saying that you see this verbose
message feature for VMSTATE_*_EQUAL as something worthy of inclusion.
Am I right?

If yes, I'm willing to implement it.

Halil

[Qemu-devel] [PULL 3/3] audio: Rename hw/audio/audio.h to hw/audio/soundhw.h

2017-05-19 Thread Gerd Hoffmann

From: Eduardo Habkost 

All the functions in hw/audio/audio.h are called "soundhw_*()"
and live in hw/audio/soundhw.c. Rename the header file for
consistency.

Signed-off-by: Eduardo Habkost 
Reviewed-by: David Gibson 
Reviewed-by: Hervé Poussineau 
Message-id: 20170508205735.23444-4-ehabk...@redhat.com
Signed-off-by: Gerd Hoffmann 
---
 include/hw/audio/{audio.h => soundhw.h} | 0
 arch_init.c | 2 +-
 hw/audio/ac97.c | 2 +-
 hw/audio/adlib.c| 2 +-
 hw/audio/cs4231a.c  | 2 +-
 hw/audio/es1370.c   | 2 +-
 hw/audio/gus.c  | 2 +-
 hw/audio/intel-hda.c| 2 +-
 hw/audio/pcspk.c| 2 +-
 hw/audio/sb16.c | 2 +-
 hw/audio/soundhw.c  | 2 +-
 hw/ppc/prep.c   | 2 +-
 vl.c| 2 +-
 13 files changed, 12 insertions(+), 12 deletions(-)
 rename include/hw/audio/{audio.h => soundhw.h} (100%)

diff --git a/include/hw/audio/audio.h b/include/hw/audio/soundhw.h
similarity index 100%
rename from include/hw/audio/audio.h
rename to include/hw/audio/soundhw.h
diff --git a/arch_init.c b/arch_init.c
index 74ca62f508..a0b8ed6167 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -27,7 +27,7 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/arch_init.h"
 #include "hw/pci/pci.h"
-#include "hw/audio/audio.h"
+#include "hw/audio/soundhw.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "qmp-commands.h"
diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c
index c30657501c..959c786261 100644
--- a/hw/audio/ac97.c
+++ b/hw/audio/ac97.c
@@ -19,7 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "hw/hw.h"
-#include "hw/audio/audio.h"
+#include "hw/audio/soundhw.h"
 #include "audio/audio.h"
 #include "hw/pci/pci.h"
 #include "sysemu/dma.h"
diff --git a/hw/audio/adlib.c b/hw/audio/adlib.c
index 09b8248cda..c6e0f10c16 100644
--- a/hw/audio/adlib.c
+++ b/hw/audio/adlib.c
@@ -25,7 +25,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "hw/hw.h"
-#include "hw/audio/audio.h"
+#include "hw/audio/soundhw.h"
 #include "audio/audio.h"
 #include "hw/isa/isa.h"
 
diff --git a/hw/audio/cs4231a.c b/hw/audio/cs4231a.c
index 3ecd0582bf..096e8e98d7 100644
--- a/hw/audio/cs4231a.c
+++ b/hw/audio/cs4231a.c
@@ -23,7 +23,7 @@
  */
 #include "qemu/osdep.h"
 #include "hw/hw.h"
-#include "hw/audio/audio.h"
+#include "hw/audio/soundhw.h"
 #include "audio/audio.h"
 #include "hw/isa/isa.h"
 #include "hw/qdev.h"
diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c
index fe64c1ac37..dd7c23d185 100644
--- a/hw/audio/es1370.c
+++ b/hw/audio/es1370.c
@@ -28,7 +28,7 @@
 
 #include "qemu/osdep.h"
 #include "hw/hw.h"
-#include "hw/audio/audio.h"
+#include "hw/audio/soundhw.h"
 #include "audio/audio.h"
 #include "hw/pci/pci.h"
 #include "sysemu/dma.h"
diff --git a/hw/audio/gus.c b/hw/audio/gus.c
index ec103a4db9..3e864cd36d 100644
--- a/hw/audio/gus.c
+++ b/hw/audio/gus.c
@@ -24,7 +24,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "hw/hw.h"
-#include "hw/audio/audio.h"
+#include "hw/audio/soundhw.h"
 #include "audio/audio.h"
 #include "hw/isa/isa.h"
 #include "gusemu.h"
diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c
index 2c497eb174..06acc98f7b 100644
--- a/hw/audio/intel-hda.c
+++ b/hw/audio/intel-hda.c
@@ -22,7 +22,7 @@
 #include "hw/pci/pci.h"
 #include "hw/pci/msi.h"
 #include "qemu/timer.h"
-#include "hw/audio/audio.h"
+#include "hw/audio/soundhw.h"
 #include "intel-hda.h"
 #include "intel-hda-defs.h"
 #include "sysemu/dma.h"
diff --git a/hw/audio/pcspk.c b/hw/audio/pcspk.c
index 9b99358d87..f643b122bb 100644
--- a/hw/audio/pcspk.c
+++ b/hw/audio/pcspk.c
@@ -26,7 +26,7 @@
 #include "hw/hw.h"
 #include "hw/i386/pc.h"
 #include "hw/isa/isa.h"
-#include "hw/audio/audio.h"
+#include "hw/audio/soundhw.h"
 #include "audio/audio.h"
 #include "qemu/timer.h"
 #include "hw/timer/i8254.h"
diff --git a/hw/audio/sb16.c b/hw/audio/sb16.c
index 6b4427f242..6ab2f6f89a 100644
--- a/hw/audio/sb16.c
+++ b/hw/audio/sb16.c
@@ -23,7 +23,7 @@
  */
 #include "qemu/osdep.h"
 #include "hw/hw.h"
-#include "hw/audio/audio.h"
+#include "hw/audio/soundhw.h"
 #include "audio/audio.h"
 #include "hw/isa/isa.h"
 #include "hw/qdev.h"
diff --git a/hw/audio/soundhw.c b/hw/audio/soundhw.c
index 29565da93d..e698909d34 100644
--- a/hw/audio/soundhw.c
+++ b/hw/audio/soundhw.c
@@ -28,7 +28,7 @@
 #include "qom/object.h"
 #include "hw/isa/isa.h"
 #include "hw/pci/pci.h"
-#include "hw/audio/audio.h"
+#include "hw/audio/soundhw.h"
 
 struct soundhw {
 const char *name;
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 4a7d2cfbe0..d16646c95d 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -36,7 +36,7 @@
 #include "hw/pci/pci_host.h"
 #include "hw/ppc/ppc.h"
 #include "hw/boards.h"
-#include "hw/audio/audio.h"
+#include "hw/audio/soundhw.h"
 #include "qemu/error-report.h"
 #include "qemu/l

[Qemu-devel] [PULL 1/3] audio: Move arch_init audio code to hw/audio/soundhw.c

2017-05-19 Thread Gerd Hoffmann

From: Eduardo Habkost 

There's no reason to keep the soundhw table in arch_init.c. Move
that code to a new hw/audio/soundhw.c file.

While moving the code, trivial coding style issues were fixed.

Signed-off-by: Eduardo Habkost 
Reviewed-by: David Gibson 
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20170508205735.23444-2-ehabk...@redhat.com
Signed-off-by: Gerd Hoffmann 
---
 include/hw/audio/audio.h   |   3 +
 include/sysemu/arch_init.h |   2 -
 arch_init.c| 124 ---
 hw/audio/soundhw.c | 156 +
 hw/ppc/prep.c  |   1 +
 vl.c   |   1 +
 hw/audio/Makefile.objs |   2 +
 7 files changed, 163 insertions(+), 126 deletions(-)
 create mode 100644 hw/audio/soundhw.c

diff --git a/include/hw/audio/audio.h b/include/hw/audio/audio.h
index 55d40f71bf..259bb2cf96 100644
--- a/include/hw/audio/audio.h
+++ b/include/hw/audio/audio.h
@@ -7,4 +7,7 @@ void isa_register_soundhw(const char *name, const char *descr,
 void pci_register_soundhw(const char *name, const char *descr,
   int (*init_pci)(PCIBus *bus));
 
+void audio_init(void);
+void select_soundhw(const char *optarg);
+
 #endif
diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h
index 2bf16b203c..8751c468ed 100644
--- a/include/sysemu/arch_init.h
+++ b/include/sysemu/arch_init.h
@@ -28,8 +28,6 @@ enum {
 
 extern const uint32_t arch_type;
 
-void select_soundhw(const char *optarg);
-void audio_init(void);
 int kvm_available(void);
 int xen_available(void);
 
diff --git a/arch_init.c b/arch_init.c
index 0810116144..74ca62f508 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -85,130 +85,6 @@ int graphic_depth = 32;
 
 const uint32_t arch_type = QEMU_ARCH;
 
-struct soundhw {
-const char *name;
-const char *descr;
-int enabled;
-int isa;
-union {
-int (*init_isa) (ISABus *bus);
-int (*init_pci) (PCIBus *bus);
-} init;
-};
-
-static struct soundhw soundhw[9];
-static int soundhw_count;
-
-void isa_register_soundhw(const char *name, const char *descr,
-  int (*init_isa)(ISABus *bus))
-{
-assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
-soundhw[soundhw_count].name = name;
-soundhw[soundhw_count].descr = descr;
-soundhw[soundhw_count].isa = 1;
-soundhw[soundhw_count].init.init_isa = init_isa;
-soundhw_count++;
-}
-
-void pci_register_soundhw(const char *name, const char *descr,
-  int (*init_pci)(PCIBus *bus))
-{
-assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
-soundhw[soundhw_count].name = name;
-soundhw[soundhw_count].descr = descr;
-soundhw[soundhw_count].isa = 0;
-soundhw[soundhw_count].init.init_pci = init_pci;
-soundhw_count++;
-}
-
-void select_soundhw(const char *optarg)
-{
-struct soundhw *c;
-
-if (is_help_option(optarg)) {
-show_valid_cards:
-
-if (soundhw_count) {
- printf("Valid sound card names (comma separated):\n");
- for (c = soundhw; c->name; ++c) {
- printf ("%-11s %s\n", c->name, c->descr);
- }
- printf("\n-soundhw all will enable all of the above\n");
-} else {
- printf("Machine has no user-selectable audio hardware "
-"(it may or may not have always-present audio 
hardware).\n");
-}
-exit(!is_help_option(optarg));
-}
-else {
-size_t l;
-const char *p;
-char *e;
-int bad_card = 0;
-
-if (!strcmp(optarg, "all")) {
-for (c = soundhw; c->name; ++c) {
-c->enabled = 1;
-}
-return;
-}
-
-p = optarg;
-while (*p) {
-e = strchr(p, ',');
-l = !e ? strlen(p) : (size_t) (e - p);
-
-for (c = soundhw; c->name; ++c) {
-if (!strncmp(c->name, p, l) && !c->name[l]) {
-c->enabled = 1;
-break;
-}
-}
-
-if (!c->name) {
-if (l > 80) {
-error_report("Unknown sound card name (too big to show)");
-}
-else {
-error_report("Unknown sound card name `%.*s'",
- (int) l, p);
-}
-bad_card = 1;
-}
-p += l + (e != NULL);
-}
-
-if (bad_card) {
-goto show_valid_cards;
-}
-}
-}
-
-void audio_init(void)
-{
-struct soundhw *c;
-ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, 
NULL);
-PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, 
NULL);
-
-for (c = soundhw; c->name; ++c) {
-if (c->enabled) {
-if (c->isa) {
-if (!isa_bus) {
-error_repo

[Qemu-devel] [Bug 1511887] Re: USB device 1.1 not correctly passedthru from Linux host to Windows guest

2017-05-19 Thread Jiri Cejka

Currently I can reproduce this bug with version 2.8+dfsg-3~bpo8+1,
because I'm using stable Debian GNU/Linux. I'll try to install the latest
qemu, test it, and report back here.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1511887

Title:
  USB device 1.1 not correctly passedthru from Linux host to Windows
  guest

Status in QEMU:
  Incomplete

Bug description:
  I have USB Digital Oscilloscope which works great on pure Windows
  machine but not work on virtualized one. I tried passthru the device
  from my Debian Jessie (64bit) host machine to Windows 7 (32bit) guest
  machine but unfortunately it does not work very well. It looks that
  device is passed thru so Windows machine knows about new device and
  loads HID device driver for it but the device driver failed to start
  the device and details of an error provided by device manager is "This
  device cannot start" Code 10.

  Installed Qemu version: 2.1+dfsg-12+deb8u4 0

  USB device spec: Dynon Instruments ELAB-080, USB 1.1

  On linux host computer
  ---
  lsusb identify it as:
  Bus 003 Device 009: ID 13a3:0001 

  lsusb -t identify it as:
  /: Bus 03.Port 1: Dev 1, Class=root_hub, Driver=uhci_hcd/2p, 12M
  |__ Port 1: Dev 9, If 0, Class=Human Interface Device, Driver=usbhid, 12M

  This is how I started my Windows guest machine
  --
  kvm -cpu host \
  -m 2048MiB \
  -hda test.vdi \
  -ctrl-grab \
  -parallel /dev/parport0 \
  -usbdevice host:13a3:0001

  ...also instead of last line I tried this one:
  -device usb-host,vendorid=0x13a3,productid=0x0001

  none of them help to properly handle my device inside guest machine.

  Only one time the Windows guest machine properly start the device so
  software for that oscilloscope can identify the Oscilloscope and
  measure for a while but unfortunately after I guess 5 seconds of
  measurement the device was disconnected from Windows and never start
  working again even after couple of restarts of guest machine even
  after plug and unplug it's USB cable and power cable.

  I searched for a solution or some clues to get it work but none of my
  searching over the internet was successful. Because device works on
  pure Windows but not work on virtualized one, I think there is a
  problem with handling not standard USB devices (like sticks,
  keyboards, mouses etc.)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1511887/+subscriptions

[Qemu-devel] [Bug 1635339] Re: qxl_pre_save assertion failure on vm "save"

2017-05-19 Thread Frediano Ziglio

WDDM DOD version 0.17 has been released, which fixes the issue on the guest side.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1635339

Title:
  qxl_pre_save assertion failure on vm "save"

Status in QEMU:
  Confirmed

Bug description:
  When I try and save my Windows 10 VM, I see an assertion failure, and
  the machine is shut down.

  I see the following in the log:

  main_channel_handle_parsed: agent start
  qemu-system-x86_64: /build/qemu-Zwynhi/qemu-2.5+dfsg/hw/display/qxl.c:2101: 
qxl_pre_save: Assertion `d->last_release_offset < d->vga.vram_size' failed.
  2016-10-20 11:52:42.713+0000: shutting down

  Please let me know what other information would be relevant!

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1635339/+subscriptions

Re: [Qemu-devel] [RFC PATCH V2 2/2] msi: Handle remappable format interrupt request

2017-05-19 Thread Anthony PERARD

On Thu, May 18, 2017 at 01:33:00AM -0400, Lan Tianyu wrote:
> From: Chao Gao 
> 
> According to VT-d spec Interrupt Remapping and Interrupt Posting ->
> Interrupt Remapping -> Interrupt Request Formats On Intel 64
> Platforms, fields of MSI data register have changed. This patch
> avoids wrongly regarding a remappable format interrupt request as
> an interrupt binded with an event channel.
> 
> Signed-off-by: Chao Gao 
> Signed-off-by: Lan Tianyu 
> ---
>  hw/pci/msi.c | 5 +++--
>  hw/pci/msix.c| 4 +++-
>  hw/xen/xen_pt_msi.c  | 2 +-
>  include/hw/xen/xen.h | 2 +-
>  xen-hvm-stub.c   | 2 +-
>  xen-hvm.c| 7 ++-
>  6 files changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/pci/msi.c b/hw/pci/msi.c
> index a87b227..199cb47 100644
> --- a/hw/pci/msi.c
> +++ b/hw/pci/msi.c
> @@ -289,7 +289,7 @@ void msi_reset(PCIDevice *dev)
>  static bool msi_is_masked(const PCIDevice *dev, unsigned int vector)
>  {
>  uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> -uint32_t mask, data;
> +uint32_t mask, data, addr_lo;
>  bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
>  assert(vector < PCI_MSI_VECTORS_MAX);
>  
> @@ -298,7 +298,8 @@ static bool msi_is_masked(const PCIDevice *dev, unsigned 
> int vector)
>  }
>  
>  data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
> -if (xen_is_pirq_msi(data)) {
> +addr_lo = pci_get_long(dev->config + msi_address_lo_off(dev));
> +if (xen_is_pirq_msi(data, addr_lo)) {
>  return false;
>  }
>  
> diff --git a/hw/pci/msix.c b/hw/pci/msix.c
> index bb54e8b..efe2982 100644
> --- a/hw/pci/msix.c
> +++ b/hw/pci/msix.c
> @@ -82,9 +82,11 @@ static bool msix_vector_masked(PCIDevice *dev, unsigned 
> int vector, bool fmask)
>  {
>  unsigned offset = vector * PCI_MSIX_ENTRY_SIZE;
>  uint8_t *data = &dev->msix_table[offset + PCI_MSIX_ENTRY_DATA];
> +uint8_t *addr_lo = &dev->msix_table[offset + PCI_MSIX_ENTRY_LOWER_ADDR];
>  /* MSIs on Xen can be remapped into pirqs. In those cases, masking
>   * and unmasking go through the PV evtchn path. */
> -if (xen_enabled() && xen_is_pirq_msi(pci_get_long(data))) {
> +if (xen_enabled() && xen_is_pirq_msi(pci_get_long(data),
> + pci_get_long(addr_lo))) {
>  return false;
>  }
>  return fmask || dev->msix_table[offset + PCI_MSIX_ENTRY_VECTOR_CTRL] &
> diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
> index 5fab95e..45a9e9f 100644
> --- a/hw/xen/xen_pt_msi.c
> +++ b/hw/xen/xen_pt_msi.c
> @@ -114,7 +114,7 @@ static int msi_msix_setup(XenPCIPassthroughState *s,
>  
>  assert((!is_msix && msix_entry == 0) || is_msix);
>  
> -if (xen_is_pirq_msi(data)) {
> +if (xen_is_pirq_msi(data, addr)) {
>  *ppirq = msi_ext_dest_id(addr >> 32) | msi_dest_id(addr);
>  if (!*ppirq) {
>  /* this probably identifies an misconfiguration of the guest,
> diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
> index 09c2ce5..af759bc 100644
> --- a/include/hw/xen/xen.h
> +++ b/include/hw/xen/xen.h
> @@ -33,7 +33,7 @@ int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
>  void xen_piix3_set_irq(void *opaque, int irq_num, int level);
>  void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int 
> len);
>  void xen_hvm_inject_msi(uint64_t addr, uint32_t data);
> -int xen_is_pirq_msi(uint32_t msi_data);
> +int xen_is_pirq_msi(uint32_t msi_data, uint32_t msi_addr_lo);

Maybe inverting the arguments would be better, so that the address comes
first and then the data, as I think is often the case.
What do you think?

>  
>  qemu_irq *xen_interrupt_controller_init(void);
>  
> diff --git a/xen-hvm-stub.c b/xen-hvm-stub.c
> index c500325..dae421c 100644
> --- a/xen-hvm-stub.c
> +++ b/xen-hvm-stub.c
> @@ -31,7 +31,7 @@ void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
>  {
>  }
>  
> -int xen_is_pirq_msi(uint32_t msi_data)
> +int xen_is_pirq_msi(uint32_t msi_data, uint32_t msi_addr_lo)
>  {
>  return 0;
>  }
> diff --git a/xen-hvm.c b/xen-hvm.c
> index 5043beb..db29121 100644
> --- a/xen-hvm.c
> +++ b/xen-hvm.c
> @@ -146,8 +146,13 @@ void xen_piix_pci_write_config_client(uint32_t address, 
> uint32_t val, int len)
>  }
>  }
>  
> -int xen_is_pirq_msi(uint32_t msi_data)
> +int xen_is_pirq_msi(uint32_t msi_data, uint32_t msi_addr_lo)
>  {
> +/* If msi address is configurate to remapping format, the msi will not
> + * remapped into a pirq.

What do you think of: "If the MSI address is configured in remappable
format, the MSI will not be remapped into a pirq." ?

> + */
> +if (msi_addr_lo & MSI_ADDR_IF_MASK)
> +return 0;
>  /* If vector is 0, the msi is remapped into a pirq, passed as
>   * dest_id.
>   */

Thanks,

-- 
Anthony PERARD

Re: [Qemu-devel] [Xen-devel] [RFC PATCH V2 1/2] xen-pt: bind/unbind interrupt remapping format MSI

2017-05-19 Thread Jan Beulich

>>> On 19.05.17 at 13:16,  wrote:
> On Thu, May 18, 2017 at 01:32:59AM -0400, Lan Tianyu wrote:
>> --- a/include/hw/i386/apic-msidef.h
>> +++ b/include/hw/i386/apic-msidef.h
>> @@ -26,6 +26,7 @@
>>  
>>  #define MSI_ADDR_DEST_ID_SHIFT  12
>>  #define MSI_ADDR_DEST_IDX_SHIFT 4
>> -#define  MSI_ADDR_DEST_ID_MASK  0x00ffff0
>> +#define  MSI_ADDR_DEST_ID_MASK  0x000fff00
> 
> The value of MSI_ADDR_DEST_ID_MASK is changed here. I think the patch
> should be:
> +#define  MSI_ADDR_DEST_ID_MASK  0x000ffff0

Judging from other sources, rather the other way around - the
mask needs to have further bits removed (should be 0x000ff000
afaict). Xen sources confirm this, and while Linux has the value
you suggest, that contradicts

#define MSI_ADDR_DEST_ID_SHIFT  12
#define  MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
 MSI_ADDR_DEST_ID_MASK)

as well as

#define MSI_ADDR_EXT_DEST_ID(dest)  ((dest) & 0xffffff00)

chopping off just the low 8 bits.

Jan

Re: [Qemu-devel] [PATCH 1/1] s390x/css: catch section mismatch on load

2017-05-19 Thread Dr. David Alan Gilbert

* Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> 
> 
> On 05/18/2017 07:47 PM, Dr. David Alan Gilbert wrote:
> >> Hi!
> >>
> >> I also wonder what is the best way to do this with vmstate.  I know there
> >> are VMSTATE_*_EQUAL macros for integers, and I have partially modelled my
> >> patch after that, but there we only get a != b as error message, which is
> >> satisfactory for detecting bugs which are supposed to get fixed. In this
> >> particular case having a verbose error message should be really helpful
> >> and thus important.
> >>
> >> I'm asking because I'm currently working on a vmstate conversion of the
> >> s390x css and virtio-ccw  stuff (find my latest patch set here
> >> https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg01364.html).
> > I think the way to solve that problem will probably be adding a 'hint'
> > parameter to the VMSTATE_*_EQUAL macros that is a constant string,
> > stuff a pointer to that into a possibly new field in VMStateField,
> > and then make the get_*_equal functions include that string in the
> > message like you do.  There's a lot of copy and paste but it's
> > not too bad now that Jianjun's patch from a few months ago passed
> > the VMStateField* to the .get/.put.
> > 
> > Dave
> > 
> > 
> 
> Thanks Dave! I read your reply like you are seeing this verbose
> message if VMSTATE_*_EQUAL feature something worth of inclusion.
> Am I right? 

Yes.

> If yes, I'm willing to implement it.

Please do!

Dave

> Halil
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[Qemu-devel] [PATCH] ehci: fix frame timer invocation.

2017-05-19 Thread Gerd Hoffmann

ehci registers ehci_frame_timer as both timer and bottom half, which
turned out to be a bad idea, as it can then be called as a bottom half
while it is still running as a timer, and it isn't prepared to handle
recursive calls.

Change the timer func to just schedule the bottom half to avoid this.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1449609
Signed-off-by: Gerd Hoffmann 
---
 hw/usb/hcd-ehci.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 50ef817f93..3703a8dddc 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2232,7 +2232,7 @@ static void ehci_update_frindex(EHCIState *ehci, int 
uframes)
 ehci->frindex = (ehci->frindex + uframes) % 0x4000;
 }
 
-static void ehci_frame_timer(void *opaque)
+static void ehci_work_bh(void *opaque)
 {
 EHCIState *ehci = opaque;
 int need_timer = 0;
@@ -2324,6 +2324,13 @@ static void ehci_frame_timer(void *opaque)
 }
 }
 
+static void ehci_work_timer(void *opaque)
+{
+EHCIState *ehci = opaque;
+
+qemu_bh_schedule(ehci->async_bh);
+}
+
 static const MemoryRegionOps ehci_mmio_caps_ops = {
 .read = ehci_caps_read,
 .write = ehci_caps_write,
@@ -2478,8 +2485,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, 
Error **errp)
 s->ports[i].dev = 0;
 }
 
-s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_frame_timer, s);
-s->async_bh = qemu_bh_new(ehci_frame_timer, s);
+s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s);
+s->async_bh = qemu_bh_new(ehci_work_bh, s);
 s->device = dev;
 
 s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
-- 
2.9.3

Re: [Qemu-devel] [PULL 00/20] Misc patches for 2017-05-19

2017-05-19 Thread no-reply

Hi,

This series seems to have some coding style problems. See output below for
more information:

Message-id: 1495192872-27667-1-git-send-email-pbonz...@redhat.com
Type: series
Subject: [Qemu-devel] [PULL 00/20] Misc patches for 2017-05-19

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
6fce4cd target/i386: use multiple CPU AddressSpaces
a04ba9d target/i386: enable A20 automatically in system management mode
87c38d5 vhost-user-scsi: Introduce a vhost-user-scsi sample application
a63728e vhost-user-scsi: Introduce vhost-user-scsi host device
bda4194 virtio-scsi: Unset hotplug handler when unrealize
ca14443 exec: simplify phys_page_find() params
7eee4fd nbd/client.c: use errp instead of LOG
388beda nbd: add errp to read_sync, write_sync and drop_sync
0032273 nbd: add errp parameter to nbd_wr_syncv()
bdf25c9 nbd: read_sync and friends: return 0 on success
b61d7d1 nbd: strict nbd_wr_syncv
cc100d3 Check the return value of fcntl in qemu_set_cloexec
94297c6 kvm: irqchip: skip update msi when disabled
f8f04f1 msix: trace control bit write op
11bfe30 kvm: irqchip: trace changes on msi add/remove
192c432 mc146818rtc: embrace all x86 specific code
6e1b003 mc146818rtc: drop unnecessary '#ifdef TARGET_I386'
cb9a45b mc146818rtc: ensure LOST_TICK_POLICY_SLEW is only enabled on TARGET_I386
98a508b mc146818rtc: precisely count the clock for periodic timer
b9744f3 mc146818rtc: update periodic timer only if it is needed

=== OUTPUT BEGIN ===
Checking PATCH 1/20: mc146818rtc: update periodic timer only if it is needed...
Checking PATCH 2/20: mc146818rtc: precisely count the clock for periodic 
timer...
ERROR: braces {} are necessary for all arms of this statement
#129: FILE: hw/timer/mc146818rtc.c:216:
+if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
[...]
+} else
[...]

total: 1 errors, 0 warnings, 181 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 3/20: mc146818rtc: ensure LOST_TICK_POLICY_SLEW is only enabled 
on TARGET_I386...
Checking PATCH 4/20: mc146818rtc: drop unnecessary '#ifdef TARGET_I386'...
Checking PATCH 5/20: mc146818rtc: embrace all x86 specific code...
Checking PATCH 6/20: kvm: irqchip: trace changes on msi add/remove...
Checking PATCH 7/20: msix: trace control bit write op...
Checking PATCH 8/20: kvm: irqchip: skip update msi when disabled...
Checking PATCH 9/20: Check the return value of fcntl in qemu_set_cloexec...
Checking PATCH 10/20: nbd: strict nbd_wr_syncv...
Checking PATCH 11/20: nbd: read_sync and friends: return 0 on success...
Checking PATCH 12/20: nbd: add errp parameter to nbd_wr_syncv()...
Checking PATCH 13/20: nbd: add errp to read_sync, write_sync and drop_sync...
Checking PATCH 14/20: nbd/client.c: use errp instead of LOG...
ERROR: code indent should never use tabs
#126: FILE: nbd/client.c:729:
+^I Error **errp)$

total: 1 errors, 0 warnings, 146 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 15/20: exec: simplify phys_page_find() params...
Checking PATCH 16/20: virtio-scsi: Unset hotplug handler when unrealize...
Checking PATCH 17/20: vhost-user-scsi: Introduce vhost-user-scsi host device...
ERROR: do not use C99 // comments
#216: FILE: hw/scsi/vhost-user-scsi.c:145:
+// Turn on predefined features supported by this device

ERROR: do not use C99 // comments
#261: FILE: hw/scsi/vhost-user-scsi.c:190:
+// Add the bootindex property for this object

ERROR: do not use C99 // comments
#265: FILE: hw/scsi/vhost-user-scsi.c:194:
+// Set boot index according the the device config

total: 3 errors, 0 warnings, 382 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 18/20: vhost-user-scsi: Introduce a vhost-user-scsi sample 
application...
ERROR: do not use C99 // comments
#109: FILE: contrib/vhost-user-scsi/vhost-user-scsi.c:28:
+//#define VUS_DEBUG 1

ERROR: spaces required around that '/' (ctx:VxV)
#123: FILE: contrib/vhost-user-scsi/vhost-user-scsi.c:42:
+timebuf, ts.tv_nsec/1000, \
^

ERROR: __func__ should be used instead of gcc specific __FUNCTION__
#124: FI

[Qemu-devel] [PATCH v2 5/9] char: forbid direct chardevice access for hotswap devices

2017-05-19 Thread Anton Nefedov

qemu_chr_fe_get_driver() is unsafe: frontends with hotswap support
should not access the CharDriver pointer directly, as the CharDriver
might change.

Signed-off-by: Anton Nefedov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 chardev/char.c|  7 +++
 include/sysemu/char.h | 10 ++
 2 files changed, 17 insertions(+)

diff --git a/chardev/char.c b/chardev/char.c
index 0483f19..36d6f36 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -484,9 +484,16 @@ static Notifier muxes_realize_notify = {
 
 Chardev *qemu_chr_fe_get_driver(CharBackend *be)
 {
+/* this is unsafe for the users that support chardev hotswap */
+assert(be->chr_be_change == NULL);
 return be->chr;
 }
 
+bool qemu_chr_fe_backend_connected(CharBackend *be)
+{
+return !!be->chr;
+}
+
 static bool fe_connect(CharBackend *b, Chardev *s, Error **errp)
 {
 int tag = 0;
diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index 92ae57e..fa21535 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h
@@ -404,10 +404,20 @@ bool qemu_chr_fe_init(CharBackend *b, Chardev *s, Error 
**errp);
  *
  * Returns the driver associated with a CharBackend or NULL if no
  * associated Chardev.
+ * Note: avoid this function as the driver should never be accessed directly,
+ *   especially by the frontends that support chardevice hotswap.
+ *   Consider qemu_chr_fe_backend_connected() to check for driver existence
  */
 Chardev *qemu_chr_fe_get_driver(CharBackend *be);
 
 /**
+ * @qemu_chr_fe_backend_connected:
+ *
+ * Returns true if there is a chardevice associated with @be.
+ */
+bool qemu_chr_fe_backend_connected(CharBackend *be);
+
+/**
  * @qemu_chr_fe_deinit:
  *
  * Dissociate the CharBackend from the Chardev.
-- 
2.7.4

[Qemu-devel] [PATCH v2 0/9] chardevice hotswap

2017-05-19 Thread Anton Nefedov

This series is a v2 of the February submission:
http://lists.nongnu.org/archive/html/qemu-devel/2017-02/msg01989.html

The interface is changed as requested and the changes are slightly reworked
and split into separate patches.



The patchset adds support for changing the character device without
removing the frontend device.
So far, only the isa-serial and virtio-serial frontends are supported.

The feature can be helpful for, e.g., Windows debugging: it allows a
connection to be established to a live VM from another VM running WinDbg.

Anton Nefedov (9):
  char: move QemuOpts->ChardevBackend translation to a separate func
  char: add backend hotswap handler
  char: chardevice hotswap
  hmp: add hmp analogue for qmp-chardev-change
  char: forbid direct chardevice access for hotswap devices
  virtio-console: chardev hotswap support
  serial: move TIOCM update to a separate function
  serial: chardev hotswap support
  char: avoid chardevice direct access

 backends/rng-egd.c  |   2 +-
 chardev/char-mux.c  |   1 +
 chardev/char.c  | 235 +---
 gdbstub.c   |   4 +-
 hmp-commands.hx |  16 +++
 hmp.c   |  34 +++
 hmp.h   |   1 +
 hw/arm/pxa2xx.c |   3 +-
 hw/arm/strongarm.c  |   4 +-
 hw/char/bcm2835_aux.c   |   2 +-
 hw/char/cadence_uart.c  |   4 +-
 hw/char/debugcon.c  |   4 +-
 hw/char/digic-uart.c|   2 +-
 hw/char/escc.c  |   8 +-
 hw/char/etraxfs_ser.c   |   2 +-
 hw/char/exynos4210_uart.c   |   4 +-
 hw/char/grlib_apbuart.c |   4 +-
 hw/char/imx_serial.c|   2 +-
 hw/char/ipoctal232.c|   4 +-
 hw/char/lm32_juart.c|   2 +-
 hw/char/lm32_uart.c |   2 +-
 hw/char/mcf_uart.c  |   2 +-
 hw/char/milkymist-uart.c|   2 +-
 hw/char/parallel.c  |   2 +-
 hw/char/pl011.c |   2 +-
 hw/char/sclpconsole-lm.c|   4 +-
 hw/char/sclpconsole.c   |   4 +-
 hw/char/serial.c|  63 +---
 hw/char/sh_serial.c |   4 +-
 hw/char/spapr_vty.c |   4 +-
 hw/char/stm32f2xx_usart.c   |   3 +-
 hw/char/terminal3270.c  |   4 +-
 hw/char/virtio-console.c|  35 ++-
 hw/char/xen_console.c   |   4 +-
 hw/char/xilinx_uartlite.c   |   2 +-
 hw/ipmi/ipmi_bmc_extern.c   |   4 +-
 hw/mips/boston.c|   2 +-
 hw/mips/mips_malta.c|   2 +-
 hw/misc/ivshmem.c   |   6 +-
 hw/usb/ccid-card-passthru.c |   6 +-
 hw/usb/dev-serial.c |   7 +-
 hw/usb/redirect.c   |   7 +-
 include/sysemu/char.h   |  44 +
 monitor.c   |   4 +-
 net/colo-compare.c  |  14 +--
 net/filter-mirror.c |   8 +-
 net/slirp.c |   2 +-
 net/vhost-user.c|   7 +-
 qapi-schema.json|  40 
 qtest.c |   2 +-
 tests/test-char.c   |  14 ++-
 tests/vhost-user-test.c |   2 +-
 52 files changed, 506 insertions(+), 140 deletions(-)

-- 
2.7.4

[Qemu-devel] [PATCH v2 8/9] serial: chardev hotswap support

2017-05-19 Thread Anton Nefedov

On a backend change, a number of ioctls have to be replayed to sync
the current setup of the frontend to the backend tty. Hopefully this is
enough, so that we don't have to track, store and replay the whole
original control byte sequence.

Signed-off-by: Anton Nefedov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
CC: Michael S. Tsirkin 
CC: Paolo Bonzini 
---
 hw/char/serial.c | 32 ++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/hw/char/serial.c b/hw/char/serial.c
index 1e6bdeb..ed01637 100644
--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -891,9 +891,37 @@ static void serial_reset(void *opaque)
 s->msr &= ~UART_MSR_ANY_DELTA;
 }
 
+static int serial_be_change(void *opaque)
+{
+SerialState *s = opaque;
+
+qemu_chr_fe_set_handlers(&s->chr, serial_can_receive1, serial_receive1,
+ serial_event, serial_be_change, s, NULL, true);
+
+serial_update_parameters(s);
+
+qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_BREAK,
+  &s->last_break_enable);
+
+s->poll_msl = (s->ier & UART_IER_MSI) ? 1 : 0;
+serial_update_msl(s);
+
+if (s->poll_msl >= 0 && !(s->mcr & UART_MCR_LOOP)) {
+serial_update_tiocm(s);
+}
+
+if (s->watch_tag > 0) {
+g_source_remove(s->watch_tag);
+s->watch_tag = qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP,
+ serial_watch_cb, s);
+}
+
+return 0;
+}
+
 void serial_realize_core(SerialState *s, Error **errp)
 {
-if (!qemu_chr_fe_get_driver(&s->chr)) {
+if (!qemu_chr_fe_backend_connected(&s->chr)) {
 error_setg(errp, "Can't create serial device, empty char device");
 return;
 }
@@ -904,7 +932,7 @@ void serial_realize_core(SerialState *s, Error **errp)
 qemu_register_reset(serial_reset, s);
 
 qemu_chr_fe_set_handlers(&s->chr, serial_can_receive1, serial_receive1,
- serial_event, NULL, s, NULL, true);
+ serial_event, serial_be_change, s, NULL, true);
 fifo8_create(&s->recv_fifo, UART_FIFO_LENGTH);
 fifo8_create(&s->xmit_fifo, UART_FIFO_LENGTH);
 serial_reset(s);
-- 
2.7.4

[Qemu-devel] [PATCH v2 1/9] char: move QemuOpts->ChardevBackend translation to a separate func

2017-05-19 Thread Anton Nefedov

Signed-off-by: Anton Nefedov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 chardev/char.c | 72 +-
 1 file changed, 46 insertions(+), 26 deletions(-)

diff --git a/chardev/char.c b/chardev/char.c
index 4e24dc3..684cccd 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -854,17 +854,14 @@ help_string_append(const char *name, void *opaque)
 g_string_append_printf(str, "\n%s", name);
 }
 
-Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
-Error **errp)
+static ChardevBackend *qemu_chr_parse_opts(QemuOpts *opts,
+   Error **errp)
 {
 Error *local_err = NULL;
 const ChardevClass *cc;
-Chardev *chr;
 int i;
 ChardevBackend *backend = NULL;
 const char *name = qemu_opt_get(opts, "backend");
-const char *id = qemu_opts_id(opts);
-char *bid = NULL;
 
 if (name == NULL) {
 error_setg(errp, "chardev: \"%s\" missing backend",
@@ -872,21 +869,6 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
 return NULL;
 }
 
-if (is_help_option(name)) {
-GString *str = g_string_new("");
-
-chardev_name_foreach(help_string_append, str);
-
-error_report("Available chardev backend types: %s", str->str);
-g_string_free(str, true);
-exit(0);
-}
-
-if (id == NULL) {
-error_setg(errp, "chardev: no id specified");
-return NULL;
-}
-
 for (i = 0; i < ARRAY_SIZE(chardev_alias_table); i++) {
 if (g_strcmp0(chardev_alias_table[i].alias, name) == 0) {
 name = chardev_alias_table[i].typename;
@@ -902,16 +884,12 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
 backend = g_new0(ChardevBackend, 1);
 backend->type = CHARDEV_BACKEND_KIND_NULL;
 
-if (qemu_opt_get_bool(opts, "mux", 0)) {
-bid = g_strdup_printf("%s-base", id);
-}
-
-chr = NULL;
 if (cc->parse) {
 cc->parse(opts, backend, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
-goto out;
+qapi_free_ChardevBackend(backend);
+return NULL;
 }
 } else {
 ChardevCommon *ccom = g_new0(ChardevCommon, 1);
@@ -919,6 +897,48 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
 backend->u.null.data = ccom; /* Any ChardevCommon member would work */
 }
 
+return backend;
+}
+
+Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
+Error **errp)
+{
+const ChardevClass *cc;
+Chardev *chr = NULL;
+ChardevBackend *backend = NULL;
+const char *name = qemu_opt_get(opts, "backend");
+const char *id = qemu_opts_id(opts);
+char *bid = NULL;
+
+if (name && is_help_option(name)) {
+GString *str = g_string_new("");
+
+chardev_name_foreach(help_string_append, str);
+
+error_report("Available chardev backend types: %s", str->str);
+g_string_free(str, true);
+exit(0);
+}
+
+if (id == NULL) {
+error_setg(errp, "chardev: no id specified");
+return NULL;
+}
+
+backend = qemu_chr_parse_opts(opts, errp);
+if (backend == NULL) {
+return NULL;
+}
+
+cc = char_get_class(name, errp);
+if (cc == NULL) {
+goto out;
+}
+
+if (qemu_opt_get_bool(opts, "mux", 0)) {
+bid = g_strdup_printf("%s-base", id);
+}
+
 chr = qemu_chardev_new(bid ? bid : id,
object_class_get_name(OBJECT_CLASS(cc)),
backend, errp);
-- 
2.7.4

[Qemu-devel] [PATCH v2 3/9] char: chardevice hotswap

2017-05-19 Thread Anton Nefedov

This patch adds the ability to change a char device without removing
the frontend.

1. Ideally, it would have to happen transparently to a frontend, i.e.
frontend would continue its regular operation.
However, backends are not stateless and are set up by the frontends
via qemu_chr_fe_<> functions, and it's not (generally) possible to replay
that setup entirely in backend code, as different chardevs respond
to the setup calls differently, and frontends in turn behave differently
based on those setup responses.
Moreover, a frontend can get and save the backend pointer
(qemu_chr_fe_get_driver()), which becomes invalid after a backend change.

So, a frontend which would like to support chardev hotswap has to register
a "backend change" handler, and redo its backend setup there.

2. The write path can be used by multiple threads and is thus protected
with chr_write_lock.
Hotswap therefore also has to be protected, so that write functions won't
access a backend that is being replaced.

3. The hotswap function can be called from, e.g., a read handler of a
monitor socket. This can cause trouble, so it's safer to defer execution
to a bottom half (however, this means we cannot return some of the errors
synchronously - but most of them we can).

Signed-off-by: Anton Nefedov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 chardev/char.c| 147 ++
 include/sysemu/char.h |  10 
 qapi-schema.json  |  40 ++
 3 files changed, 187 insertions(+), 10 deletions(-)

diff --git a/chardev/char.c b/chardev/char.c
index ae60950..bac5e1c 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -132,12 +132,16 @@ static bool qemu_chr_replay(Chardev *chr)
 
 int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len)
 {
-Chardev *s = be->chr;
+Chardev *s;
 ChardevClass *cc;
 int ret;
 
+qemu_mutex_lock(&be->chr_lock);
+s = be->chr;
+
 if (!s) {
-return 0;
+ret = 0;
+goto end;
 }
 
 if (qemu_chr_replay(s) && replay_mode == REPLAY_MODE_PLAY) {
@@ -145,7 +149,7 @@ int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, 
int len)
 replay_char_write_event_load(&ret, &offset);
 assert(offset <= len);
 qemu_chr_fe_write_buffer(s, buf, offset, &offset);
-return ret;
+goto end;
 }
 
 cc = CHARDEV_GET_CLASS(s);
@@ -161,7 +165,9 @@ int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, 
int len)
 if (qemu_chr_replay(s) && replay_mode == REPLAY_MODE_RECORD) {
 replay_char_write_event_save(ret, ret < 0 ? 0 : ret);
 }
-
+
+end:
+qemu_mutex_unlock(&be->chr_lock);
 return ret;
 }
 
@@ -191,13 +197,16 @@ int qemu_chr_write_all(Chardev *s, const uint8_t *buf, 
int len)
 
 int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len)
 {
-Chardev *s = be->chr;
+Chardev *s;
+int ret;
 
-if (!s) {
-return 0;
-}
+qemu_mutex_lock(&be->chr_lock);
 
-return qemu_chr_write_all(s, buf, len);
+s = be->chr;
+ret = s ? qemu_chr_write_all(s, buf, len) : 0;
+
+qemu_mutex_unlock(&be->chr_lock);
+return ret;
 }
 
 int qemu_chr_fe_read_all(CharBackend *be, uint8_t *buf, int len)
@@ -478,7 +487,7 @@ Chardev *qemu_chr_fe_get_driver(CharBackend *be)
 return be->chr;
 }
 
-bool qemu_chr_fe_init(CharBackend *b, Chardev *s, Error **errp)
+static bool fe_connect(CharBackend *b, Chardev *s, Error **errp)
 {
 int tag = 0;
 
@@ -507,6 +516,17 @@ unavailable:
 return false;
 }
 
+bool qemu_chr_fe_init(CharBackend *b, Chardev *s, Error **errp)
+{
+if (!fe_connect(b, s, errp)) {
+return false;
+}
+
+qemu_mutex_init(&b->chr_lock);
+b->hotswap_bh = NULL;
+return true;
+}
+
 static bool qemu_chr_is_busy(Chardev *s)
 {
 if (CHARDEV_IS_MUX(s)) {
@@ -531,6 +551,10 @@ void qemu_chr_fe_deinit(CharBackend *b)
 d->backends[b->tag] = NULL;
 }
 b->chr = NULL;
+qemu_mutex_destroy(&b->chr_lock);
+if (b->hotswap_bh) {
+qemu_bh_delete(b->hotswap_bh);
+}
 }
 }
 
@@ -1308,6 +1332,109 @@ ChardevReturn *qmp_chardev_add(const char *id, 
ChardevBackend *backend,
 return ret;
 }
 
+static void chardev_change_bh(void *opaque)
+{
+Chardev *chr_new = opaque;
+const char *id = chr_new->label;
+Chardev *chr = qemu_chr_find(id);
+CharBackend *be = chr->be;
+bool closed_sent = false;
+
+if (!be) {
+/* disconnected since we checked: ok, less work for us */
+goto end;
+}
+
+if (chr->be_open && !chr_new->be_open) {
+qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+closed_sent = true;
+}
+
+qemu_mutex_lock(&be->chr_lock);
+chr->be = NULL;
+fe_connect(be, chr_new, &error_abort);
+
+if (be->chr_be_change(be->opaque) < 0) {
+error_report("Chardev '%s' change failed", id);
+fe_connect(be, chr, &error_abort);
+qemu_mutex_unlock(&be->chr_lock);
+if

[Qemu-devel] [PATCH v2 2/9] char: add backend hotswap handler

2017-05-19 Thread Anton Nefedov

Frontends should have an interface to set up the handler of a backend change.
The interface will be used in the next commits.

Signed-off-by: Anton Nefedov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 backends/rng-egd.c  |  2 +-
 chardev/char-mux.c  |  1 +
 chardev/char.c  |  4 +++-
 gdbstub.c   |  2 +-
 hw/arm/pxa2xx.c |  3 ++-
 hw/arm/strongarm.c  |  2 +-
 hw/char/bcm2835_aux.c   |  2 +-
 hw/char/cadence_uart.c  |  2 +-
 hw/char/debugcon.c  |  2 +-
 hw/char/digic-uart.c|  2 +-
 hw/char/escc.c  |  2 +-
 hw/char/etraxfs_ser.c   |  2 +-
 hw/char/exynos4210_uart.c   |  2 +-
 hw/char/grlib_apbuart.c |  2 +-
 hw/char/imx_serial.c|  2 +-
 hw/char/ipoctal232.c|  2 +-
 hw/char/lm32_juart.c|  2 +-
 hw/char/lm32_uart.c |  2 +-
 hw/char/mcf_uart.c  |  2 +-
 hw/char/milkymist-uart.c|  2 +-
 hw/char/pl011.c |  2 +-
 hw/char/sclpconsole-lm.c|  2 +-
 hw/char/sclpconsole.c   |  2 +-
 hw/char/serial.c|  2 +-
 hw/char/sh_serial.c |  2 +-
 hw/char/spapr_vty.c |  2 +-
 hw/char/stm32f2xx_usart.c   |  3 ++-
 hw/char/terminal3270.c  |  2 +-
 hw/char/virtio-console.c|  4 ++--
 hw/char/xen_console.c   |  2 +-
 hw/char/xilinx_uartlite.c   |  2 +-
 hw/ipmi/ipmi_bmc_extern.c   |  2 +-
 hw/mips/boston.c|  2 +-
 hw/mips/mips_malta.c|  2 +-
 hw/misc/ivshmem.c   |  2 +-
 hw/usb/ccid-card-passthru.c |  2 +-
 hw/usb/dev-serial.c |  2 +-
 hw/usb/redirect.c   |  2 +-
 include/sysemu/char.h   |  5 +
 monitor.c   |  4 ++--
 net/colo-compare.c  | 14 --
 net/filter-mirror.c |  6 +++---
 net/slirp.c |  2 +-
 net/vhost-user.c|  7 ---
 qtest.c |  2 +-
 tests/test-char.c   | 14 ++
 tests/vhost-user-test.c |  2 +-
 47 files changed, 78 insertions(+), 59 deletions(-)

diff --git a/backends/rng-egd.c b/backends/rng-egd.c
index 380b19a..0b0e945 100644
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -106,7 +106,7 @@ static void rng_egd_opened(RngBackend *b, Error **errp)
 
 /* FIXME we should resubmit pending requests when the CDS reconnects. */
 qemu_chr_fe_set_handlers(&s->chr, rng_egd_chr_can_read,
- rng_egd_chr_read, NULL, s, NULL, true);
+ rng_egd_chr_read, NULL, NULL, s, NULL, true);
 }
 
 static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
diff --git a/chardev/char-mux.c b/chardev/char-mux.c
index 37d42c6..5849ea5 100644
--- a/chardev/char-mux.c
+++ b/chardev/char-mux.c
@@ -278,6 +278,7 @@ void mux_chr_set_handlers(Chardev *chr, GMainContext 
*context)
  mux_chr_can_read,
  mux_chr_read,
  mux_chr_event,
+ NULL,
  chr,
  context, true);
 }
diff --git a/chardev/char.c b/chardev/char.c
index 684cccd..ae60950 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -522,7 +522,7 @@ void qemu_chr_fe_deinit(CharBackend *b)
 assert(b);
 
 if (b->chr) {
-qemu_chr_fe_set_handlers(b, NULL, NULL, NULL, NULL, NULL, true);
+qemu_chr_fe_set_handlers(b, NULL, NULL, NULL, NULL, NULL, NULL, true);
 if (b->chr->be == b) {
 b->chr->be = NULL;
 }
@@ -538,6 +538,7 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
   IOCanReadHandler *fd_can_read,
   IOReadHandler *fd_read,
   IOEventHandler *fd_event,
+  BackendChangeHandler *be_change,
   void *opaque,
   GMainContext *context,
   bool set_open)
@@ -561,6 +562,7 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
 b->chr_can_read = fd_can_read;
 b->chr_read = fd_read;
 b->chr_event = fd_event;
+b->chr_be_change = be_change;
 b->opaque = opaque;
 if (cc->chr_update_read_handler) {
 cc->chr_update_read_handler(s, context);
diff --git a/gdbstub.c b/gdbstub.c
index 86eed4f..1ac0489 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2013,7 +2013,7 @@ int gdbserver_start(const char *device)
 if (chr) {
 qemu_chr_fe_init(&s->chr, chr, &error_abort);
 qemu_chr_fe_set_handlers(&s->chr, gdb_chr_can_receive, gdb_chr_receive,
- gdb_chr_event, NULL, NULL, true);
+ gdb_chr_event, NULL, NULL, NULL, true);
 }
 s->state = chr ? RS_IDLE : RS_INACTIVE;
 s->mon_chr = mon_chr;
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index eea551d..3e51882 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -1970,7 +1970,8 @@ static void pxa2xx_fir_realize

[Qemu-devel] [PATCH v2 6/9] virtio-console: chardev hotswap support

2017-05-19 Thread Anton Nefedov

Signed-off-by: Anton Nefedov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
CC: Amit Shah 
---
 hw/char/virtio-console.c | 35 ++-
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/hw/char/virtio-console.c b/hw/char/virtio-console.c
index cf7331d..bd74669 100644
--- a/hw/char/virtio-console.c
+++ b/hw/char/virtio-console.c
@@ -49,7 +49,7 @@ static ssize_t flush_buf(VirtIOSerialPort *port,
 VirtConsole *vcon = VIRTIO_CONSOLE(port);
 ssize_t ret;
 
-if (!qemu_chr_fe_get_driver(&vcon->chr)) {
+if (!qemu_chr_fe_backend_connected(&vcon->chr)) {
 /* If there's no backend, we can just say we consumed all data. */
 return len;
 }
@@ -163,12 +163,35 @@ static void chr_event(void *opaque, int event)
 }
 }
 
+static int chr_be_change(void *opaque)
+{
+VirtConsole *vcon = opaque;
+VirtIOSerialPort *port = VIRTIO_SERIAL_PORT(vcon);
+VirtIOSerialPortClass *k = VIRTIO_SERIAL_PORT_GET_CLASS(port);
+
+if (k->is_console) {
+qemu_chr_fe_set_handlers(&vcon->chr, chr_can_read, chr_read,
+ NULL, chr_be_change, vcon, NULL, true);
+} else {
+qemu_chr_fe_set_handlers(&vcon->chr, chr_can_read, chr_read,
+ chr_event, chr_be_change, vcon, NULL, false);
+}
+
+if (vcon->watch) {
+g_source_remove(vcon->watch);
+vcon->watch = qemu_chr_fe_add_watch(&vcon->chr,
+G_IO_OUT | G_IO_HUP,
+chr_write_unblocked, vcon);
+}
+
+return 0;
+}
+
 static void virtconsole_realize(DeviceState *dev, Error **errp)
 {
 VirtIOSerialPort *port = VIRTIO_SERIAL_PORT(dev);
 VirtConsole *vcon = VIRTIO_CONSOLE(dev);
 VirtIOSerialPortClass *k = VIRTIO_SERIAL_PORT_GET_CLASS(dev);
-Chardev *chr = qemu_chr_fe_get_driver(&vcon->chr);
 
 if (port->id == 0 && !k->is_console) {
 error_setg(errp, "Port number 0 on virtio-serial devices reserved "
@@ -176,7 +199,7 @@ static void virtconsole_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-if (chr) {
+if (qemu_chr_fe_backend_connected(&vcon->chr)) {
 /*
  * For consoles we don't block guest data transfer just
  * because nothing is connected - we'll just let it go
@@ -188,11 +211,13 @@ static void virtconsole_realize(DeviceState *dev, Error 
**errp)
  */
 if (k->is_console) {
 qemu_chr_fe_set_handlers(&vcon->chr, chr_can_read, chr_read,
- NULL, NULL, vcon, NULL, true);
+ NULL, chr_be_change,
+ vcon, NULL, true);
 virtio_serial_open(port);
 } else {
 qemu_chr_fe_set_handlers(&vcon->chr, chr_can_read, chr_read,
- chr_event, NULL, vcon, NULL, false);
+ chr_event, chr_be_change,
+ vcon, NULL, false);
 }
 }
 }
-- 
2.7.4

[Qemu-devel] [PATCH v2 4/9] hmp: add hmp analogue for qmp-chardev-change

2017-05-19 Thread Anton Nefedov

Signed-off-by: Anton Nefedov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
CC: Dr. David Alan Gilbert 
---
 chardev/char.c|  4 ++--
 hmp-commands.hx   | 16 
 hmp.c | 34 ++
 hmp.h |  1 +
 include/sysemu/char.h | 12 
 5 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/chardev/char.c b/chardev/char.c
index bac5e1c..0483f19 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -880,8 +880,8 @@ help_string_append(const char *name, void *opaque)
 g_string_append_printf(str, "\n%s", name);
 }
 
-static ChardevBackend *qemu_chr_parse_opts(QemuOpts *opts,
-   Error **errp)
+ChardevBackend *qemu_chr_parse_opts(QemuOpts *opts,
+Error **errp)
 {
 Error *local_err = NULL;
 const ChardevClass *cc;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 0aca984..0f2a059 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1727,6 +1727,22 @@ chardev_add accepts the same parameters as the -chardev 
command line switch.
 ETEXI
 
 {
+.name   = "chardev-change",
+.args_type  = "id:s,args:s",
+.params = "id args",
+.help   = "change chardev",
+.cmd= hmp_chardev_change,
+},
+
+STEXI
+@item chardev-change args
+@findex chardev-change
+chardev_change accepts existing chardev @var{id} and then the same arguments
+as the -chardev command line switch (except for "id").
+
+ETEXI
+
+{
 .name   = "chardev-remove",
 .args_type  = "id:s",
 .params = "id",
diff --git a/hmp.c b/hmp.c
index 3dceaf8..f7d0b38 100644
--- a/hmp.c
+++ b/hmp.c
@@ -2209,6 +2209,40 @@ void hmp_chardev_add(Monitor *mon, const QDict *qdict)
 hmp_handle_error(mon, &err);
 }
 
+void hmp_chardev_change(Monitor *mon, const QDict *qdict)
+{
+const char *args = qdict_get_str(qdict, "args");
+const char *id;
+Error *err = NULL;
+ChardevBackend *backend = NULL;
+ChardevReturn *ret = NULL;
+QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("chardev"), args,
+ true);
+if (!opts) {
+error_setg(&err, "Parsing chardev args failed");
+goto end;
+}
+
+id = qdict_get_str(qdict, "id");
+if (qemu_opts_id(opts)) {
+error_setg(&err, "Unexpected 'id' parameter");
+goto end;
+}
+
+backend = qemu_chr_parse_opts(opts, &err);
+if (!backend) {
+goto end;
+}
+
+ret = qmp_chardev_change(id, backend, &err);
+
+end:
+qapi_free_ChardevReturn(ret);
+qapi_free_ChardevBackend(backend);
+qemu_opts_del(opts);
+hmp_handle_error(mon, &err);
+}
+
 void hmp_chardev_remove(Monitor *mon, const QDict *qdict)
 {
 Error *local_err = NULL;
diff --git a/hmp.h b/hmp.h
index d8b94ce..23e035c 100644
--- a/hmp.h
+++ b/hmp.h
@@ -102,6 +102,7 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict);
 void hmp_nbd_server_add(Monitor *mon, const QDict *qdict);
 void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict);
 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
+void hmp_chardev_change(Monitor *mon, const QDict *qdict);
 void hmp_chardev_remove(Monitor *mon, const QDict *qdict);
 void hmp_qemu_io(Monitor *mon, const QDict *qdict);
 void hmp_cpu_add(Monitor *mon, const QDict *qdict);
diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index 68c7876..92ae57e 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h
@@ -132,6 +132,18 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
 void qemu_chr_parse_common(QemuOpts *opts, ChardevCommon *backend);
 
 /**
+ * @qemu_chr_parse_opts:
+ *
+ * Parse the options to the ChardevBackend struct.
+ *
+ * @opts
+ *
+ * Returns: a new backend
+ */
+ChardevBackend *qemu_chr_parse_opts(QemuOpts *opts,
+Error **errp);
+
+/**
  * @qemu_chr_new:
  *
  * Create a new character backend from a URI.
-- 
2.7.4

[Qemu-devel] [PATCH v2 9/9] char: avoid chardevice direct access

2017-05-19 Thread Anton Nefedov

Frontends should avoid accessing the CharDriver struct where possible.

Signed-off-by: Anton Nefedov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 chardev/char.c  | 5 +
 gdbstub.c   | 2 +-
 hw/arm/strongarm.c  | 2 +-
 hw/char/cadence_uart.c  | 2 +-
 hw/char/debugcon.c  | 2 +-
 hw/char/escc.c  | 6 +++---
 hw/char/exynos4210_uart.c   | 2 +-
 hw/char/grlib_apbuart.c | 2 +-
 hw/char/ipoctal232.c| 2 +-
 hw/char/parallel.c  | 2 +-
 hw/char/sclpconsole-lm.c| 2 +-
 hw/char/sclpconsole.c   | 2 +-
 hw/char/sh_serial.c | 2 +-
 hw/char/spapr_vty.c | 2 +-
 hw/char/terminal3270.c  | 2 +-
 hw/char/xen_console.c   | 2 +-
 hw/ipmi/ipmi_bmc_extern.c   | 2 +-
 hw/misc/ivshmem.c   | 4 ++--
 hw/usb/ccid-card-passthru.c | 4 ++--
 hw/usb/dev-serial.c | 5 ++---
 hw/usb/redirect.c   | 5 ++---
 include/sysemu/char.h   | 7 +++
 net/filter-mirror.c | 2 +-
 23 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/chardev/char.c b/chardev/char.c
index 36d6f36..e43d840 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -494,6 +494,11 @@ bool qemu_chr_fe_backend_connected(CharBackend *be)
 return !!be->chr;
 }
 
+bool qemu_chr_fe_backend_open(CharBackend *be)
+{
+return be->chr && be->chr->be_open;
+}
+
 static bool fe_connect(CharBackend *b, Chardev *s, Error **errp)
 {
 int tag = 0;
diff --git a/gdbstub.c b/gdbstub.c
index 1ac0489..68cbe8a 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2001,7 +2001,7 @@ int gdbserver_start(const char *device)
NULL, &error_abort);
 monitor_init(mon_chr, 0);
 } else {
-if (qemu_chr_fe_get_driver(&s->chr)) {
+if (qemu_chr_fe_backend_connected(&s->chr)) {
 object_unparent(OBJECT(qemu_chr_fe_get_driver(&s->chr)));
 }
 mon_chr = s->mon_chr;
diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c
index bec093d..9d7cf21 100644
--- a/hw/arm/strongarm.c
+++ b/hw/arm/strongarm.c
@@ -1105,7 +1105,7 @@ static void strongarm_uart_tx(void *opaque)
 
 if (s->utcr3 & UTCR3_LBM) /* loopback */ {
 strongarm_uart_receive(s, &s->tx_fifo[s->tx_start], 1);
-} else if (qemu_chr_fe_get_driver(&s->chr)) {
+} else if (qemu_chr_fe_backend_connected(&s->chr)) {
 /* XXX this blocks entire thread. Rewrite to use
  * qemu_chr_fe_write and background I/O callbacks */
 qemu_chr_fe_write_all(&s->chr, &s->tx_fifo[s->tx_start], 1);
diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c
index 71867b3..19636c0 100644
--- a/hw/char/cadence_uart.c
+++ b/hw/char/cadence_uart.c
@@ -278,7 +278,7 @@ static gboolean cadence_uart_xmit(GIOChannel *chan, 
GIOCondition cond,
 int ret;
 
 /* instant drain the fifo when there's no back-end */
-if (!qemu_chr_fe_get_driver(&s->chr)) {
+if (!qemu_chr_fe_backend_connected(&s->chr)) {
 s->tx_count = 0;
 return FALSE;
 }
diff --git a/hw/char/debugcon.c b/hw/char/debugcon.c
index 6d95297..bd0d4f0 100644
--- a/hw/char/debugcon.c
+++ b/hw/char/debugcon.c
@@ -87,7 +87,7 @@ static const MemoryRegionOps debugcon_ops = {
 
 static void debugcon_realize_core(DebugconState *s, Error **errp)
 {
-if (!qemu_chr_fe_get_driver(&s->chr)) {
+if (!qemu_chr_fe_backend_connected(&s->chr)) {
 error_setg(errp, "Can't create debugcon device, empty char device");
 return;
 }
diff --git a/hw/char/escc.c b/hw/char/escc.c
index aa882b6..dbbeb4a 100644
--- a/hw/char/escc.c
+++ b/hw/char/escc.c
@@ -416,7 +416,7 @@ static void escc_update_parameters(ChannelState *s)
 int speed, parity, data_bits, stop_bits;
 QEMUSerialSetParams ssp;
 
-if (!qemu_chr_fe_get_driver(&s->chr) || s->type != ser)
+if (!qemu_chr_fe_backend_connected(&s->chr) || s->type != ser)
 return;
 
 if (s->wregs[W_TXCTRL1] & TXCTRL1_PAREN) {
@@ -556,7 +556,7 @@ static void escc_mem_write(void *opaque, hwaddr addr,
 trace_escc_mem_writeb_data(CHN_C(s), val);
 s->tx = val;
 if (s->wregs[W_TXCTRL2] & TXCTRL2_TXEN) { // tx enabled
-if (qemu_chr_fe_get_driver(&s->chr)) {
+if (qemu_chr_fe_backend_connected(&s->chr)) {
 /* XXX this blocks entire thread. Rewrite to use
  * qemu_chr_fe_write and background I/O callbacks */
 qemu_chr_fe_write_all(&s->chr, &s->tx, 1);
@@ -1012,7 +1012,7 @@ static void escc_realize(DeviceState *dev, Error **errp)
   ESCC_SIZE << s->it_shift);
 
 for (i = 0; i < 2; i++) {
-if (qemu_chr_fe_get_driver(&s->chn[i].chr)) {
+if (qemu_chr_fe_backend_connected(&s->chn[i].chr)) {
 s->chn[i].clock = s->frequency / 2;
 qemu_chr_fe_set_handlers(&s->chn[i].chr, serial_can_receive,
  serial_receive1, serial_event, NULL,
diff --git a/hw/char/exynos4210_uart.c b/hw/char/exyno

[Qemu-devel] [PATCH v2 7/9] serial: move TIOCM update to a separate function

2017-05-19 Thread Anton Nefedov

will be used by the following patch

Signed-off-by: Anton Nefedov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
CC: Michael S. Tsirkin 
CC: Paolo Bonzini 
---
 hw/char/serial.c | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/hw/char/serial.c b/hw/char/serial.c
index d8d34d0..1e6bdeb 100644
--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -312,6 +312,24 @@ static void serial_write_fcr(SerialState *s, uint8_t val)
 }
 }
 
+static void serial_update_tiocm(SerialState *s)
+{
+int flags;
+
+qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_GET_TIOCM, &flags);
+
+flags &= ~(CHR_TIOCM_RTS | CHR_TIOCM_DTR);
+
+if (s->mcr & UART_MCR_RTS) {
+flags |= CHR_TIOCM_RTS;
+}
+if (s->mcr & UART_MCR_DTR) {
+flags |= CHR_TIOCM_DTR;
+}
+
+qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_TIOCM, &flags);
+}
+
 static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val,
 unsigned size)
 {
@@ -426,24 +444,13 @@ static void serial_ioport_write(void *opaque, hwaddr 
addr, uint64_t val,
 break;
 case 4:
 {
-int flags;
 int old_mcr = s->mcr;
 s->mcr = val & 0x1f;
 if (val & UART_MCR_LOOP)
 break;
 
 if (s->poll_msl >= 0 && old_mcr != s->mcr) {
-
-qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_GET_TIOCM, &flags);
-
-flags &= ~(CHR_TIOCM_RTS | CHR_TIOCM_DTR);
-
-if (val & UART_MCR_RTS)
-flags |= CHR_TIOCM_RTS;
-if (val & UART_MCR_DTR)
-flags |= CHR_TIOCM_DTR;
-
-qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_TIOCM, &flags);
+serial_update_tiocm(s);
 /* Update the modem status after a one-character-send 
wait-time, since there may be a response
from the device/computer at the other end of the serial 
line */
 timer_mod(s->modem_status_poll, 
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->char_transmit_time);
-- 
2.7.4

Re: [Qemu-devel] [PATCH RFC 1/6] io: only allow return path for socket typed

2017-05-19 Thread Dr. David Alan Gilbert

* Daniel P. Berrange (berra...@redhat.com) wrote:
> On Fri, May 19, 2017 at 02:43:27PM +0800, Peter Xu wrote:
> > We don't really have a return path for the other types yet. Let's check
> > this when .get_return_path() is called.
> > 
> > For this, we introduce a new feature bit, and set it up only for socket
> > typed IO channels.
> > 
> > This will help detect earlier failure for postcopy, e.g., logically
> > speaking postcopy cannot work with "exec:". Before this patch, when we
> > try to migrate with "migrate -d exec:cat>out", we'll hang the system.
> > With this patch, we'll get:
> > 
> > (qemu) migrate -d exec:cat>out
> > Unable to open return-path for postcopy
> 
> This is wrong - post-copy migration *can* work with exec: - it just entirely
> depends on what command you are running. Your example ran a command which is
> unidirectional, but if you ran 'exec:socat ...' you would have a fully
> bidirectional channel. Actually the channel is always bi-directional, but
> 'cat' simply won't ever send data back to QEMU.

The thing is it didn't used to be able to; prior to your conversion to
channel, postcopy would reject being started with exec: because it
couldn't open a return path, so it was safe.

> If QEMU hangs when the other end doesn't send data back, that actually seems
> like a potentially serious bug in migration code. Even if using the normal
> 'tcp' migration protocol, if the target QEMU server hangs and fails to
> send data to QEMU on the return path, the source QEMU must never hang.

Hmm, we shouldn't get a 'hang' with a postcopy on a link without a
return path; but it does depend on how the exec: behaves on the
destination.
If the destination discards data written to it, then I think the
behaviour would be:
   a) Page requests would just get dropped, they'd eventually get
fulfilled by the background page transmissions, but that could mean that
a page request would wait for minutes for the page.
   b) The qemu main thread on the destination can be blocked by that, so
the monitor might not respond until the page request is fulfilled.
   c) I'm not quite sure what would happen to the source return-path
thread

The behaviour seems to have changed between 2.9.0 (f26 package) and my
reasonably recent head build.

2.9.0 gives me:
(qemu) migrate_set_speed 1B
(qemu) migrate_set_capability postcopy-ram on
(qemu) migrate -d "exec:cat > out"
RP: Received invalid message 0x length 0x
(qemu) info migrate
capabilities: xbzrle: off rdma-pin-all: off auto-converge: off zero-blocks: off 
compress: off events: off postcopy-ram: on x-colo: off release-ram: off 
Migration status: failed
total time: 0 milliseconds

So that's the return path thread trying to read from the exec: not
getting anything and failing.

On head-ish it doesn't fail, the source qemu doesn't hang, however
the migration never completes - possibly because it's waiting for
the MIG_RP_MSG_SHUT from the destination.
A migration_cancel also doesn't work for 'exec' because it doesn't
support shutdown() - it just sticks in 'cancelling'.
On a socket that was broken like this the cancel would work because
it issues a shutdown() which causes the socket to cleanup.

Personally I'd rather fix this by still not supporting exec:,
making shutdown() work on exec (by kill'ing the child process)
means at least cancel would work, but it still wouldn't be pretty
for a postcopy, and still doesn't help Peter solve this problem
which is a nasty problem QEMU has had for ages.

Dave

> Regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH RFC 1/6] io: only allow return path for socket typed

2017-05-19 Thread Dr. David Alan Gilbert

* Daniel P. Berrange (berra...@redhat.com) wrote:
> On Fri, May 19, 2017 at 09:25:38AM +0100, Daniel P. Berrange wrote:
> > On Fri, May 19, 2017 at 02:43:27PM +0800, Peter Xu wrote:
> > > We don't really have a return path for the other types yet. Let's check
> > > this when .get_return_path() is called.
> > > 
> > > For this, we introduce a new feature bit, and set it up only for socket
> > > typed IO channels.
> > > 
> > > This will help detect earlier failure for postcopy, e.g., logically
> > > speaking postcopy cannot work with "exec:". Before this patch, when we
> > > try to migrate with "migrate -d exec:cat>out", we'll hang the system.
> > > With this patch, we'll get:
> > > 
> > > (qemu) migrate -d exec:cat>out
> > > Unable to open return-path for postcopy
> > 
> > This is wrong - post-copy migration *can* work with exec: - it just entirely
> > depends on what command you are running. Your example ran a command which is
> > unidirectional, but if you ran 'exec:socat ...' you would have a fully
> > bidirectional channel. Actually the channel is always bi-directional, but
> > 'cat' simply won't ever send data back to QEMU.
> > 
> > If QEMU hangs when the other end doesn't send data back, that actually seems
> > like a potentially serious bug in migration code. Even if using the normal
> > 'tcp' migration protocol, if the target QEMU server hangs and fails to
> > send data to QEMU on the return path, the source QEMU must never hang.
> 
> BTW, if you want to simplify the code in this area at all, then arguably
> we should get rid of the "get_return_path" helper method entirely. We're
> not actually opening any new connections - we're just creating a second
> QEMUFile that uses the same underlying QIOChannel object. All we would
> need is for the QEMUFile to have a separate 'buf' field management in
> QEMUFile for the read & write directions.  Then all the code would be
> able to just use the single QEMUFile for read & write getting rid of this
> concept of "opening a return path" which doesn't actually do anything at
> the underlying data transport level.

No, I'd rather keep the get_return_path;  we should keep each direction
separate.

Dave

> Regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH RFC 1/6] io: only allow return path for socket typed

2017-05-19 Thread Daniel P. Berrange

On Fri, May 19, 2017 at 01:51:43PM +0100, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrange (berra...@redhat.com) wrote:
> > On Fri, May 19, 2017 at 02:43:27PM +0800, Peter Xu wrote:
> > > We don't really have a return path for the other types yet. Let's check
> > > this when .get_return_path() is called.
> > > 
> > > For this, we introduce a new feature bit, and set it up only for socket
> > > typed IO channels.
> > > 
> > > This will help detect earlier failure for postcopy, e.g., logically
> > > speaking postcopy cannot work with "exec:". Before this patch, when we
> > > try to migrate with "migrate -d exec:cat>out", we'll hang the system.
> > > With this patch, we'll get:
> > > 
> > > (qemu) migrate -d exec:cat>out
> > > Unable to open return-path for postcopy
> > 
> > This is wrong - post-copy migration *can* work with exec: - it just entirely
> > depends on what command you are running. Your example ran a command which is
> > unidirectional, but if you ran 'exec:socat ...' you would have a fully
> > bidirectional channel. Actually the channel is always bi-directional, but
> > 'cat' simply won't ever send data back to QEMU.
> 
> The thing is it didn't used to be able to; prior to your conversion to
> channel, postcopy would reject being started with exec: because it
> couldn't open a return path, so it was safe.

It's safe but functionally broken because it is valid to want to use
exec migration with post-copy.

> > If QEMU hangs when the other end doesn't send data back, that actually seems
> > like a potentially serious bug in migration code. Even if using the normal
> > 'tcp' migration protocol, if the target QEMU server hangs and fails to
> > send data to QEMU on the return path, the source QEMU must never hang.
> 
> Hmm, we shouldn't get a 'hang' with a postcopy on a link without a
> return path; but it does depend on how the exec: behaves on the
> destination.
> If the destination discards data written to it, then I think the
> behaviour would be:
>a) Page requests would just get dropped, they'd eventually get
> fulfilled by the background page transmissions, but that could mean that
> a page request would wait for minutes for the page.
>b) The qemu main thread on the destination can be blocked by that, so
> the monitor might not respond until the page request is fulfilled.
>c) I'm not quite sure what would happen to the source return-path
> thread
> 
> The behaviour seems to have changed between 2.9.0 (f26 package) and my
> reasonably recent head build.

That's due to the bug we just fixed where we mistakenly didn't
allow bi-directional I/O for exec

  commit 062d81f0e968fe1597474735f3ea038065027372
  Author: Daniel P. Berrange 
  Date:   Fri Apr 21 12:12:20 2017 +0100

migration: setup bi-directional I/O channel for exec: protocol

Historically the migration data channel has only needed to be
unidirectional. Thus the 'exec:' protocol was requesting an
I/O channel with O_RDONLY on incoming side, and O_WRONLY on
the outgoing side.

This is fine for classic migration, but if you then try to run
TLS over it, this fails because the TLS handshake requires a
bi-directional channel.

Signed-off-by: Daniel P. Berrange 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 


> A migration_cancel also doesn't work for 'exec' because it doesn't
> support shutdown() - it just sticks in 'cancelling'.
> On a socket that was broken like this the cancel would work because
> it issues a shutdown() which causes the socket to cleanup.

I'm curious why migration_cancel requires shutdown() to work ? Why
isn't it sufficient from the source POV to just close the socket
entirely straight away.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

[Qemu-devel] [Bug 1511887] Re: USB device 1.1 not correctly passedthru from Linux host to Windows guest

2017-05-19 Thread Thomas Huth

** Changed in: qemu
   Status: Incomplete => Triaged

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1511887

Title:
  USB device 1.1 not correctly passedthru from Linux host to Windows
  guest

Status in QEMU:
  Triaged

Bug description:
  I have USB Digital Oscilloscope which works great on pure Windows
  machine but not work on virtualized one. I tried passthru the device
  from my Debian Jessie (64bit) host machine to Windows 7 (32bit) guest
  machine but unfortunately it does not work very well. It looks that
  device is passed thru so Windows machine knows about new device and
  loads HID device driver for it but the device driver failed to start
  the device and details of an error provided by device manager is "This
  device cannot start" Code 10.

  Installed Qemu version: 2.1+dfsg-12+deb8u4 0

  USB device spec: Dynon Instruments ELAB-080, USB 1.1

  On linux host computer
  ---
  lsusb identify it as:
  Bus 003 Device 009: ID 13a3:0001 

  lsusb -t identify it as:
  /: Bus 03.Port 1: Dev 1, Class=root_hub, Driver=uhci_hcd/2p, 12M
  |__ Port 1: Dev 9, If 0, Class=Human Interface Device, Driver=usbhid, 12M

  This is how I started my Windows guest machine
  --
  kvm -cpu host \
  -m 2048MiB \
  -hda test.vdi \
  -ctrl-grab \
  -parallel /dev/parport0 \
  -usbdevice host:13a3:0001

  ...also instead of last line I tried this one:
  -device usb-host,vendorid=0x13a3,productid=0x0001

  none of them help to properly handle my device inside guest machine.

  Only one time the Windows guest machine properly start the device so
  software for that oscilloscope can identify the Oscilloscope and
  measure for a while but unfortunately after I guess 5 seconds of
  measurement the device was disconnected from Windows and never start
  working again even after couple of restarts of guest machine even
  after plug and unplug it's USB cable and power cable.

  I searched for a solution or some clues to get it work but none of my
  searching over the internet was successful. Because device works on
  pure Windows but not work on virtualized one, I think there is a
  problem with handling not standard USB devices (like sticks,
  keyboards, mouses etc.)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1511887/+subscriptions

Re: [Qemu-devel] [PATCH RFC 1/6] io: only allow return path for socket typed

2017-05-19 Thread Dr. David Alan Gilbert

* Daniel P. Berrange (berra...@redhat.com) wrote:
> On Fri, May 19, 2017 at 01:51:43PM +0100, Dr. David Alan Gilbert wrote:
> > * Daniel P. Berrange (berra...@redhat.com) wrote:
> > > On Fri, May 19, 2017 at 02:43:27PM +0800, Peter Xu wrote:
> > > > We don't really have a return path for the other types yet. Let's check
> > > > this when .get_return_path() is called.
> > > > 
> > > > For this, we introduce a new feature bit, and set it up only for socket
> > > > typed IO channels.
> > > > 
> > > > This will help detect earlier failure for postcopy, e.g., logically
> > > > speaking postcopy cannot work with "exec:". Before this patch, when we
> > > > try to migrate with "migrate -d exec:cat>out", we'll hang the system.
> > > > With this patch, we'll get:
> > > > 
> > > > (qemu) migrate -d exec:cat>out
> > > > Unable to open return-path for postcopy
> > > 
> > > This is wrong - post-copy migration *can* work with exec: - it just 
> > > entirely
> > > depends on what command you are running. Your example ran a command which 
> > > is
> > > unidirectional, but if you ran 'exec:socat ...' you would have a fully
> > > bidirectional channel. Actually the channel is always bi-directional, but
> > > 'cat' simply won't ever send data back to QEMU.
> > 
> > The thing is it didn't used to be able to; prior to your conversion to
> > channel, postcopy would reject being started with exec: because it
> > couldn't open a return path, so it was safe.
> 
> It's safe but functionally broken because it is valid to want to use
> exec migration with post-copy.
> 
> > > If QEMU hangs when the other end doesn't send data back, that actually 
> > > seems
> > > like a potentially serious bug in migration code. Even if using the normal
> > > 'tcp' migration protocol, if the target QEMU server hangs and fails to
> > > send data to QEMU on the return path, the source QEMU must never hang.
> > 
> > Hmm, we shouldn't get a 'hang' with a postcopy on a link without a
> > return path; but it does depend on how the exec: behaves on the
> > destination.
> > If the destination discards data written to it, then I think the
> > behaviour would be:
> >a) Page requests would just get dropped, they'd eventually get
> > fulfilled by the background page transmissions, but that could mean that
> > a page request would wait for minutes for the page.
> >b) The qemu main thread on the destination can be blocked by that, so
> > the monitor might not respond until the page request is fulfilled.
> >c) I'm not quite sure what would happen to the source return-path
> > thread
> > 
> > The behaviour seems to have changed between 2.9.0 (f26 package) and my
> > reasonably recent head build.
> 
> That's due to the bug we just fixed where we mistakenly didn't
> allow bi-directional I/O for exec
> 
>   commit 062d81f0e968fe1597474735f3ea038065027372
>   Author: Daniel P. Berrange 
>   Date:   Fri Apr 21 12:12:20 2017 +0100
> 
> migration: setup bi-directional I/O channel for exec: protocol
> 
> Historically the migration data channel has only needed to be
> unidirectional. Thus the 'exec:' protocol was requesting an
> I/O channel with O_RDONLY on incoming side, and O_WRONLY on
> the outgoing side.
> 
> This is fine for classic migration, but if you then try to run
> TLS over it, this fails because the TLS handshake requires a
> bi-directional channel.
> 
> Signed-off-by: Daniel P. Berrange 
> Reviewed-by: Juan Quintela 
> Signed-off-by: Juan Quintela 
> 
> 
> > A migration_cancel also doesn't work for 'exec' because it doesn't
> > support shutdown() - it just sticks in 'cancelling'.
> > On a socket that was broken like this the cancel would work because
> > it issues a shutdown() which causes the socket to cleanup.
> 
> I'm curious why migration_cancel requires shutdown() to work ? Why
> isn't it sufficient from the source POV to just close the socket
> entirely straight away.

close() closes the fd so that any other uses of the fd get an
error and you're at risk of the fd getting reallocated by something
else.  So consider if the main thread calls close(), the migration
thread and the return thread both carry on using that fd, which might
have been reallocated to something different.  Worse I think we came to the
conclusion that on some OSs a read()/write() blocked in the use of an fd didn't
get kicked out by a close.

shutdown() is safe, in that it stops any other threads accessing the fd
but doesn't allow its reallocation until the close;  We perform the
close only when we've joined all other threads that were using the fd.
Any of the threads that do new calls on the fd get an error and quickly
fall down their error paths.

Dave

> Regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [Qemu-ppc] [PATCH v11 2/2] migration: spapr: migrate pending_events of spapr state

2017-05-19 Thread Daniel Henrique Barboza




On 05/18/2017 11:32 PM, David Gibson wrote:

On Thu, May 18, 2017 at 05:24:02PM -0300, Daniel Henrique Barboza wrote:

From: Jianjun Duan 

In racing situations between hotplug events and a migration operation,
an RTAS hotplug event might not yet have been delivered to the source
guest when migration is started. In this case the pending_events of the
spapr state need to be transmitted to the target so that the hotplug
event can be finished on the target.

All the different fields of the events are encoded as defined by
PAPR. We can migrate them as uint8_t binary stream without any
concerns about data padding or endianness.

pending_events is put in a subsection in the spapr state VMSD to make
sure migration across different versions is not broken.

Signed-off-by: Jianjun Duan 
Signed-off-by: Daniel Henrique Barboza 
---
  hw/ppc/spapr.c | 32 
  hw/ppc/spapr_events.c  |  1 +
  include/hw/ppc/spapr.h |  3 ++-
  3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0980d73..5afd328 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1444,6 +1444,37 @@ static bool version_before_3(void *opaque, int 
version_id)
  return version_id < 3;
  }
  
+static bool spapr_pending_events_needed(void *opaque)

+{
+sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
+return !QTAILQ_EMPTY(&spapr->pending_events);
+}
+
+static const VMStateDescription vmstate_spapr_event_entry = {
+.name = "spapr_event_log_entry",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_INT32(log_type, sPAPREventLogEntry),
+VMSTATE_UINT32(data_size, sPAPREventLogEntry),
+VMSTATE_VBUFFER_ALLOC_UINT32(data, sPAPREventLogEntry, 0,
+ NULL, data_size),
+VMSTATE_END_OF_LIST()
+},
+};
+
+static const VMStateDescription vmstate_spapr_pending_events = {
+.name = "spapr_pending_events",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = spapr_pending_events_needed,
+.fields = (VMStateField[]) {
+VMSTATE_QTAILQ_V(pending_events, sPAPRMachineState, 1,
+ vmstate_spapr_event_entry, sPAPREventLogEntry, next),
+VMSTATE_END_OF_LIST()
+},
+};
+
  static bool spapr_ov5_cas_needed(void *opaque)
  {
  sPAPRMachineState *spapr = opaque;
@@ -1542,6 +1573,7 @@ static const VMStateDescription vmstate_spapr = {
  .subsections = (const VMStateDescription*[]) {
  &vmstate_spapr_ov5_cas,
  &vmstate_spapr_patb_entry,
+&vmstate_spapr_pending_events,
  NULL
  }
  };
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 73e2a18..96c1605 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -350,6 +350,7 @@ static void rtas_event_log_queue(int log_type, void *data)
  g_assert(data);
  entry->log_type = log_type;
  entry->data = data;
+entry->data_size = sizeof(*data);

This can't be right, since data is a void*.  I'm surprised it even
compiles.  You'll need to actually look into the data buffer here and
extract the size field.

Hehe completely forgot here that I was querying the sizeof void. Good catch.
I think it didn't throw an error because the compiler defaulted the type of
void in sizeof() to something else (char perhaps?).

I'll fix it by making a switch with the log_type and setting sizeof with the
proper structure used.


Daniel



  QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next);
  }
  
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h

index 02239a5..0554e11 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -597,8 +597,9 @@ struct sPAPRTCETable {
  sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn);
  
  struct sPAPREventLogEntry {

-int log_type;
+int32_t log_type;
  void *data;
+uint32_t data_size;
  QTAILQ_ENTRY(sPAPREventLogEntry) next;
  };

Re: [Qemu-devel] [PULL 0/3] audio patch queue.

2017-05-19 Thread no-reply

Hi,

This series seems to have some coding style problems. See output below for
more information:

Message-id: 20170519112415.19191-1-kra...@redhat.com
Type: series
Subject: [Qemu-devel] [PULL 0/3] audio patch queue.

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
bd45295 audio: Rename hw/audio/audio.h to hw/audio/soundhw.h
d49234a audio: Rename audio_init() to soundhw_init()
2f8d27b audio: Move arch_init audio code to hw/audio/soundhw.c

=== OUTPUT BEGIN ===
Checking PATCH 1/3: audio: Move arch_init audio code to hw/audio/soundhw.c...
ERROR: suspect code indent for conditional statements (8, 13)
#248: FILE: hw/audio/soundhw.c:76:
+if (soundhw_count) {
+ printf("Valid sound card names (comma separated):\n");

ERROR: suspect code indent for conditional statements (13, 17)
#250: FILE: hw/audio/soundhw.c:78:
+ for (c = soundhw; c->name; ++c) {
+ printf ("%-11s %s\n", c->name, c->descr);

ERROR: space prohibited between function name and open parenthesis '('
#251: FILE: hw/audio/soundhw.c:79:
+ printf ("%-11s %s\n", c->name, c->descr);

ERROR: else should follow close brace '}'
#260: FILE: hw/audio/soundhw.c:88:
+}
+else {

ERROR: else should follow close brace '}'
#289: FILE: hw/audio/soundhw.c:117:
+}
+else {

WARNING: line over 80 characters
#307: FILE: hw/audio/soundhw.c:135:
+ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, 
NULL);

WARNING: line over 80 characters
#308: FILE: hw/audio/soundhw.c:136:
+PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, 
NULL);

total: 5 errors, 2 warnings, 320 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 2/3: audio: Rename audio_init() to soundhw_init()...
Checking PATCH 3/3: audio: Rename hw/audio/audio.h to hw/audio/soundhw.h...
=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@freelists.org

Re: [Qemu-devel] [PATCH RFC 1/6] io: only allow return path for socket typed

2017-05-19 Thread Daniel P. Berrange

On Fri, May 19, 2017 at 02:02:00PM +0100, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrange (berra...@redhat.com) wrote:
> > On Fri, May 19, 2017 at 01:51:43PM +0100, Dr. David Alan Gilbert wrote:
> > > * Daniel P. Berrange (berra...@redhat.com) wrote:
> > > > On Fri, May 19, 2017 at 02:43:27PM +0800, Peter Xu wrote:
> > > > > We don't really have a return path for the other types yet. Let's 
> > > > > check
> > > > > this when .get_return_path() is called.
> > > > > 
> > > > > For this, we introduce a new feature bit, and set it up only for 
> > > > > socket
> > > > > typed IO channels.
> > > > > 
> > > > > This will help detect earlier failure for postcopy, e.g., logically
> > > > > speaking postcopy cannot work with "exec:". Before this patch, when we
> > > > > try to migrate with "migrate -d exec:cat>out", we'll hang the system.
> > > > > With this patch, we'll get:
> > > > > 
> > > > > (qemu) migrate -d exec:cat>out
> > > > > Unable to open return-path for postcopy
> > > > 
> > > > This is wrong - post-copy migration *can* work with exec: - it just 
> > > > entirely
> > > > depends on what command you are running. Your example ran a command 
> > > > which is
> > > > unidirectional, but if you ran 'exec:socat ...' you would have a fully
> > > > bidirectional channel. Actually the channel is always bi-directional, 
> > > > but
> > > > 'cat' simply won't ever send data back to QEMU.
> > > 
> > > The thing is it didn't used to be able to; prior to your conversion to
> > > channel, postcopy would reject being started with exec: because it
> > > couldn't open a return path, so it was safe.
> > 
> > It's safe but functionally broken because it is valid to want to use
> > exec migration with post-copy.
> > 
> > > > If QEMU hangs when the other end doesn't send data back, that actually 
> > > > seems
> > > > like a potentially serious bug in migration code. Even if using the 
> > > > normal
> > > > 'tcp' migration protocol, if the target QEMU server hangs and fails to
> > > > send data to QEMU on the return path, the source QEMU must never hang.
> > > 
> > > Hmm, we shouldn't get a 'hang' with a postcopy on a link without a
> > > return path; but it does depend on how the exec: behaves on the
> > > destination.
> > > If the destination discards data written to it, then I think the
> > > behaviour would be:
> > >a) Page requests would just get dropped, they'd eventually get
> > > fulfilled by the background page transmissions, but that could mean that
> > > a page request would wait for minutes for the page.
> > >b) The qemu main thread on the destination can be blocked by that, so
> > > the monitor might not respond until the page request is fulfilled.
> > >c) I'm not quite sure what would happen to the source return-path
> > > thread
> > > 
> > > The behaviour seems to have changed between 2.9.0 (f26 package) and my
> > > reasonably recent head build.
> > 
> > That's due to the bug we just fixed where we mistakenly didn't
> > allow bi-directional I/O for exec
> > 
> >   commit 062d81f0e968fe1597474735f3ea038065027372
> >   Author: Daniel P. Berrange 
> >   Date:   Fri Apr 21 12:12:20 2017 +0100
> > 
> > migration: setup bi-directional I/O channel for exec: protocol
> > 
> > Historically the migration data channel has only needed to be
> > unidirectional. Thus the 'exec:' protocol was requesting an
> > I/O channel with O_RDONLY on incoming side, and O_WRONLY on
> > the outgoing side.
> > 
> > This is fine for classic migration, but if you then try to run
> > TLS over it, this fails because the TLS handshake requires a
> > bi-directional channel.
> > 
> > Signed-off-by: Daniel P. Berrange 
> > Reviewed-by: Juan Quintela 
> > Signed-off-by: Juan Quintela 
> > 
> > 
> > > A migration_cancel also doesn't work for 'exec' because it doesn't
> > > support shutdown() - it just sticks in 'cancelling'.
> > > On a socket that was broken like this the cancel would work because
> > > it issues a shutdown() which causes the socket to cleanup.
> > 
> > I'm curious why migration_cancel requires shutdown() to work ? Why
> > isn't it sufficient from the source POV to just close the socket
> > entirely straight away.
> 
> close() closes the fd so that any other uses of the fd get an
> error and you're at risk of the fd getting reallocated by something
> else.  So consider if the main thread calls close(), the migration
> thread and the return thread both carry on using that fd, which might
> have been reallocated to something different.  Worse I think we came to the
> conclusion that on some OSs a read()/write() blocked in the use of an fd
> didn't get kicked out by a close.

I'd be very surprised if close() didn't terminate any other threads doing
read/write, and even more surprised if they handed out the same FD
to another thread while something was still in the read.

> shutdown() is safe, in that it stops any other threads accessing the fd
> but doesn't

Re: [Qemu-devel] [RFC PATCH v2 1/4] migration: Introduce unregister_savevm_live()

2017-05-19 Thread Laurent Vivier

On 19/05/2017 09:33, David Gibson wrote:
> On Fri, May 19, 2017 at 11:10:36AM +0530, Bharata B Rao wrote:
>> Introduce a new function unregister_savevm_live() to unregister the vmstate
>> handlers registered via register_savevm_live().
>>
>> register_savevm() allocates SaveVMHandlers while register_savevm_live()
>> gets passed with SaveVMHandlers. During unregistration, we  want to
>> free SaveVMHandlers in the former case but not free in the latter case.
>> Hence this new API is needed to differentiate this.
>>
>> This new API will be needed by PowerPC to unregister the HTAB savevm
>> handlers.
>>
>> Signed-off-by: Bharata B Rao 
> 
> Reviewed-by: David Gibson 
> 
> I could take this through my tree, but it would need an ACK from Dave
> Gilbert or Juan Quintela.

I cc: them for that.

Just a comment on the patch.

Instead of introducing a new function, perhaps we can homogenize the use
of register_savevm() by always providing a SaveVMHandlers pointer and
never a couple of (SaveStateHandler, LoadStateHandler) so the
unregister_save() has never to free se->ops?

Laurent

Re: [Qemu-devel] EL2\EL3 support for Aarch64

2017-05-19 Thread Alex Bennée

Sergey Smolov  writes:

> Hello, List!
>
> Could you tell me, what is the current state for EL2\EL3 (Aarch64)?
> Whether they are fully supported, or not?
> Do I need to use any command line options to enable them for Aarch64
> assembler programs simulation?

Sure you can use both. To enable EL2 support:

  qemu-system-aarch64 ${QEMU_OPTS} \
-machine gic-version=3 \
-machine virtualization=true

To boot in secure mode you'll need to use -bios and boot a secure
firmware (like OPTEE for example).

>
> Thanks in advance!

--
Alex Bennée

Re: [Qemu-devel] [PATCH] block: Tweak error message related to qemu-img convert

2017-05-19 Thread Max Reitz

On 2017-05-08 19:13, Eric Blake wrote:
> When converting a 1.1 image down to 0.10, qemu-iotests 060 forces
> a contrived failure where allocating a cluster used to replace a
> zero cluster reads unaligned data.  Since it is a zero cluster
> rather than a data cluster being converted, changing the error
> message to match our earlier change in 'qcow2: Make distinction
> between zero cluster types obvious' is worthwhile.
> 
> Suggested-by: Max Reitz 
> Signed-off-by: Eric Blake 
> ---
> 
> There's one more instance of "Data cluster offset" in qcow2-cluster.c,
> but that one in handle_copied() is contained inside a
> cluster_type == QCOW2_CLUSTER_NORMAL conditional.
> 
>  block/qcow2-cluster.c  | 3 ++-
>  tests/qemu-iotests/060.out | 2 +-
>  2 files changed, 3 insertions(+), 2 deletions(-)

Assuming no objection means consent:

Thanks, fixed the commit title and applied to my block branch:

https://github.com/XanClic/qemu/commits/block

Max



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 0/2] linux-user: fix eventfd()

2017-05-19 Thread Riku Voipio

On Tue, Apr 25, 2017 at 06:32:30PM +0200, Laurent Vivier wrote:
> Ping?

Applied, thanks. 

> Laurent
> 
> Le 01/03/2017 à 10:37, Laurent Vivier a écrit :
> > This patch series byte-swaps the uint64_t data stream
> > of a file-descriptor opened with eventfd().
> > 
> > It allows to pass more LTP test cases:
> > 
> > eventfd011  TPASS  :  counter value matches required
> > eventfd012  TPASS  :  read failed with EAGAIN as expected
> > eventfd013  TPASS  :  counter value matches required
> > eventfd014  TPASS  :  write failed with EAGAIN as expected
> > eventfd015  TPASS  :  read failed with EINVAL as expected
> > eventfd016  TPASS  :  write failed with EINVAL as expected
> > eventfd017  TPASS  :  write failed with EINVAL as expected
> > eventfd018  TPASS  :  fd is set in readfds
> > eventfd019  TPASS  :  fd is not set in readfds
> > eventfd01   10  TPASS  :  fd is set in writefds
> > eventfd01   11  TPASS  :  fd is not set in writefds
> > eventfd011  TPASS  :  counter value matches required
> > eventfd012  TPASS  :  read failed with EAGAIN as expected
> > eventfd013  TPASS  :  counter value matches required
> > eventfd014  TPASS  :  write failed with EAGAIN as expected
> > eventfd015  TPASS  :  read failed with EINVAL as expected
> > eventfd016  TPASS  :  write failed with EINVAL as expected
> > eventfd017  TPASS  :  write failed with EINVAL as expected
> > eventfd018  TPASS  :  fd is set in readfds
> > eventfd019  TPASS  :  fd is not set in readfds
> > eventfd01   10  TPASS  :  fd is set in writefds
> > eventfd01   11  TPASS  :  fd is not set in writefds
> > eventfd01   12  TPASS  :  counter value write from child successful
> > eventfd01   13  TCONF  :  eventfd01.c:642: eventfd support is not available 
> > in AIO subsystem
> > eventfd01   14  TCONF  :  eventfd01.c:647: eventfd support is not available 
> > in AIO subsystem
> > eventfd01   15  TCONF  :  eventfd01.c:652: eventfd support is not available 
> > in AIO subsystem
> > 
> > Laurent Vivier (2):
> >   linux-user: call fd_trans_target_to_host_data() for write()
> >   linux-user: fix eventfd
> > 
> >  linux-user/syscall.c | 38 +++---
> >  1 file changed, 35 insertions(+), 3 deletions(-)
> > 
>

Re: [Qemu-devel] [PATCH] linux-user: Fix TARGET_MAP* and TARGET_F_??LCK for hppa arch

2017-05-19 Thread Riku Voipio

On Sun, Mar 12, 2017 at 08:17:46AM +1000, Richard Henderson wrote:
> On 03/12/2017 03:50 AM, Helge Deller wrote:
> >TARGET_MAP_TYPE needs to be 0x03 instead of 0x0f on the hppa
> >architecture, otherwise it conflicts with MAP_FIXED which is 0x04.
> >
> >Add missing TARGET_MAP_STACK and TARGET_MAP_HUGETLB values.
> >
> >Fix TARGET_F_RDLCK, TARGET_F_WRLCK and TARGET_F_UNLCK.
> >
> >Signed-off-by: Helge Deller 
> 
> I applied the MAP_FIXED and TARGET_F_* parts separately in my tree.  I'd
> like to see what others think about the other MAP_* defines before including
> that.

What's the current state of these patches? Are these patches still waiting for
opinions?

Riku

Re: [Qemu-devel] [PATCH] linux-user: remove all traces of qemu from /proc/self/cmdline

2017-05-19 Thread Riku Voipio

On Mon, Mar 20, 2017 at 12:31:55PM +0100, Andreas Schwab wrote:
> Instead of post-processing the real contents use the remembered target
> argv.  That removes all traces of qemu, including command line options,
> and handles QEMU_ARGV0.

Applied to Linux-user, thanks

Riku
 
> Signed-off-by: Andreas Schwab 
> ---
>  linux-user/syscall.c | 47 +++
>  1 file changed, 7 insertions(+), 40 deletions(-)
> 
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index cec8428589..ec1fd20386 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -7358,52 +7358,19 @@ int host_to_target_waitstatus(int status)
>  
>  static int open_self_cmdline(void *cpu_env, int fd)
>  {
> -int fd_orig = -1;
> -bool word_skipped = false;
> -
> -fd_orig = open("/proc/self/cmdline", O_RDONLY);
> -if (fd_orig < 0) {
> -return fd_orig;
> -}
> +CPUState *cpu = ENV_GET_CPU((CPUArchState *)cpu_env);
> +struct linux_binprm *bprm = ((TaskState *)cpu->opaque)->bprm;
> +int i;
>  
> -while (true) {
> -ssize_t nb_read;
> -char buf[128];
> -char *cp_buf = buf;
> +for (i = 0; i < bprm->argc; i++) {
> +size_t len = strlen(bprm->argv[i]) + 1;
>  
> -nb_read = read(fd_orig, buf, sizeof(buf));
> -if (nb_read < 0) {
> -int e = errno;
> -fd_orig = close(fd_orig);
> -errno = e;
> +if (write(fd, bprm->argv[i], len) != len) {
>  return -1;
> -} else if (nb_read == 0) {
> -break;
> -}
> -
> -if (!word_skipped) {
> -/* Skip the first string, which is the path to qemu-*-static
> -   instead of the actual command. */
> -cp_buf = memchr(buf, 0, nb_read);
> -if (cp_buf) {
> -/* Null byte found, skip one string */
> -cp_buf++;
> -nb_read -= cp_buf - buf;
> -word_skipped = true;
> -}
> -}
> -
> -if (word_skipped) {
> -if (write(fd, cp_buf, nb_read) != nb_read) {
> -int e = errno;
> -close(fd_orig);
> -errno = e;
> -return -1;
> -}
>  }
>  }
>  
> -return close(fd_orig);
> +return 0;
>  }
>  
>  static int open_self_maps(void *cpu_env, int fd)
> -- 
> 2.12.0
> 
> 
> -- 
> Andreas Schwab, SUSE Labs, sch...@suse.de
> GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
> "And now for something completely different."

Re: [Qemu-devel] [PATCH] e1000e: Fix a bug where guest hangs upon migration

2017-05-19 Thread Sameeh Jubran

On Fri, May 19, 2017 at 9:25 AM, Jason Wang  wrote:

>
>
> On 2017年05月17日 19:46, Sameeh Jubran wrote:
>
>> The bug was caused by the "receive overrun" (bit #6 of the ICR register)
>> interrupt
>> which would be triggered post migration in a heavy traffic environment.
>> Even though the
>> "receive overrun" bit (#6) is masked out by the IMS register (refer to
>> the log below)
>> the driver still receives an interrupt as the "receive overrun" bit (#6)
>> causes the
>> "Other" - bit #24 of the ICR register - bit to be set as documented
>> below. The driver
>> handles the interrupt and clears the "Other" bit (#24) but doesn't clear
>> the
>> "receive overrun" bit (#6) which leads to an infinite loop. Apparently
>> the Windows
>> driver expects that the "receive overrun" bit and other ones - documented
>> below - to be
>> cleared when the "Other" bit (#24) is cleared.
>>
>> So to sum that up:
>> 1. Bit #6 of the ICR register is set by heavy traffic
>> 2. As a result of setting bit #6, bit #24 is set
>> 3. The driver receives an interrupt for bit 24 (it doesn't receive an
>> interrupt for bit #6 as it is masked out by IMS)
>> 4. The driver handles and clears the interrupt of bit #24
>> 5. Bit #6 is still set.
>> 6. 2 happens all over again
>>
>> The Interrupt Cause Read - ICR register:
>>
>> The ICR has the "Other" bit - bit #24 - that is set when one or more of
>> the following
>> ICR register's bits are set:
>>
>> LSC - bit #2, RXO - bit #6, MDAC - bit #9, SRPD - bit #16, ACK - bit #17,
>> MNG - bit #18
>>
>> Log sample of the storm:
>>
>> 27563@1494850819.411877:e1000e_irq_pending_interrupts ICR PENDING:
>> 0x100 (ICR: 0x815000c2, IMS: 0x1a4)
>> 27563@1494850819.411900:e1000e_irq_pending_interrupts ICR PENDING: 0x0
>> (ICR: 0x815000c2, IMS: 0xa4)
>> 27563@1494850819.411915:e1000e_irq_pending_interrupts ICR PENDING: 0x0
>> (ICR: 0x815000c2, IMS: 0xa4)
>> 27563@1494850819.412380:e1000e_irq_pending_interrupts ICR PENDING: 0x0
>> (ICR: 0x815000c2, IMS: 0xa4)
>> 27563@1494850819.412395:e1000e_irq_pending_interrupts ICR PENDING: 0x0
>> (ICR: 0x815000c2, IMS: 0xa4)
>> 27563@1494850819.412436:e1000e_irq_pending_interrupts ICR PENDING: 0x0
>> (ICR: 0x815000c2, IMS: 0xa4)
>> 27563@1494850819.412441:e1000e_irq_pending_interrupts ICR PENDING: 0x0
>> (ICR: 0x815000c2, IMS: 0xa4)
>> 27563@1494850819.412998:e1000e_irq_pending_interrupts ICR PENDING:
>> 0x100 (ICR: 0x815000c2, IMS: 0x1a4)
>>
>> This commit solves:
>> https://bugzilla.redhat.com/show_bug.cgi?id=1447935
>> https://bugzilla.redhat.com/show_bug.cgi?id=1449490
>>
>> Signed-off-by: Sameeh Jubran 
>> ---
>>   hw/net/e1000e_core.c | 7 +--
>>   1 file changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
>> index 28c5be1..8174b53 100644
>> --- a/hw/net/e1000e_core.c
>> +++ b/hw/net/e1000e_core.c
>> @@ -2454,14 +2454,17 @@ e1000e_set_ics(E1000ECore *core, int index,
>> uint32_t val)
>>   static void
>>   e1000e_set_icr(E1000ECore *core, int index, uint32_t val)
>>   {
>> +uint32_t icr = 0;
>>   if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
>>   (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
>>   trace_e1000e_irq_icr_process_iame();
>>   e1000e_clear_ims_bits(core, core->mac[IAM]);
>>   }
>>   -trace_e1000e_irq_icr_write(val, core->mac[ICR], core->mac[ICR] &
>> ~val);
>> -core->mac[ICR] &= ~val;
>> +icr = core->mac[ICR] & ~val;
>> +icr = (val & E1000_ICR_OTHER) ? (icr & ~E1000_ICR_OTHER_CAUSES) :
>> icr;
>> +trace_e1000e_irq_icr_write(val, core->mac[ICR], icr);
>> +core->mac[ICR] = icr;
>>   e1000e_update_interrupt_state(core);
>>   }
>>
>>
>
> Thanks for the patch.
>
> So this is an undocumented behavior, we must be careful on this. Several
> question below:
>
> - have you verified this on real hardware?
>
No I haven't

> - is MSIX enabled in this case?
>
Yes it is, I have tested the patch with msi disabled too.

> - according to the steps you've summed up above, it's not specific to
> migration?
>
True

>
> Thanks
>



-- 
Respectfully,
*Sameeh Jubran*
*Linkedin *
*Software Engineer @ Daynix .*

[Qemu-devel] A problem of IRQchip in QEMU and KVM for ARM

2017-05-19 Thread Li Zhang

Hi,

I am looking into QEMU code in ARM recently and trying to add add_hot_cpu
in QEMU for ARM,
but it doesn't work when enabling KVM. It reports the error:

"kvm_init_vcpu failed: Device or resource busy."

Debugging QEMU with gdb shows that it fails on the ioctl. In the kernel source code,
arch/arm/kvm/arm.c,
the vcpu is created by the following function, which reports -EBUSY if
irqchip_in_kernel.

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
int err;
struct kvm_vcpu *vcpu;

if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
err = -EBUSY;
goto out;
}

  
}

I set virt machine with kernel_irqchip = off, it can execute cpu-add
interface correctly with qmp-shell
commands. But VMs still can't work well with kernel_irqchip=off when
executing "info cpus" in qemu monitor.

My question is that:
1) Can we change this error status in kvm_arch_vcpu_create?
2) Is it that irqchip_kernel=off  isn't supported with KVM enabled on ARM?

-- 

Best Regards
-Li

Re: [Qemu-devel] Migration downtime more than 5s when migrating guest with massive disks

2017-05-19 Thread Gonglei (Arei)

Oops, forgot to CC qemu-devel, add it.


> -Original Message-
> From: Gonglei (Arei)
> Sent: Friday, May 19, 2017 8:17 PM
> To: 'Paolo Bonzini'; yanghongyang; m...@redhat.com
> Cc: quint...@redhat.com; Dr. David Alan Gilbert; Huangzhichao
> Subject: RE: Migration downtime more than 5s when migrating guest with
> massive disks
> 
> 
> > -Original Message-
> > From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> > Sent: Friday, May 19, 2017 6:19 PM
> >
> > On 19/05/2017 12:00, Yang Hongyang wrote:
> > > We found that migration downtime is unacceptable when migrating guest
> > with
> > > 60 disks, more than 5.5 seconds.
> > > By debugging, we find out the problem is there's too many
> > > memory_region_transaction_commit() operations during guest load, about
> > > 31w+ times.
> > > Any idea to optimize the migration downtime in this scenario?
> > > maybe reduce the times of memory_region_transaction_commit() call, but
> > how?
> > > or we could optimize the time cost of
> memory_region_transaction_commit()
> > call,
> > > but I think that wouldn't help much.
> >
> > It would.  Right now memory_region_transaction_commit() is roughly
> > O(n^2) (n devices * n BARs), and there are n of them.
> >
> > Reducing memory_region_transaction_commit to O(n) would be a large
> > change.  One idea is to share the AddressSpaceDispatch for AddressSpaces
> > that have the same root memory region (after resolving aliases).  The
> > starting point would be to change mem_begin/mem_commit/mem_add from
> a
> > MemoryListener to an loop on the FlatView, storing the
> > AddressSpaceDispatch in the FlatView.
> >
> How about do O(1) for stopping stage of live migration?
> Because the cpu is stopped in this phase, it wouldn't cause
> side effects IMHO, right?
> 
> Thanks,
> -Gonglei
> 
> > One bandaid solution is to use virtio-scsi in the guest, with multiple
> > disks behind one controller.
> >
> > Thanks,
> >
> > Paolo

[Qemu-devel] [PATCH v12 1/2] hw/ppc/spapr_events.c: removing 'exception' from sPAPREventLogEntry

2017-05-19 Thread Daniel Henrique Barboza

Currently we do not have any RTAS event that is reported by the
event-scan interface. The existing events, RTAS_LOG_TYPE_EPOW and
RTAS_LOG_TYPE_HOTPLUG, are being reported by the check-exception
interface and, as such, marked as 'exception=true'.

Commit 79853e18d9, 'spapr_events: event-scan RTAS interface', added
the event_scan interface because the guest kernel requires it to
initialize other required interfaces. It is acting since then as
a stub because no events that would be reported by it were added
since then. However, the existence of the 'exception' boolean adds
an unnecessary load in the future migration of the pending_events,
sPAPREventLogEntry QTAILQ that hosts the pending RTAS events.

To make the code cleaner and ease the future migration changes, this
patch makes the following changes:

- remove the 'exception' boolean that filter these events. There is
nothing to filter since all events are reported by check-exception;

- functions rtas_event_log_queue, rtas_event_log_dequeue and
rtas_event_log_contains don't receive the 'exception' boolean
as parameter;

- event_scan function was simplified. It was calling
'rtas_event_log_dequeue(mask, false)' that was always returning
'NULL' because we have no events that are created with
exception=false, thus in the end it would execute a jump to
'out_no_events' all the time. The function now assumes that
this will always be the case and all the remaining logic was
deleted.

In the future, when or if we add new RTAS events that should
be reported with the event_scan interface, we can refer to
the changes made in this patch to add the event_scan logic
back.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/ppc/spapr_events.c  | 52 +++---
 include/hw/ppc/spapr.h |  1 -
 2 files changed, 7 insertions(+), 46 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index f0b28d8..73e2a18 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -342,20 +342,18 @@ static int rtas_event_log_to_irq(sPAPRMachineState 
*spapr, int log_type)
 return source->irq;
 }
 
-static void rtas_event_log_queue(int log_type, void *data, bool exception)
+static void rtas_event_log_queue(int log_type, void *data)
 {
 sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1);
 
 g_assert(data);
 entry->log_type = log_type;
-entry->exception = exception;
 entry->data = data;
 QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next);
 }
 
-static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
-  bool exception)
+static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask)
 {
 sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 sPAPREventLogEntry *entry = NULL;
@@ -364,10 +362,6 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t 
event_mask,
 const sPAPREventSource *source =
 rtas_event_log_to_source(spapr, entry->log_type);
 
-if (entry->exception != exception) {
-continue;
-}
-
 if (source->mask & event_mask) {
 break;
 }
@@ -380,7 +374,7 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t 
event_mask,
 return entry;
 }
 
-static bool rtas_event_log_contains(uint32_t event_mask, bool exception)
+static bool rtas_event_log_contains(uint32_t event_mask)
 {
 sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 sPAPREventLogEntry *entry = NULL;
@@ -389,10 +383,6 @@ static bool rtas_event_log_contains(uint32_t event_mask, 
bool exception)
 const sPAPREventSource *source =
 rtas_event_log_to_source(spapr, entry->log_type);
 
-if (entry->exception != exception) {
-continue;
-}
-
 if (source->mask & event_mask) {
 return true;
 }
@@ -479,7 +469,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
 epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL;
 epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC;
 
-rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
+rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow);
 
 qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
  rtas_event_log_to_irq(spapr,
@@ -572,7 +562,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t 
hp_action,
 cpu_to_be32(drc_id->count_indexed.index);
 }
 
-rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
+rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp);
 
 qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
  rtas_event_log_to_irq(spapr,
@@ -667,7 +657,7 @@ static void check_exception(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 xinfo |= (uint64_t)rtas_ld(args, 6) << 32;
 }
 
-event = rtas_event_log_

[Qemu-devel] [PATCH v12 2/2] migration: spapr: migrate pending_events of spapr state

2017-05-19 Thread Daniel Henrique Barboza

From: Jianjun Duan 

In racing situations between hotplug events and migration operation,
an RTAS hotplug event might not yet have been delivered to the source
guest when migration is started. In this case the pending_events of
spapr state need to be transmitted to the target so that the hotplug
event can be finished on the target.

All the different fields of the events are encoded as defined by
PAPR. We can migrate them as a binary stream inside VBUFFER without
any concerns about data padding or endianness.

pending_events is put in a subsection in the spapr state VMSD to make
sure migration across different versions is not broken.

Signed-off-by: Jianjun Duan 
Signed-off-by: Daniel Henrique Barboza 
---
 hw/ppc/spapr.c | 32 
 hw/ppc/spapr_events.c  | 12 
 include/hw/ppc/spapr.h |  3 ++-
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0980d73..5afd328 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1444,6 +1444,37 @@ static bool version_before_3(void *opaque, int 
version_id)
 return version_id < 3;
 }
 
+static bool spapr_pending_events_needed(void *opaque)
+{
+sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
+return !QTAILQ_EMPTY(&spapr->pending_events);
+}
+
+static const VMStateDescription vmstate_spapr_event_entry = {
+.name = "spapr_event_log_entry",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_INT32(log_type, sPAPREventLogEntry),
+VMSTATE_UINT32(data_size, sPAPREventLogEntry),
+VMSTATE_VBUFFER_ALLOC_UINT32(data, sPAPREventLogEntry, 0,
+ NULL, data_size),
+VMSTATE_END_OF_LIST()
+},
+};
+
+static const VMStateDescription vmstate_spapr_pending_events = {
+.name = "spapr_pending_events",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = spapr_pending_events_needed,
+.fields = (VMStateField[]) {
+VMSTATE_QTAILQ_V(pending_events, sPAPRMachineState, 1,
+ vmstate_spapr_event_entry, sPAPREventLogEntry, next),
+VMSTATE_END_OF_LIST()
+},
+};
+
 static bool spapr_ov5_cas_needed(void *opaque)
 {
 sPAPRMachineState *spapr = opaque;
@@ -1542,6 +1573,7 @@ static const VMStateDescription vmstate_spapr = {
 .subsections = (const VMStateDescription*[]) {
 &vmstate_spapr_ov5_cas,
 &vmstate_spapr_patb_entry,
+&vmstate_spapr_pending_events,
 NULL
 }
 };
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 73e2a18..a509c46 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -350,6 +350,18 @@ static void rtas_event_log_queue(int log_type, void *data)
 g_assert(data);
 entry->log_type = log_type;
 entry->data = data;
+
+switch (log_type) {
+case RTAS_LOG_TYPE_EPOW:
+entry->data_size = sizeof(struct epow_log_full);
+break;
+case RTAS_LOG_TYPE_HOTPLUG:
+entry->data_size = sizeof(struct hp_log_full);
+break;
+default:
+g_assert(false);
+}
+
 QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next);
 }
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 02239a5..0554e11 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -597,8 +597,9 @@ struct sPAPRTCETable {
 sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn);
 
 struct sPAPREventLogEntry {
-int log_type;
+int32_t log_type;
 void *data;
+uint32_t data_size;
 QTAILQ_ENTRY(sPAPREventLogEntry) next;
 };
 
-- 
2.9.4

[Qemu-devel] [PATCH v12 0/2] pseries: migrate pending_events of spapr state

2017-05-19 Thread Daniel Henrique Barboza

NOTE: At the moment I am sending this v12, patch 1 isn't available in
dgibson/ppc-for-2.10 branch yet. I am resending it here, unchanged,
just to allow patch 2 to be applied cleanly.


v12:
- patch 2: added a switch statement to get the proper data_size based on
the log_type

v11:
- patch 1 (new): cleanup of spapr_events.c:
* removed the 'exception' boolean from the sPAPREventLogEntry
* simplified the 'event_scan' function
- patch 2:
* data_size is now calculated inside rtas_event_log_queue()
* using VBUFFER instead of VARRAY to avoid casts
* log_type changed to int32_t

v10: detached from DRC patch set

v9: no changes

v8: no changes

v7: no changes

v6: - Rebased with QEMU master after 6+ months.
class and minor improvements.
- Added clarifications from the previous v5 discussions in the commit 
messages.

v5: - Rebased on David's ppc-for-2.8.

v4: - Rebased on David's ppc-for-2.7. 

v3: - Simplify overall design following discussion with Paolo. No longer need
  metadata to migrate QTAILQ.
- Extend VMStateInfo instead of adding similar fields to VMStateField.

v2: - Put the newly added migrating fields in subsections so that backward 
  migration is not broken.  
(link: https://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg04188.html)

v1: - Initial version.
(link: https://lists.nongnu.org/archive/html/qemu-devel/2016-04/msg02601.html)


This patch was detached from the patchset:

"[PATCH v9 0/6] migration/ppc: migrating DRC, ccs_list and pending_events"

Because it is independent and has use outside of the scope of the
pseries DRC migration patchset.

Daniel Henrique Barboza (1):
  hw/ppc/spapr_events.c: removing 'exception' from sPAPREventLogEntry

Jianjun Duan (1):
  migration: spapr: migrate pending_events of spapr state

 hw/ppc/spapr.c | 32 +
 hw/ppc/spapr_events.c  | 64 +++---
 include/hw/ppc/spapr.h |  4 ++--
 3 files changed, 53 insertions(+), 47 deletions(-)

-- 
2.9.4

[Qemu-devel] [PATCH] fsdev: fix virtfs-proxy-helper cwd

2017-05-19 Thread Greg Kurz

Since chroot() doesn't change the current directory, it is indeed a good
practice to chdir() to the target directory and then chroot(), or
to chroot() to the target directory and then chdir("/").

The current code does neither of them actually. Let's go for the latter.

This doesn't fix any security issue since all of this takes place before
the helper begins to process requests.

Signed-off-by: Greg Kurz 
---
 fsdev/virtfs-proxy-helper.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c
index 54f7ad1c48f0..4c4238f62e53 100644
--- a/fsdev/virtfs-proxy-helper.c
+++ b/fsdev/virtfs-proxy-helper.c
@@ -1129,14 +1129,14 @@ int main(int argc, char **argv)
 }
 }
 
-if (chdir("/") < 0) {
-do_perror("chdir");
-goto error;
-}
 if (chroot(rpath) < 0) {
 do_perror("chroot");
 goto error;
 }
+if (chdir("/") < 0) {
+do_perror("chdir");
+goto error;
+}
 
 get_version = false;
 #ifdef FS_IOC_GETVERSION

Re: [Qemu-devel] [PATCH RFC 1/6] io: only allow return path for socket typed

2017-05-19 Thread Dr. David Alan Gilbert

* Daniel P. Berrange (berra...@redhat.com) wrote:
> On Fri, May 19, 2017 at 02:02:00PM +0100, Dr. David Alan Gilbert wrote:
> > * Daniel P. Berrange (berra...@redhat.com) wrote:
> > > On Fri, May 19, 2017 at 01:51:43PM +0100, Dr. David Alan Gilbert wrote:
> > > > * Daniel P. Berrange (berra...@redhat.com) wrote:
> > > > > On Fri, May 19, 2017 at 02:43:27PM +0800, Peter Xu wrote:
> > > > > > We don't really have a return path for the other types yet. Let's 
> > > > > > check
> > > > > > this when .get_return_path() is called.
> > > > > > 
> > > > > > For this, we introduce a new feature bit, and set it up only for 
> > > > > > socket
> > > > > > typed IO channels.
> > > > > > 
> > > > > > This will help detect earlier failure for postcopy, e.g., logically
> > > > > > speaking postcopy cannot work with "exec:". Before this patch, when 
> > > > > > we
> > > > > > try to migrate with "migrate -d exec:cat>out", we'll hang the 
> > > > > > system.
> > > > > > With this patch, we'll get:
> > > > > > 
> > > > > > (qemu) migrate -d exec:cat>out
> > > > > > Unable to open return-path for postcopy
> > > > > 
> > > > > This is wrong - post-copy migration *can* work with exec: - it just 
> > > > > entirely
> > > > > depends on what command you are running. Your example ran a command 
> > > > > which is
> > > > > unidirectional, but if you ran 'exec:socat ...' you would have a fully
> > > > > bidirectional channel. Actually the channel is always bi-directional, 
> > > > > but
> > > > > 'cat' simply won't ever send data back to QEMU.
> > > > 
> > > > The thing is it didn't used to be able to; prior to your conversion to
> > > > channel, postcopy would reject being started with exec: because it
> > > > couldn't open a return path, so it was safe.
> > > 
> > > It's safe but functionally broken because it is valid to want to use
> > > exec migration with post-copy.
> > > 
> > > > > If QEMU hangs when the other end doesn't send data back, that 
> > > > > actually seems
> > > > > like a potentially serious bug in migration code. Even if using the 
> > > > > normal
> > > > > 'tcp' migration protocol, if the target QEMU server hangs and fails to
> > > > > send data to QEMU on the return path, the source QEMU must never hang.
> > > > 
> > > > Hmm, we shouldn't get a 'hang' with a postcopy on a link without a
> > > > return path; but it does depend on how the exec: behaves on the
> > > > destination.
> > > > If the destination discards data written to it, then I think the
> > > > behaviour would be:
> > > >a) Page requests would just get dropped, they'd eventually get
> > > > fulfilled by the background page transmissions, but that could mean that
> > > > a page request would wait for minutes for the page.
> > > >b) The qemu main thread on the destination can be blocked by that, so
> > > > the monitor might not respond until the page request is fulfilled.
> > > >c) I'm not quite sure what would happen to the source return-path
> > > > thread
> > > > 
> > > > The behaviour seems to have changed between 2.9.0 (f26 package) and my
> > > > reasonably recent head build.
> > > 
> > > That's due to the bug we just fixed where we mistakenly didn't
> > > allow bi-directional I/O for exec
> > > 
> > >   commit 062d81f0e968fe1597474735f3ea038065027372
> > >   Author: Daniel P. Berrange 
> > >   Date:   Fri Apr 21 12:12:20 2017 +0100
> > > 
> > > migration: setup bi-directional I/O channel for exec: protocol
> > > 
> > > Historically the migration data channel has only needed to be
> > > unidirectional. Thus the 'exec:' protocol was requesting an
> > > I/O channel with O_RDONLY on incoming side, and O_WRONLY on
> > > the outgoing side.
> > > 
> > > This is fine for classic migration, but if you then try to run
> > > TLS over it, this fails because the TLS handshake requires a
> > > bi-directional channel.
> > > 
> > > Signed-off-by: Daniel P. Berrange 
> > > Reviewed-by: Juan Quintela 
> > > Signed-off-by: Juan Quintela 
> > > 
> > > 
> > > > A migration_cancel also doesn't work for 'exec' because it doesn't
> > > > support shutdown() - it just sticks in 'cancelling'.
> > > > On a socket that was broken like this the cancel would work because
> > > > it issues a shutdown() which causes the socket to cleanup.
> > > 
> > > I'm curious why migration_cancel requires shutdown() to work ? Why
> > > isn't it sufficient from the source POV to just close the socket
> > > entirely straight away.
> > 
> > close() closes the fd so that any other uses of the fd get an
> > error and you're at risk of the fd getting reallocated by something
> > else.  So consider if the main thread calls close(), the migration
> > thread and the return thread both carry on using that fd, which might
> > have been reallocated to something different.  Worse I think we came to the
> > conclusion that on some OSs a read()/write() blocked in the use of an fd
> > didn't get kicked out by a close.
> 
> I'd be very

[Qemu-devel] [PATCH 0/2] get rid of qemu_utimens()

2017-05-19 Thread Greg Kurz

It is currently only used by 9pfs and virtfs-proxy-helper. This series converts
them to utimensat() and futimens().

--
Greg

---

Greg Kurz (2):
  9pfs: assume utimensat() and futimens() are present
  util: drop old utimensat() compat code


 configure   |   22 
 fsdev/virtfs-proxy-helper.c |3 ++-
 hw/9pfs/9p-handle.c |5 -
 include/sysemu/os-posix.h   |   11 --
 util/oslib-posix.c  |   47 ---
 5 files changed, 2 insertions(+), 86 deletions(-)

[Qemu-devel] [PATCH 1/2] 9pfs: assume utimensat() and futimens() are present

2017-05-19 Thread Greg Kurz

The utimensat() and futimens() syscalls have been around for ages (ie,
glibc 2.6 and linux 2.6.22), and the decision was already taken to
switch to utimensat() anyway when fixing CVE-2016-9602 in 2.9.

Signed-off-by: Greg Kurz 
---
 fsdev/virtfs-proxy-helper.c |3 ++-
 hw/9pfs/9p-handle.c |5 -
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c
index 54f7ad1c48f0..617e19cd0b88 100644
--- a/fsdev/virtfs-proxy-helper.c
+++ b/fsdev/virtfs-proxy-helper.c
@@ -945,7 +945,8 @@ static int process_requests(int sock)
  &spec[0].tv_sec, &spec[0].tv_nsec,
  &spec[1].tv_sec, &spec[1].tv_nsec);
 if (retval > 0) {
-retval = qemu_utimens(path.data, spec);
+retval = utimensat(AT_FDCWD, path.data, spec,
+   AT_SYMLINK_NOFOLLOW);
 if (retval < 0) {
 retval = -errno;
 }
diff --git a/hw/9pfs/9p-handle.c b/hw/9pfs/9p-handle.c
index 1687661bc95a..9875f1894cc5 100644
--- a/hw/9pfs/9p-handle.c
+++ b/hw/9pfs/9p-handle.c
@@ -378,7 +378,6 @@ static int handle_utimensat(FsContext *ctx, V9fsPath 
*fs_path,
 const struct timespec *buf)
 {
 int ret;
-#ifdef CONFIG_UTIMENSAT
 int fd;
 struct handle_data *data = (struct handle_data *)ctx->private;
 
@@ -388,10 +387,6 @@ static int handle_utimensat(FsContext *ctx, V9fsPath 
*fs_path,
 }
 ret = futimens(fd, buf);
 close(fd);
-#else
-ret = -1;
-errno = ENOSYS;
-#endif
 return ret;
 }

Re: [Qemu-devel] [PATCH RFC 1/6] io: only allow return path for socket typed

2017-05-19 Thread Daniel P. Berrange

On Fri, May 19, 2017 at 03:33:12PM +0100, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrange (berra...@redhat.com) wrote:
> > > shutdown() is safe, in that it stops any other threads accessing the fd
> > > but doesn't allow its reallocation until the close;  We perform the
> > > close only when we've joined all other threads that were using the fd.
> > > Any of the threads that do new calls on the fd get an error and quickly
> > > fall down their error paths.
> > 
> > Ahh that's certainly an interesting scenario. That would certainly be
> > a problem with the migration code when this was originally written.
> > It had two QEMUFile structs each with an 'int fd' field, so when you
> > close 'fd' on one QEMUFile struct, it wouldn't update the other QEMUFile
> > used by another thread.
> > 
> > Since we switched over to use QIOChannel though, I think the thread
> > scenario you describe should be avoided entirely. When you have multiple
> > QEMUFile objects, they each have a reference counted pointer to the same
> > underlying QIOChannel object instance. So when QEMUFile triggers a call
> > to qio_channel_close() in one thread, that'll set fd=-1 in the QIOChannel.
> > Since the other threads have a reference to the same QIOChannel object,
> > they'll now see this fd == -1 straightaway.
> > 
> > So, IIUC, this should make the need for shutdown() redundant (at least
> > for the thread race conditions you describe).
> 
> That's not thread safe unless you're doing some very careful locking.
> Consider:
>   T1  T2   
>  oldfd=fd   tmp=fd
>  fd=-1
>  close(oldfd)
>  unrelated open()
> read(tmp,...
> 
> In practice every use of fd will be a copy into a tmp and then the
> syscall; the unrelated open() could happen in another thread.
> (OK, the gap between the tmp and the read is tiny, although if we're
> doing multiple operations chances are the compiler will optimise
> it to the top of a loop).
> 
> There's no way to make that code safe.

Urgh, yes, I see what you mean.

Currently the QIOChannelCommand implementation uses a pair of anonymous
pipes for stdin/out to the child process. I wonder if we could switch
that to use socketpair() instead, thus letting us shutdown() on it too.

Though I guess it would be sufficient for qio_channel_shutdown() to
merely kill the child PID, while leaving the FDs open, as then you'd
get EOF and/or EPIPE on the read/writes.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

[Qemu-devel] [PATCH 2/2] util: drop old utimensat() compat code

2017-05-19 Thread Greg Kurz

Now that 9pfs and virtfs-proxy-helper have been converted to utimensat(),
we don't need to keep qemu_utimens() anymore.

Signed-off-by: Greg Kurz 
---
 configure |   22 -
 include/sysemu/os-posix.h |   11 ---
 util/oslib-posix.c|   47 -
 3 files changed, 80 deletions(-)

diff --git a/configure b/configure
index 139638e922e0..1dea17ed2e73 100755
--- a/configure
+++ b/configure
@@ -3623,25 +3623,6 @@ if compile_prog "" "" ; then
   inotify1=yes
 fi
 
-# check if utimensat and futimens are supported
-utimens=no
-cat > $TMPC << EOF
-#define _ATFILE_SOURCE
-#include 
-#include 
-#include 
-
-int main(void)
-{
-utimensat(AT_FDCWD, "foo", NULL, 0);
-futimens(0, NULL);
-return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  utimens=yes
-fi
-
 # check if pipe2 is there
 pipe2=no
 cat > $TMPC << EOF
@@ -5427,9 +5408,6 @@ fi
 if test "$curses" = "yes" ; then
   echo "CONFIG_CURSES=y" >> $config_host_mak
 fi
-if test "$utimens" = "yes" ; then
-  echo "CONFIG_UTIMENSAT=y" >> $config_host_mak
-fi
 if test "$pipe2" = "yes" ; then
   echo "CONFIG_PIPE2=y" >> $config_host_mak
 fi
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index 900bdcb45ad0..629c8c648b7a 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -51,17 +51,6 @@ int os_mlock(void);
 typedef struct timeval qemu_timeval;
 #define qemu_gettimeofday(tp) gettimeofday(tp, NULL)
 
-#ifndef CONFIG_UTIMENSAT
-#ifndef UTIME_NOW
-# define UTIME_NOW ((1l << 30) - 1l)
-#endif
-#ifndef UTIME_OMIT
-# define UTIME_OMIT((1l << 30) - 2l)
-#endif
-#endif
-typedef struct timespec qemu_timespec;
-int qemu_utimens(const char *path, const qemu_timespec *times);
-
 bool is_daemonized(void);
 
 /**
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 4d9189e9efcf..7e28c161b257 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -207,53 +207,6 @@ int qemu_pipe(int pipefd[2])
 return ret;
 }
 
-int qemu_utimens(const char *path, const struct timespec *times)
-{
-struct timeval tv[2], tv_now;
-struct stat st;
-int i;
-#ifdef CONFIG_UTIMENSAT
-int ret;
-
-ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
-if (ret != -1 || errno != ENOSYS) {
-return ret;
-}
-#endif
-/* Fallback: use utimes() instead of utimensat() */
-
-/* happy if special cases */
-if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
-return 0;
-}
-if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
-return utimes(path, NULL);
-}
-
-/* prepare for hard cases */
-if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
-gettimeofday(&tv_now, NULL);
-}
-if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
-stat(path, &st);
-}
-
-for (i = 0; i < 2; i++) {
-if (times[i].tv_nsec == UTIME_NOW) {
-tv[i].tv_sec = tv_now.tv_sec;
-tv[i].tv_usec = tv_now.tv_usec;
-} else if (times[i].tv_nsec == UTIME_OMIT) {
-tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
-tv[i].tv_usec = 0;
-} else {
-tv[i].tv_sec = times[i].tv_sec;
-tv[i].tv_usec = times[i].tv_nsec / 1000;
-}
-}
-
-return utimes(path, &tv[0]);
-}
-
 char *
 qemu_get_local_state_pathname(const char *relative_pathname)
 {

Re: [Qemu-devel] [PATCH 03/10] s390x/css: add vmstate entities for css

2017-05-19 Thread Dr. David Alan Gilbert

* Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> 
> 
> On 05/15/2017 08:01 PM, Dr. David Alan Gilbert wrote:
> > * Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> >>
> >>
> >> On 05/08/2017 06:45 PM, Dr. David Alan Gilbert wrote:
> >>> * Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
>  As a preparation for switching to a vmstate based migration let us
>  introduce vmstate entities (e.g. VMStateDescription) for the css entities
>  to be migrated. Alongside some comments explaining or indicating the not
>  migration of certain members are introduced too.
> 
>  No changes in behavior, we just added some dead code -- which should
>  rise to life soon.
> 
>  Signed-off-by: Halil Pasic 
>  ---
>   hw/s390x/css.c | 276 
>  +
>   include/hw/s390x/css.h |  10 +-
>   2 files changed, 285 insertions(+), 1 deletion(-)
> 
>  diff --git a/hw/s390x/css.c b/hw/s390x/css.c
>  index c03bb20..2bda7d0 100644
>  --- a/hw/s390x/css.c
>  +++ b/hw/s390x/css.c
>  @@ -20,29 +20,231 @@
>   #include "hw/s390x/css.h"
>   #include "trace.h"
>   #include "hw/s390x/s390_flic.h"
>  +#include "hw/s390x/s390-virtio-ccw.h"
>   
> >>
> >> [..]
> >>
>  +static int css_get_ind_addr(QEMUFile *f, void *pv, size_t size,
>  +VMStateField *field)
>  +{
>  +int32_t len;
>  +IndAddr **ind_addr = pv;
>  +
>  +len = qemu_get_be32(f);
>  +if (len != 0) {
>  +*ind_addr = get_indicator(qemu_get_be64(f), len);
>  +} else {
>  +qemu_get_be64(f);
>  +*ind_addr = NULL;
>  +}
>  +return 0;
>  +}
>  +
>  +static int css_put_ind_addr(QEMUFile *f, void *pv, size_t size,
>  +VMStateField *field, QJSON *vmdesc)
>  +{
>  +IndAddr *ind_addr = *(IndAddr **) pv;
>  +
>  +if (ind_addr != NULL) {
>  +qemu_put_be32(f, ind_addr->len);
>  +qemu_put_be64(f, ind_addr->addr);
>  +} else {
>  +qemu_put_be32(f, 0);
>  +qemu_put_be64(f, 0UL);
>  +}
>  +return 0;
>  +}
>  +
>  +const VMStateInfo vmstate_info_ind_addr = {
>  +.name = "s390_ind_addr",
>  +.get = css_get_ind_addr,
>  +.put = css_put_ind_addr
>  +};
> >>>
> >>> You should be able to avoid this .get/.put by using VMSTATE_WITH_TMP,
> >>> declare a temporary struct something like:
> >>>   struct tmp_ind_addr {
> >>>  IndAddr *parent;
> >>>  uint32_t  len;
> >>>  uint64_t  addr;
> >>>   }
> >>>
> >>> and then your .get/.put routines turn into pre_save/post_load
> >>> routines to just setup the len/addr.
> >>>
> >>
> >> I don't think this is going to work -- unfortunately! You can see below,
> >> how this IndAddr* migration stuff is supposed to be used:
> >> the client code just uses the VMSTATE_PTR_TO_IND_ADDR macro as a
> >> field when describing state which needs an IndAddr* migrated.
> >>
> >> The problem is, we do not know in what state will this field
> >> be embedded, the pre_save/post_load called by put_tmp/get_tmp
> >> is however copying the pointer to this state into the parent.
> >> So instead of having a pointer to IndAddr* in those functions
> >> and updating it accordingly, I would have to find the IndAddr*
> >> in some arbitrary state (in our case VirtioCcwDevice) first,
> >> and I lack information for that.
> >>
> >> If it's hard to follow I can give you the patch I was debugging
> >> to come to this conclusion. (By the way I ended up with 10
> >> lines of code more than in this version, and although I think
> >> it looks nicer, it's simpler only if one knows how WITH_TMP
> >> works. My plan was to ask you which version do you like more
> >> and go with that before I realized it ain't gonna work.)
> >>
> > 
> > Yes, I see - I've got some similar other cases; the challenge
> > is it's a custom allocator - 'get_indicator' - and it's used
> > as fields in a few places.  Hmm.
> > 
> > 
> 
> The problem can be worked around by wrapping the WITH_TMP into another
> vmsd and using VMSTATE_STRUCT for describing the field in question. It's
> quite some boilerplate (+16 lines). Should I post the patch here?

Yes please.

> We could also consider making WITH_TMP act as a normal field. 
> Working on the whole state looks a bit like a corner case:
> we have some stuff adjacent in the migration stream, and we have
> to map it on multiple fields (and vice-versa). Getting the whole
> state with a pointer to a certain field could work via container_of.

You do need to know which field you're working on to be able to safely
use container_of, so I'm not sure how it would work for you in this
case.

The other thought I'd had was that perhaps we could change the temporary
structure in VMSTATE_WITH_TMP to:

  struct foo {
 struct whatever **pa

Re: [Qemu-devel] [PATCH v4 1/2] i386: rewrite way CPUID index is validated

2017-05-19 Thread Kashyap Chamarthy

On Tue, May 09, 2017 at 02:27:35PM +0100, Daniel P. Berrange wrote:
> Change the nested if statements into a flat format, to make
> it clearer what validation / capping is being performed on
> different CPUID index values.
> 
> NB this changes behaviour when "index > env->cpuid_xlevel2".
> This won't have any guest-visible effect because no there is
> no CPUID[0xC001]

Nit: When applying, maybe the maintainer could fix the typo:

"because no there is no" -> "because there is no"

> feature supported by TCG, and KVM code
> will never call cpu_x86_cpuid() with such an index value.
> 
> Reviewed-by: Eduardo Habkost 
> Signed-off-by: Daniel P. Berrange 
> ---
>  target/i386/cpu.c | 35 +++
>  1 file changed, 15 insertions(+), 20 deletions(-)
> 

[...]

-- 
/kashyap

Re: [Qemu-devel] [PATCH 03/10] s390x/css: add vmstate entities for css

2017-05-19 Thread Halil Pasic

On 05/19/2017 04:55 PM, Dr. David Alan Gilbert wrote:
> Dave
> P.S. I'm out for about a week.

Thanks for the info! Could you say something about our 'two
devices two sections vs two devices one section' dilemma
from PATCH 06/10 before leaving? I do not want to be pushy,
but I'm also eager to make progress :).

Have a good whatever it is next week!

Halil

Re: [Qemu-devel] [virtio-dev] Re: [virtio-dev] Re: [PATCH v2 00/16] Vhost-pci for inter-VM communication

2017-05-19 Thread Stefan Hajnoczi

On Fri, May 19, 2017 at 11:10:33AM +0800, Jason Wang wrote:
> On 2017年05月18日 11:03, Wei Wang wrote:
> > On 05/17/2017 02:22 PM, Jason Wang wrote:
> > > On 2017年05月17日 14:16, Jason Wang wrote:
> > > > On 2017年05月16日 15:12, Wei Wang wrote:
> > > > > > Hi:
> > > > > > 
> > > > > > Care to post the driver codes too?
> > > > > > 
> > > > > OK. It may take some time to clean up the driver code before
> > > > > post it out. You can first
> > > > > have a check of the draft at the repo here:
> > > > > https://github.com/wei-w-wang/vhost-pci-driver
> > > > > 
> > > > > Best,
> > > > > Wei
> > > > 
> > > > Interesting, looks like there's one copy on tx side. We used to
> > > > have zerocopy support for tun for VM2VM traffic. Could you
> > > > please try to compare it with your vhost-pci-net by:
> > > > 
> > We can analyze from the whole data path - from VM1's network stack to
> > send packets -> VM2's
> > network stack to receive packets. The number of copies are actually the
> > same for both.
> 
> That's why I'm asking you to compare the performance. The only reason for
> vhost-pci is performance. You should prove it.

There is another reason for vhost-pci besides maximum performance:

vhost-pci makes it possible for end-users to run networking or storage
appliances in compute clouds.  Cloud providers do not allow end-users to
run custom vhost-user processes on the host so you need vhost-pci.

Stefan


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v2 0/9] QOM'ify work for sparc

2017-05-19 Thread Mark Cave-Ayland

On 29/04/17 11:49, xiaoqiang zhao wrote:

> This patch set aims for QOM'ifying code related to sparc.
> It is part of my QOM'ify work of qemu code base.
> 
> changes since v1: 
> * rebased on the latest master
> 
> xiaoqiang zhao (9):
>   hw/misc: QOM'ify eccmemctl.c
>   hw/dma: QOM'ify sparc32_dma.c
>   hw/dma: QOM'ify sun4m_iommu.c
>   hw/misc: QOM'ify slavio_misc.c
>   hw/timer: QOM'ify m48txx_sysbus (pass 1)
>   hw/timer: QOM'ify m48txx_sysbus (pass 2)
>   hw/timer: QOM'ify slavio_timer
>   hw/sparc: QOM'ify sun4m.c
>   hw/sparc64: QOM'ify sun4u.c
> 
>  hw/dma/sparc32_dma.c| 25 ++-
>  hw/dma/sun4m_iommu.c| 12 +--
>  hw/misc/eccmemctl.c | 25 ++-
>  hw/misc/slavio_misc.c   | 43 ---
>  hw/sparc/sun4m.c| 54 
> +
>  hw/sparc64/sun4u.c  | 20 +-
>  hw/timer/m48t59.c   | 38 +-
>  hw/timer/slavio_timer.c | 12 +--
>  8 files changed, 105 insertions(+), 124 deletions(-)

I've finally found time to take a look at these and they look good to me.

I'll wait a few days to see if anyone has any comments (particularly
relating to the m48t59 device since it appears that device is also
used by PReP) but if I hear nothing then I'll apply to my qemu-sparc branch.

And please do accept my apologies that it has taken so long to review
these patches, I will aspire to do much better in future.

ATB,

Mark.

Re: [Qemu-devel] [PULL 00/18] Migration pull request

2017-05-19 Thread Stefan Hajnoczi

On Thu, May 18, 2017 at 07:24:44PM +0200, Juan Quintela wrote:
> Hi
> 
> This include the following series:
> - Fix non-multiple of page size migration (dave)
> - Remove use of old MigrationParms (a.k.a. now block migration is a 
> capability)
> - Cleanups of headers in migration
> - Make savevm.c target independent
> 
> Please, apply.
> 
> Thanks, Juan.
> 
> The following changes since commit 56821559f0ba682fe6b367815572e6f974d329ab:
> 
>   Merge remote-tracking branch 'dgilbert/tags/pull-hmp-20170517' into staging 
> (2017-05-18 13:36:15 +0100)
> 
> are available in the git repository at:
> 
>   git://github.com/juanquintela/qemu.git tags/migration/20170518
> 
> for you to fetch changes up to 46d702b106d20beda2fcd0f96ddc44855ba262b3:
> 
>   migration: Make savevm.c target independent (2017-05-18 19:21:00 +0200)
> 
> 
> migration/next for 20170518
> 
> 
> Dr. David Alan Gilbert (3):
>   migration: Fix non-multiple of page size migration
>   postcopy: Require RAMBlocks that are whole pages
>   block migration: Allow compile time disable
> 
> Juan Quintela (15):
>   hmp: Use visitor api for hmp_migrate_set_parameter()
>   migration: Create block capability
>   migration: Remove use of old MigrationParams
>   migration: Remove old MigrationParams
>   migration: Create migration/xbzrle.h
>   migration: Split migration/channel.c for channel operations
>   migration: Export qemu-file-channel.c functions in its own file
>   migration: Remove migration.h from colo.h
>   migration: Move qjson.h to migration/
>   migration: Split vmstate-types.c from vmstate.c
>   migration: Remove qemu-file.h from vmstate.h
>   migration: Remove vmstate.h from migration.h
>   migration: migration.h was not needed
>   exec: Create include for target_page_size()
>   migration: Make savevm.c target independent
> 
>  Makefile.target  |   2 +-
>  block/qed.c  |   1 -
>  configure|  11 +
>  exec.c   |  10 +
>  hmp.c|  23 +-
>  hw/i386/pc_q35.c |   1 -
>  hw/virtio/vhost-user.c   |   1 -
>  hw/virtio/vhost-vsock.c  |   1 -
>  hw/virtio/virtio.c   |   1 -
>  include/exec/target_page.h   |  21 +
>  include/hw/hw.h  |   1 +
>  include/migration/block.h|  24 ++
>  include/migration/colo.h |   1 -
>  include/migration/migration.h|  30 +-
>  include/migration/qemu-file.h|   4 -
>  include/migration/vmstate.h  |   4 -
>  include/qemu/typedefs.h  |   1 -
>  include/sysemu/sysemu.h  |   4 +-
>  migration/Makefile.objs  |   6 +-
>  migration/block.c|  19 +-
>  migration/channel.c  |  67 
>  migration/channel.h  |  27 ++
>  migration/colo-comm.c|   4 +-
>  migration/colo.c |   9 +-
>  migration/exec.c |   1 +
>  migration/fd.c   |   1 +
>  migration/migration.c| 149 ---
>  migration/postcopy-ram.c |  18 +-
>  migration/qemu-file-channel.c|   1 +
>  migration/qemu-file-channel.h|  32 ++
>  migration/qjson.c|   2 +-
>  {include/migration => migration}/qjson.h |   0
>  migration/ram.c  |   8 +-
>  migration/rdma.c |   1 +
>  migration/savevm.c   |  40 +-
>  migration/socket.c   |   1 +
>  migration/tls.c  |   1 +
>  migration/vmstate-types.c| 661 
> +++
>  migration/vmstate.c  | 656 +-
>  migration/xbzrle.c   |   2 +-
>  migration/xbzrle.h   |  21 +
>  monitor.c|   1 -
>  qapi-schema.json |  28 +-
>  tests/Makefile.include   |   2 +-
>  tests/test-vmstate.c |   2 +
>  tests/test-xbzrle.c  |   2 +-
>  46 files changed, 1102 insertions(+), 801 deletions(-)
>  create mode 100644 include/exec/target_page.h
>  create mode 100644 migration/channel.c
>  create mode 100644 migration/channel.h
>  create mode 100644 migration/qemu-file-channel.h
>  rename {include/migration => migration}/qjson.h (100%)
>  create mode 100644 migration/vmstate-types.c
>  create mode 100644 migration/xbzrle.h
> 

Thanks, applied to my staging tree:
https://github.com/stefanha/qemu/commits/staging

Stefan


signature.as

Re: [Qemu-devel] [PULL 0/1] ui: egl-headless requires dmabuf support

2017-05-19 Thread Stefan Hajnoczi

On Fri, May 19, 2017 at 10:48:29AM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> Little single-patch pull request to fix a build issue.
> 
> please pull,
>   Gerd
> 
> The following changes since commit 56821559f0ba682fe6b367815572e6f974d329ab:
> 
>   Merge remote-tracking branch 'dgilbert/tags/pull-hmp-20170517' into staging 
> (2017-05-18 13:36:15 +0100)
> 
> are available in the git repository at:
> 
>   git://git.kraxel.org/qemu tags/pull-ui-20170519-1
> 
> for you to fetch changes up to 371ec54e9f8415cd74af45acdcf67b413f50cce5:
> 
>   ui: egl-headless requires dmabuf support (2017-05-19 10:46:00 +0200)
> 
> 
> ui: egl-headless requires dmabuf support
> 
> 
> Gerd Hoffmann (1):
>   ui: egl-headless requires dmabuf support
> 
>  vl.c | 4 ++--
>  ui/Makefile.objs | 2 +-
>  2 files changed, 3 insertions(+), 3 deletions(-)
> 

Thanks, applied to my staging tree:
https://github.com/stefanha/qemu/commits/staging

Stefan


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PULL 00/20] Misc patches for 2017-05-19

2017-05-19 Thread Stefan Hajnoczi

On Fri, May 19, 2017 at 01:20:52PM +0200, Paolo Bonzini wrote:
> The following changes since commit 56821559f0ba682fe6b367815572e6f974d329ab:
> 
>   Merge remote-tracking branch 'dgilbert/tags/pull-hmp-20170517' into staging 
> (2017-05-18 13:36:15 +0100)
> 
> are available in the git repository at:
> 
> 
>   git://github.com/bonzini/qemu.git tags/for-upstream
> 
> for you to fetch changes up to e10dc0ca6854c4f47cc5e9d47e20c62aa875f518:
> 
>   target/i386: use multiple CPU AddressSpaces (2017-05-19 13:01:32 +0200)
> 
> 
> * virtio-scsi use-after-free fix (Fam)
> * vhost-user-scsi support (Felipe)
> * SMM fixes and improvements for TCG (myself)
> * irqchip and AddressSpaceDispatch cleanups and fixes (Peter)
> * Coverity fix (Stefano)
> * NBD cleanups (Vladimir)
> * RTC accuracy improvements and code cleanups (Guangrong+Yunfang)
> 
> 
> Fam Zheng (1):
>   virtio-scsi: Unset hotplug handler when unrealize
> 
> Felipe Franciosi (2):
>   vhost-user-scsi: Introduce vhost-user-scsi host device
>   vhost-user-scsi: Introduce a vhost-user-scsi sample application
> 
> Paolo Bonzini (2):
>   target/i386: enable A20 automatically in system management mode
>   target/i386: use multiple CPU AddressSpaces
> 
> Peter Xu (4):
>   kvm: irqchip: trace changes on msi add/remove
>   msix: trace control bit write op
>   kvm: irqchip: skip update msi when disabled
>   exec: simplify phys_page_find() params
> 
> Stefano Stabellini (1):
>   Check the return value of fcntl in qemu_set_cloexec
> 
> Tai Yunfang (1):
>   mc146818rtc: precisely count the clock for periodic timer
> 
> Vladimir Sementsov-Ogievskiy (5):
>   nbd: strict nbd_wr_syncv
>   nbd: read_sync and friends: return 0 on success
>   nbd: add errp parameter to nbd_wr_syncv()
>   nbd: add errp to read_sync, write_sync and drop_sync
>   nbd/client.c: use errp instead of LOG
> 
> Xiao Guangrong (4):
>   mc146818rtc: update periodic timer only if it is needed
>   mc146818rtc: ensure LOST_TICK_POLICY_SLEW is only enabled on TARGET_I386
>   mc146818rtc: drop unnecessary '#ifdef TARGET_I386'
>   mc146818rtc: embrace all x86 specific code
> 
>  .gitignore|   1 +
>  Makefile  |   3 +
>  Makefile.objs |   4 +
>  block/nbd-client.c|  11 +-
>  contrib/vhost-user-scsi/Makefile.objs |   1 +
>  contrib/vhost-user-scsi/vhost-user-scsi.c | 886 
> ++
>  default-configs/pci.mak   |   1 +
>  default-configs/s390x-softmmu.mak |   1 +
>  exec.c|  13 +-
>  hw/pci/msix.c |  11 +-
>  hw/pci/trace-events   |   3 +
>  hw/scsi/Makefile.objs |   1 +
>  hw/scsi/vhost-user-scsi.c | 215 
>  hw/scsi/virtio-scsi.c |   3 +
>  hw/timer/mc146818rtc.c| 206 ---
>  hw/virtio/virtio-pci.c|  54 ++
>  hw/virtio/virtio-pci.h|  11 +
>  include/block/nbd.h   |   8 +-
>  include/hw/virtio/vhost-user-scsi.h   |  35 ++
>  include/hw/virtio/virtio-scsi.h   |   3 +
>  kvm-all.c |   4 +-
>  nbd/client.c  | 125 ++---
>  nbd/common.c  |  23 +-
>  nbd/nbd-internal.h|  40 +-
>  nbd/server.c  |  92 ++--
>  qemu-nbd.c|   3 +-
>  target/i386/arch_memory_mapping.c |  18 +-
>  target/i386/cpu.c |  15 +-
>  target/i386/cpu.h |  20 +-
>  target/i386/helper.c  |  96 ++--
>  target/i386/kvm.c |  12 +-
>  target/i386/machine.c |   4 -
>  target/i386/smm_helper.c  |  18 -
>  trace-events  |   3 +-
>  util/oslib-posix.c|   4 +-
>  35 files changed, 1642 insertions(+), 306 deletions(-)
>  create mode 100644 contrib/vhost-user-scsi/Makefile.objs
>  create mode 100644 contrib/vhost-user-scsi/vhost-user-scsi.c
>  create mode 100644 hw/scsi/vhost-user-scsi.c
>  create mode 100644 include/hw/virtio/vhost-user-scsi.h
> -- 
> 1.8.3.1
> 
> 

Thanks, applied to my staging tree:
https://github.com/stefanha/qemu/commits/staging

Stefan


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PULL 00/20] Misc patches for 2017-05-19

2017-05-19 Thread Stefan Hajnoczi

On Fri, May 19, 2017 at 05:41:28AM -0700, no-re...@patchew.org wrote:
> Hi,
> 
> This series seems to have some coding style problems. See output below for
> more information:

Yikes, on second thought I've dropped the pull request for now.

Please look at these coding style violations.

Thanks,
Stefan

> 
> Message-id: 1495192872-27667-1-git-send-email-pbonz...@redhat.com
> Type: series
> Subject: [Qemu-devel] [PULL 00/20] Misc patches for 2017-05-19
> 
> === TEST SCRIPT BEGIN ===
> #!/bin/bash
> 
> BASE=base
> n=1
> total=$(git log --oneline $BASE.. | wc -l)
> failed=0
> 
> git config --local diff.renamelimit 0
> git config --local diff.renames True
> 
> commits="$(git log --format=%H --reverse $BASE..)"
> for c in $commits; do
> echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
> if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; 
> then
> failed=1
> echo
> fi
> n=$((n+1))
> done
> 
> exit $failed
> === TEST SCRIPT END ===
> 
> Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
> Switched to a new branch 'test'
> 6fce4cd target/i386: use multiple CPU AddressSpaces
> a04ba9d target/i386: enable A20 automatically in system management mode
> 87c38d5 vhost-user-scsi: Introduce a vhost-user-scsi sample application
> a63728e vhost-user-scsi: Introduce vhost-user-scsi host device
> bda4194 virtio-scsi: Unset hotplug handler when unrealize
> ca14443 exec: simplify phys_page_find() params
> 7eee4fd nbd/client.c: use errp instead of LOG
> 388beda nbd: add errp to read_sync, write_sync and drop_sync
> 0032273 nbd: add errp parameter to nbd_wr_syncv()
> bdf25c9 nbd: read_sync and friends: return 0 on success
> b61d7d1 nbd: strict nbd_wr_syncv
> cc100d3 Check the return value of fcntl in qemu_set_cloexec
> 94297c6 kvm: irqchip: skip update msi when disabled
> f8f04f1 msix: trace control bit write op
> 11bfe30 kvm: irqchip: trace changes on msi add/remove
> 192c432 mc146818rtc: embrace all x86 specific code
> 6e1b003 mc146818rtc: drop unnecessary '#ifdef TARGET_I386'
> cb9a45b mc146818rtc: ensure LOST_TICK_POLICY_SLEW is only enabled on 
> TARGET_I386
> 98a508b mc146818rtc: precisely count the clock for periodic timer
> b9744f3 mc146818rtc: update periodic timer only if it is needed
> 
> === OUTPUT BEGIN ===
> Checking PATCH 1/20: mc146818rtc: update periodic timer only if it is 
> needed...
> Checking PATCH 2/20: mc146818rtc: precisely count the clock for periodic 
> timer...
> ERROR: braces {} are necessary for all arms of this statement
> #129: FILE: hw/timer/mc146818rtc.c:216:
> +if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
> [...]
> +} else
> [...]
> 
> total: 1 errors, 0 warnings, 181 lines checked
> 
> Your patch has style problems, please review.  If any of these errors
> are false positives report them to the maintainer, see
> CHECKPATCH in MAINTAINERS.
> 
> Checking PATCH 3/20: mc146818rtc: ensure LOST_TICK_POLICY_SLEW is only 
> enabled on TARGET_I386...
> Checking PATCH 4/20: mc146818rtc: drop unnecessary '#ifdef TARGET_I386'...
> Checking PATCH 5/20: mc146818rtc: embrace all x86 specific code...
> Checking PATCH 6/20: kvm: irqchip: trace changes on msi add/remove...
> Checking PATCH 7/20: msix: trace control bit write op...
> Checking PATCH 8/20: kvm: irqchip: skip update msi when disabled...
> Checking PATCH 9/20: Check the return value of fcntl in qemu_set_cloexec...
> Checking PATCH 10/20: nbd: strict nbd_wr_syncv...
> Checking PATCH 11/20: nbd: read_sync and friends: return 0 on success...
> Checking PATCH 12/20: nbd: add errp parameter to nbd_wr_syncv()...
> Checking PATCH 13/20: nbd: add errp to read_sync, write_sync and drop_sync...
> Checking PATCH 14/20: nbd/client.c: use errp instead of LOG...
> ERROR: code indent should never use tabs
> #126: FILE: nbd/client.c:729:
> +^I Error **errp)$
> 
> total: 1 errors, 0 warnings, 146 lines checked
> 
> Your patch has style problems, please review.  If any of these errors
> are false positives report them to the maintainer, see
> CHECKPATCH in MAINTAINERS.
> 
> Checking PATCH 15/20: exec: simplify phys_page_find() params...
> Checking PATCH 16/20: virtio-scsi: Unset hotplug handler when unrealize...
> Checking PATCH 17/20: vhost-user-scsi: Introduce vhost-user-scsi host 
> device...
> ERROR: do not use C99 // comments
> #216: FILE: hw/scsi/vhost-user-scsi.c:145:
> +// Turn on predefined features supported by this device
> 
> ERROR: do not use C99 // comments
> #261: FILE: hw/scsi/vhost-user-scsi.c:190:
> +// Add the bootindex property for this object
> 
> ERROR: do not use C99 // comments
> #265: FILE: hw/scsi/vhost-user-scsi.c:194:
> +// Set boot index according the the device config
> 
> total: 3 errors, 0 warnings, 382 lines checked
> 
> Your patch has style problems, please review.  If any of these errors
> are false positives report them to the maintainer, see
> CHECKPATCH in MAINTAINERS.
> 
> Checking PATCH 18/20: vhost-user-scsi: Introduce a

Re: [Qemu-devel] [PULL 0/3] audio patch queue.

2017-05-19 Thread Stefan Hajnoczi

On Fri, May 19, 2017 at 01:24:12PM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> Smallish audio patch queue, renaming and moving soundhw init code.
> 
> please pull,
>   Gerd
> 
> The following changes since commit 56821559f0ba682fe6b367815572e6f974d329ab:
> 
>   Merge remote-tracking branch 'dgilbert/tags/pull-hmp-20170517' into staging 
> (2017-05-18 13:36:15 +0100)
> 
> are available in the git repository at:
> 
>   git://git.kraxel.org/qemu tags/pull-audio-20170519-1
> 
> for you to fetch changes up to 8a824e4d74213a2da39323304f949c5b4243e1fb:
> 
>   audio: Rename hw/audio/audio.h to hw/audio/soundhw.h (2017-05-19 10:48:54 
> +0200)
> 
> 
> audio: move & rename soundhw init code.
> 
> 
> Eduardo Habkost (3):
>   audio: Move arch_init audio code to hw/audio/soundhw.c
>   audio: Rename audio_init() to soundhw_init()
>   audio: Rename hw/audio/audio.h to hw/audio/soundhw.h
> 
>  include/hw/audio/{audio.h => soundhw.h} |   3 +
>  include/sysemu/arch_init.h  |   2 -
>  arch_init.c | 126 +-
>  hw/audio/ac97.c |   2 +-
>  hw/audio/adlib.c|   2 +-
>  hw/audio/cs4231a.c  |   2 +-
>  hw/audio/es1370.c   |   2 +-
>  hw/audio/gus.c  |   2 +-
>  hw/audio/intel-hda.c|   2 +-
>  hw/audio/pcspk.c|   2 +-
>  hw/audio/sb16.c |   2 +-
>  hw/audio/soundhw.c  | 156 
> 
>  hw/ppc/prep.c   |   3 +-
>  vl.c|   3 +-
>  hw/audio/Makefile.objs  |   2 +
>  15 files changed, 174 insertions(+), 137 deletions(-)
>  rename include/hw/audio/{audio.h => soundhw.h} (81%)
>  create mode 100644 hw/audio/soundhw.c
> 

Coding style violations are pre-existing.  That's fine.

Thanks, applied to my staging tree:
https://github.com/stefanha/qemu/commits/staging

Stefan


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 03/10] s390x/css: add vmstate entities for css

2017-05-19 Thread Halil Pasic



On 05/19/2017 04:55 PM, Dr. David Alan Gilbert wrote:
> * Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
>>
>>
>> On 05/15/2017 08:01 PM, Dr. David Alan Gilbert wrote:
>>> * Halil Pasic (pa...@linux.vnet.ibm.com) wrote:


 On 05/08/2017 06:45 PM, Dr. David Alan Gilbert wrote:
> * Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
>> As a preparation for switching to a vmstate based migration let us
>> introduce vmstate entities (e.g. VMStateDescription) for the css entities
>> to be migrated. Alongside some comments explaining or indicating the not
>> migration of certain members are introduced too.
>>
>> No changes in behavior, we just added some dead code -- which should
>> rise to life soon.
>>
>> Signed-off-by: Halil Pasic 
>> ---
>>  hw/s390x/css.c | 276 
>> +
>>  include/hw/s390x/css.h |  10 +-
>>  2 files changed, 285 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/s390x/css.c b/hw/s390x/css.c
>> index c03bb20..2bda7d0 100644
>> --- a/hw/s390x/css.c
>> +++ b/hw/s390x/css.c
>> @@ -20,29 +20,231 @@
>>  #include "hw/s390x/css.h"
>>  #include "trace.h"
>>  #include "hw/s390x/s390_flic.h"
>> +#include "hw/s390x/s390-virtio-ccw.h"
>>  

 [..]

>> +static int css_get_ind_addr(QEMUFile *f, void *pv, size_t size,
>> +VMStateField *field)
>> +{
>> +int32_t len;
>> +IndAddr **ind_addr = pv;
>> +
>> +len = qemu_get_be32(f);
>> +if (len != 0) {
>> +*ind_addr = get_indicator(qemu_get_be64(f), len);
>> +} else {
>> +qemu_get_be64(f);
>> +*ind_addr = NULL;
>> +}
>> +return 0;
>> +}
>> +
>> +static int css_put_ind_addr(QEMUFile *f, void *pv, size_t size,
>> +VMStateField *field, QJSON *vmdesc)
>> +{
>> +IndAddr *ind_addr = *(IndAddr **) pv;
>> +
>> +if (ind_addr != NULL) {
>> +qemu_put_be32(f, ind_addr->len);
>> +qemu_put_be64(f, ind_addr->addr);
>> +} else {
>> +qemu_put_be32(f, 0);
>> +qemu_put_be64(f, 0UL);
>> +}
>> +return 0;
>> +}
>> +
>> +const VMStateInfo vmstate_info_ind_addr = {
>> +.name = "s390_ind_addr",
>> +.get = css_get_ind_addr,
>> +.put = css_put_ind_addr
>> +};
>
> You should be able to avoid this .get/.put by using VMSTATE_WITH_TMP,
> declare a temporary struct something like:
>   struct tmp_ind_addr {
>  IndAddr *parent;
>  uint32_t  len;
>  uint64_t  addr;
>   }
>
> and then your .get/.put routines turn into pre_save/post_load
> routines to just setup the len/addr.
>

 I don't think this is going to work -- unfortunately! You can see below,
 how this IndAddr* migration stuff is supposed to be used:
 the client code just uses the VMSTATE_PTR_TO_IND_ADDR macro as a
 field when describing state which needs an IndAddr* migrated.

 The problem is, we do not know in what state will this field
 be embedded, the pre_save/post_load called by put_tmp/get_tmp
 is however copying the pointer to this state into the parent.
 So instead of having a pointer to IndAddr* in those functions
 and updating it accordingly, I would have to find the IndAddr*
 in some arbitrary state (in our case VirtioCcwDevice) first,
 and I lack information for that.

 If it's hard to follow I can give you the patch I was debugging
 to come to this conclusion. (By the way I ended up with 10
 lines of code more than in this version, and although I think
 it looks nicer, it's simpler only if one knows how WITH_TMP
 works. My plan was to ask you which version do you like more
 and go with that before I realized it ain't gonna work.)

>>>
>>> Yes, I see - I've got some similar other cases; the challenge
>>> is it's a custom allocator - 'get_indicator' - and it's used
>>> as fields in a few places.  Hmm.
>>>
>>>
>>
>> The problem can be worked around by wrapping the WITH_TMP into another
>> vmsd and using VMSTATE_STRUCT for describing the field in question. It's
>> quite some boilerplate (+16 lines). Should I post the patch here?
> 
> Yes please.
> 
8<--

>From a0b6c725b114745c8434f683133995c4e33d936e Mon Sep 17 00:00:00 2001
From: Halil Pasic 
Date: Tue, 9 May 2017 12:06:42 +0200
Subject: [PATCH 1/1] s390x/css: replace info with VMSTATE_WITH_TMP

Convert a VMStateInfo based solution manipulating the migration stream
directly to VMSTATE_WITH_TMP, which keeps IO and transformation logic
separate.

Signed-off-by: Halil Pasic 
---
 hw/s390x/css.c | 56 --
 include/hw/s390x/css.h |  4

Re: [Qemu-devel] [PULL 00/20] Misc patches for 2017-05-19

2017-05-19 Thread Paolo Bonzini



On 19/05/2017 17:51, Stefan Hajnoczi wrote:
>> This series seems to have some coding style problems. See output below for
>> more information:
> Yikes, on second thought I've dropped the pull request for now.
> 
> Please look at these coding style violations.

These are just a sample program, so I didn't really care much.  But
these three aren't:

Checking PATCH 17/20: vhost-user-scsi: Introduce vhost-user-scsi host
device...
ERROR: do not use C99 // comments
#216: FILE: hw/scsi/vhost-user-scsi.c:145:
+// Turn on predefined features supported by this device

ERROR: do not use C99 // comments
#261: FILE: hw/scsi/vhost-user-scsi.c:190:
+// Add the bootindex property for this object

ERROR: do not use C99 // comments
#265: FILE: hw/scsi/vhost-user-scsi.c:194:
+// Set boot index according the the device config

total: 3 errors, 0 warnings, 382 lines checked

so I guess I'll fix the sample program too.

Paolo



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [PULL] Update OpenBIOS images

2017-05-19 Thread Mark Cave-Ayland

Hi Stefan,

This update contains the OpenBIOS VGA driver updates required to enable Ben's 
QemuMacDrivers
for Mac guests. Please pull.


ATB,

Mark.


The following changes since commit 56821559f0ba682fe6b367815572e6f974d329ab:

  Merge remote-tracking branch 'dgilbert/tags/pull-hmp-20170517' into staging 
(2017-05-18 13:36:15 +0100)

are available in the git repository at:


  https://github.com/mcayland/qemu.git tags/qemu-openbios-signed

for you to fetch changes up to 415c3824836d3b65a5796a81b17fccd1ad575ff8:

  Update OpenBIOS images to 3ebaaa2 built from submodule. (2017-05-19 16:52:40 
+0100)


Update OpenBIOS images


Mark Cave-Ayland (1):
  Update OpenBIOS images to 3ebaaa2 built from submodule.

 pc-bios/openbios-ppc |  Bin 750840 -> 750840 bytes
 pc-bios/openbios-sparc32 |  Bin 382048 -> 382048 bytes
 pc-bios/openbios-sparc64 |  Bin 1593408 -> 1593408 bytes
 roms/openbios|2 +-
 4 files changed, 1 insertion(+), 1 deletion(-)

Re: [Qemu-devel] [PATCH 03/10] s390x/css: add vmstate entities for css

2017-05-19 Thread Halil Pasic

On 05/19/2017 04:55 PM, Dr. David Alan Gilbert wrote:
>> We could also consider making WITH_TMP act as a normal field. 
>> Working on the whole state looks a bit like a corner case:
>> we have some stuff adjacent in the migration stream, and we have
>> to map it on multiple fields (and vice-versa). Getting the whole
>> state with a pointer to a certain field could work via container_of.
> You do need to know which field you're working on to be able to safely
> use container_of, so I'm not sure how it would work for you in this
> case.

Well, if you have to write to just one field you are good because you
already have a pointer to that field (.offset was added).

If you need to write to multiple fields in post_load then you just pick
one of the fields you are going to write to (probably the first) and use
container_of to gain access to the whole state. The logic is specific to
the bunch of the fields you are going to touch anyway.

In fact, any member of the state struct will do; it's only important that
you use the same one when creating the VMStateField and when trying to get a
pointer to the parent in pre_save and post_load.

I haven't tried, so I'm not 100% sure, but if you like I can try, and send
you a patch if it's viable. 

I think the key to a good solution is really what is intended and typical
usage, and what is corner case. My patch in the other reply shows that we
can do without changing the ways of VMSTATE_WITH_TMP. I think we can make
what I'm trying to do here a bit prettier at the expense of making what
you are doing in virtio-net a bit uglier, but whether it's a good idea to
do so, I can't tell.

> 
> The other thought I'd had was that perhaps we could change the temporary
> structure in VMSTATE_WITH_TMP to:
> 
>   struct foo {
>  struct whatever **parent;
> 
> so now you could write to *parent in cases like these.
>

Sorry, I do not get your idea. If you have some WIP patch in this
direction I would be happy to provide some feedback.

>> Btw, I would rather call get_indicator a factory method or even a
>> constructor than an allocator, but I think we understand each-other
>> anyway.
> Yes; I'm not too worried about the actual name as long as it's short
> and obvious.
> 
> I'd thought of 'allocator' since in most cases it's used where the
> load-time code allocates memory for the object being loaded.
> A constructor is normally something I think of as happening after
> allocation; and a factory, hmm maybe.  However, if it does the right
> thing I wouldn't object to any of those names.
> 

I think we are on the same page.

Cheers,
Halil

> Dave

Re: [Qemu-devel] [PATCH 6/6] spec/vhost-user spec: Add IOMMU support

2017-05-19 Thread Michael S. Tsirkin

On Fri, May 19, 2017 at 03:46:36PM +0800, Jason Wang wrote:
> 
> 
> On 2017年05月18日 16:43, Maxime Coquelin wrote:
> > > > 
> > > > +When the VHOST_USER_PROTOCOL_F_SLAVE_REQ is supported by the
> > > > slave, and the
> > > > +master initiated the slave to master communication channel using the
> > > > +VHOST_USER_SET_SLAVE_REQ_FD request, the slave can send IOTLB
> > > > miss and access
> > > > +failure events by sending VHOST_USER_SLAVE_IOTLB_MSG requests
> > > > to the master
> > > > +with a struct vhost_iotlb_msg payload. For miss events, the
> > > > iotlb payload has
> > > > +to be filled with the miss message type (1), the I/O virtual
> > > > address and the
> > > > +permissions flags. For access failure event, the iotlb payload
> > > > has to be
> > > > +filled with the access failure message type (4), the I/O
> > > > virtual address and
> > > > +the permissions flags.
> > > 
> > > I don't think slave should cache invalid entries. If it does not,
> > > how can it detect access failure as opposed to a miss?
> > 
> > Of course, invalid cache entries should not be cached.
> > The VHOST_IOTLB_ACCESS_FAIL has been specified for the Kernel backend,
> > even if the latter does not implement it yet.
> 
> Yes, I leave this for future use e.g reporting copy_to_user() failure to
> userspace.
> 
> Thanks

Interesting. And it's not handled now.
So let's add a text "reserved for reporting internal access
errors in the future. Should not be used for now.".

-- 
MST

Re: [Qemu-devel] [virtio-dev] Re: [virtio-dev] Re: [PATCH v2 00/16] Vhost-pci for inter-VM communication

2017-05-19 Thread Michael S. Tsirkin

On Fri, May 19, 2017 at 11:10:33AM +0800, Jason Wang wrote:
> 
> 
> On 2017年05月18日 11:03, Wei Wang wrote:
> > On 05/17/2017 02:22 PM, Jason Wang wrote:
> > > 
> > > 
> > > On 2017年05月17日 14:16, Jason Wang wrote:
> > > > 
> > > > 
> > > > On 2017年05月16日 15:12, Wei Wang wrote:
> > > > > > > 
> > > > > > 
> > > > > > Hi:
> > > > > > 
> > > > > > Care to post the driver codes too?
> > > > > > 
> > > > > OK. It may take some time to clean up the driver code before
> > > > > post it out. You can first
> > > > > have a check of the draft at the repo here:
> > > > > https://github.com/wei-w-wang/vhost-pci-driver
> > > > > 
> > > > > Best,
> > > > > Wei
> > > > 
> > > > Interesting, looks like there's one copy on tx side. We used to
> > > > have zerocopy support for tun for VM2VM traffic. Could you
> > > > please try to compare it with your vhost-pci-net by:
> > > > 
> > We can analyze from the whole data path - from VM1's network stack to
> > send packets -> VM2's
> > network stack to receive packets. The number of copies are actually the
> > same for both.
> 
> That's why I'm asking you to compare the performance. The only reason for
> vhost-pci is performance. You should prove it.
> 
> > 
> > vhost-pci: 1-copy happen in VM1's driver xmit(), which copes packets
> > from its network stack to VM2's
> > RX ring buffer. (we call it "zerocopy" because there is no intermediate
> > copy between VMs)
> > zerocopy enabled vhost-net: 1-copy happen in tun's recvmsg, which copies
> > packets from VM1's TX ring
> > buffer to VM2's RX ring buffer.
> 
> Actually, there's a major difference here. You do copy in guest which
> consumes time slice of vcpu thread on host. Vhost_net do this in its own
> thread. So I feel vhost_net is even faster here, maybe I was wrong.

Yes but only if you have enough CPUs. The point of vhost-pci
is to put the switch in a VM and scale better with # of VMs.

> > 
> > That being said, we compared to vhost-user, instead of vhost_net,
> > because vhost-user is the one
> > that is used in NFV, which we think is a major use case for vhost-pci.
> 
> If this is true, why not draft a pmd driver instead of a kernel one? And do
> you use virtio-net kernel driver to compare the performance? If yes, has OVS
> dpdk optimized for kernel driver (I think not)?
> 
> What's more important, if vhost-pci is faster, I think its kernel driver
> should be also faster than virtio-net, no?

If you have a vhost CPU per VCPU and can give a host CPU to each using
that will be faster.  But not everyone has so many host CPUs.


> > 
> > 
> > > > - make sure zerocopy is enabled for vhost_net
> > > > - comment skb_orphan_frags() in tun_net_xmit()
> > > > 
> > > > Thanks
> > > > 
> > > 
> > > You can even enable tx batching for tun by ethtool -C tap0 rx-frames
> > > N. This will greatly improve the performance according to my test.
> > > 
> > 
> > Thanks, but would this hurt latency?
> > 
> > Best,
> > Wei
> 
> I don't see this in my test.
> 
> Thanks

Re: [Qemu-devel] [PATCH v4 10/10] vhost: iommu: cache static mapping if there is

2017-05-19 Thread Michael S. Tsirkin

On Fri, May 19, 2017 at 11:19:49AM +0800, Peter Xu wrote:
> This patch pre-heats the vhost iotlb cache when passthrough mode is enabled.
> 
> Sometimes, even if user specified iommu_platform for vhost devices,
> IOMMU might still be disabled. One case is passthrough mode in VT-d
> implementation. We can detect this by observing iommu_list. If it's
> empty, it means IOMMU translation is disabled, then we can actually
> pre-heat the translation (it'll be static mapping then) by first
> invalidating all IOTLB, then cache existing memory ranges into vhost
> backend iotlb using 1:1 mapping.
> 
> Signed-off-by: Peter Xu 

I don't really understand. Is this a performance optimization?
Can you post some #s please?

Also, if it's PT, can't we bypass iommu altogether? That would be
even faster ...

> ---
>  hw/virtio/trace-events |  4 
>  hw/virtio/vhost.c  | 49 +
>  2 files changed, 53 insertions(+)
> 
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index 1f7a7c1..54dcbb3 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -24,3 +24,7 @@ virtio_balloon_handle_output(const char *name, uint64_t 
> gpa) "section name: %s g
>  virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: 
> %d actual: %d"
>  virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d 
> oldactual: %d"
>  virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon 
> target: %"PRIx64" num_pages: %d"
> +
> +# hw/virtio/vhost.c
> +vhost_iommu_commit(void) ""
> +vhost_iommu_static_preheat(void) ""
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 03a46a7..8069135 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -27,6 +27,7 @@
>  #include "hw/virtio/virtio-access.h"
>  #include "migration/blocker.h"
>  #include "sysemu/dma.h"
> +#include "trace.h"
>  
>  /* enabled until disconnected backend stabilizes */
>  #define _VHOST_DEBUG 1
> @@ -730,6 +731,11 @@ static void vhost_iommu_unmap_notify(IOMMUNotifier *n, 
> IOMMUTLBEntry *iotlb)
>  }
>  }
>  
> +static bool vhost_iommu_mr_enabled(struct vhost_dev *dev)
> +{
> +return !QLIST_EMPTY(&dev->iommu_list);
> +}
> +
>  static void vhost_iommu_region_add(MemoryListener *listener,
> MemoryRegionSection *section)
>  {
> @@ -782,6 +788,48 @@ static void vhost_iommu_region_del(MemoryListener 
> *listener,
>  }
>  }
>  
> +static void vhost_iommu_commit(MemoryListener *listener)
> +{
> +struct vhost_dev *dev = container_of(listener, struct vhost_dev,
> + iommu_listener);
> +struct vhost_memory_region *r;
> +int i;
> +
> +trace_vhost_iommu_commit();
> +
> +if (!vhost_iommu_mr_enabled(dev)) {
> +/*
> +* This means iommu_platform is enabled, however iommu memory
> +* region is disabled, e.g., when device passthrough is setup.
> +* Then, no translation is needed any more.
> +*
> +* Let's first invalidate the whole IOTLB, then pre-heat the
> +* static mapping by looping over vhost memory ranges.
> +*/
> +
> +if (dev->vhost_ops->vhost_invalidate_device_iotlb(dev, 0,
> +  UINT64_MAX)) {
> +error_report("%s: flush existing IOTLB failed", __func__);
> +return;
> +}
> +
> +for (i = 0; i < dev->mem->nregions; i++) {
> +r = &dev->mem->regions[i];
> +/* Vhost regions are writable RAM, so IOMMU_RW suits. */
> +if (dev->vhost_ops->vhost_update_device_iotlb(dev,
> +  r->guest_phys_addr,
> +  r->userspace_addr,
> +  r->memory_size,
> +  IOMMU_RW)) {
> +error_report("%s: pre-heat static mapping failed", __func__);
> +return;
> +}
> +}
> +
> +trace_vhost_iommu_static_preheat();
> +}
> +}
> +
>  static void vhost_region_nop(MemoryListener *listener,
>   MemoryRegionSection *section)
>  {
> @@ -1298,6 +1346,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>  hdev->iommu_listener = (MemoryListener) {
>  .region_add = vhost_iommu_region_add,
>  .region_del = vhost_iommu_region_del,
> +.commit = vhost_iommu_commit,
>  };
>  
>  if (hdev->migration_blocker == NULL) {
> -- 
> 2.7.4

Re: [Qemu-devel] [PATCH 06/10] virtio-ccw: use vmstate way for config migration

2017-05-19 Thread Dr. David Alan Gilbert

* Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> 
> 
> On 05/15/2017 09:07 PM, Dr. David Alan Gilbert wrote:
> > * Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> >>
> >>
> >> On 05/08/2017 08:42 PM, Dr. David Alan Gilbert wrote:
> >>> * Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> 
> 
>  On 05/08/2017 07:59 PM, Dr. David Alan Gilbert wrote:
> [..]
> 
>  Why not use virtio oddities? Because they are oddities. I have
>  figured, it's a good idea to separate the migration of the 
>  proxy from the rest: we have two QEMU Device objects and it
>  should be good practice, that these are migrating themselves via
>  DeviceClass.vmsd. That's what I get with this patch set, 
>  for new machine versions (since we can not fix the past), and
>  with the notable difference of config_vector, because it is
>  defined as a common infrastructure (struct VirtIODevice) but
>  ain't migrated as a common virtio infrastructure.
> >>>
> >>> Have you got a bit of a description of your classes/structure - it's
> >>> a little hard to get my head around.
> >>>
> >>
> >> Unfortunately I do not have any extra description besides the comments
> >> and the commit messages. What exactly do you mean  by 'my
> >> classes/structure'?  I would like to provide some helpful developer
> >> documentation on how migration works for s390x. There were voices on the
> >> internal mailing list too requesting something like that, but I find it
> >> hard, because for me, the most challenging part was understanding how
> >> qemu migration works in general and the virtio oddities come next. 
> > 
> > Yes, there are only about 2 people who have the overlap of understanding
> > migration AND s390 IO.
> > 
> >> For example, I still don't understand why (virtio) load_config is
> >> called like that, when what it mainly does is loading state of the proxy
> >> which is basically the reification of the device side of the virtio spec
> >> calls the transport within QOM. (I say mainly, because of this
> >> config_vector which resides in core but is migrated by via a callback for
> >> some strange reason I do not understand).
> > 
> > I think the idea is that virtio_load is trying to act as a generic
> > save/load with a number of virtual components that are specialised for:
> >   a) The device (e.g. rng, serial, gpu, net, blk)
> >   b) The transport (PCI, MMIO, CCW etc)
> >   c) The virtio queue content
> >   d) But has a load of core stuff (features, the virtio ring management)
> > 
> > (a) & (b) are very much virtual-function like that doesn't fit that
> > well with the migration macro structure.
> > 
> > The split between (a) & (c) isn't necessary clean - gpu does it a
> > different way.
> > And the order of a/b/c/d is very random (aka wrong).
> > 
> 
> I mostly agree with your analysis. Honestly I have forgotten about this
> load_queue callback (I think its c)), but it's a strange one too. What it
> does is handling the vector of the queue which is again common
> infrastructure in a sense that it reside within VirtIODevice, but it may
> need some proxy specific handling.
> 
> In my understanding the virtio migration and the migration subsystem
> (lets call it vmstate) are a misfit in the following aspect. Most
> importantly it separation of concerns. In my understanding, for vmstate,
> each device is supposed to load/save itself, and loading state and doing
> stuff with the state we have loaded are separate concerns. I'm not sure
> whats the vmstate place for code which is supposed to run as a part of
> the migration logic, but requires cooperation of devices (e.g. notify in
> virtio_load which basically generates an interrupt). 
> 
> 
> >> Could tell me to which (specific) questions should I provide an answer?
> >> It would make my job much easier.
> >>
> >> About the general approach. First step was to provide VMStateDescription
> >> for the entities which have migration relevant state but no
> >> VMStateDescription (patches 3, 4 and 5).  This is done so that
> >> lots of qemu_put/qemu_get calls can be replaced with few
> >> vmstate_save_state/vmstate_load_state calls (patch 6 and 7) on one hand,
> >> and that state not migrated yet but needed is also included, if the
> >> compat. switch (property) added in patch 2 is on. Then in patch 8, I add
> >> ORB which is a state we wanted to add for some time now, but we needed
> >> vmstate to add it without breaking migration. So we waited.
> > 
> > I'm most interested at this point in understanding which bits aren't
> > changing behaviour - if we've got stuff that's just converting qemu_get
> > to vmstate then lets go for it, no problem; easy to check.
> 
> The commit messages should be helpful. Up to patch 8 all I do is
> converting qemu_get to vmstate as you said. 
> 
> > I'm just trying to make sure I understand the bit where you're
> > converting from being a virtio device.
> > 
> 
> By converting from being a virtio device you mean factoring out the
> tra

Re: [Qemu-devel] [PATCH v12 2/2] migration: spapr: migrate pending_events of spapr state

2017-05-19 Thread Michael Roth

Quoting Daniel Henrique Barboza (2017-05-19 09:27:50)
> From: Jianjun Duan 
> 
> In racing situations between hotplug events and migration operation,
> a rtas hotplug event could have not yet been delivered to the source
> guest when migration is started. In this case the pending_events of
> spapr state need to be transmitted to the target so that the hotplug
> event can be finished on the target.
> 
> All the different fields of the events are encoded as defined by
> PAPR. We can migrate them as a binary stream inside VBUFFER without
> any concerns about data padding or endianness.
> 
> pending_events is put in a subsection in the spapr state VMSD to make
> sure migration across different versions is not broken.
> 
> Signed-off-by: Jianjun Duan 
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Michael Roth 

> ---
>  hw/ppc/spapr.c | 32 
>  hw/ppc/spapr_events.c  | 12 
>  include/hw/ppc/spapr.h |  3 ++-
>  3 files changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 0980d73..5afd328 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1444,6 +1444,37 @@ static bool version_before_3(void *opaque, int 
> version_id)
>  return version_id < 3;
>  }
> 
> +static bool spapr_pending_events_needed(void *opaque)
> +{
> +sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
> +return !QTAILQ_EMPTY(&spapr->pending_events);
> +}
> +
> +static const VMStateDescription vmstate_spapr_event_entry = {
> +.name = "spapr_event_log_entry",
> +.version_id = 1,
> +.minimum_version_id = 1,
> +.fields = (VMStateField[]) {
> +VMSTATE_INT32(log_type, sPAPREventLogEntry),
> +VMSTATE_UINT32(data_size, sPAPREventLogEntry),
> +VMSTATE_VBUFFER_ALLOC_UINT32(data, sPAPREventLogEntry, 0,
> + NULL, data_size),
> +VMSTATE_END_OF_LIST()
> +},
> +};
> +
> +static const VMStateDescription vmstate_spapr_pending_events = {
> +.name = "spapr_pending_events",
> +.version_id = 1,
> +.minimum_version_id = 1,
> +.needed = spapr_pending_events_needed,
> +.fields = (VMStateField[]) {
> +VMSTATE_QTAILQ_V(pending_events, sPAPRMachineState, 1,
> + vmstate_spapr_event_entry, sPAPREventLogEntry, 
> next),
> +VMSTATE_END_OF_LIST()
> +},
> +};
> +
>  static bool spapr_ov5_cas_needed(void *opaque)
>  {
>  sPAPRMachineState *spapr = opaque;
> @@ -1542,6 +1573,7 @@ static const VMStateDescription vmstate_spapr = {
>  .subsections = (const VMStateDescription*[]) {
>  &vmstate_spapr_ov5_cas,
>  &vmstate_spapr_patb_entry,
> +&vmstate_spapr_pending_events,
>  NULL
>  }
>  };
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 73e2a18..a509c46 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -350,6 +350,18 @@ static void rtas_event_log_queue(int log_type, void 
> *data)
>  g_assert(data);
>  entry->log_type = log_type;
>  entry->data = data;
> +
> +switch (log_type) {
> +case RTAS_LOG_TYPE_EPOW:
> +entry->data_size = sizeof(struct epow_log_full);
> +break;
> +case RTAS_LOG_TYPE_HOTPLUG:
> +entry->data_size = sizeof(struct hp_log_full);
> +break;
> +default:
> +g_assert(false);
> +}
> +
>  QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next);
>  }
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 02239a5..0554e11 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -597,8 +597,9 @@ struct sPAPRTCETable {
>  sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn);
> 
>  struct sPAPREventLogEntry {
> -int log_type;
> +int32_t log_type;
>  void *data;
> +uint32_t data_size;
>  QTAILQ_ENTRY(sPAPREventLogEntry) next;
>  };
> 
> -- 
> 2.9.4
>

Re: [Qemu-devel] [PATCH 03/10] s390x/css: add vmstate entities for css

2017-05-19 Thread Dr. David Alan Gilbert

* Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> 
> 
> On 05/19/2017 04:55 PM, Dr. David Alan Gilbert wrote:
> > * Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> >>
> >>
> >> On 05/15/2017 08:01 PM, Dr. David Alan Gilbert wrote:
> >>> * Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> 
> 
>  On 05/08/2017 06:45 PM, Dr. David Alan Gilbert wrote:
> > * Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> >> As a preparation for switching to a vmstate based migration let us
> >> introduce vmstate entities (e.g. VMStateDescription) for the css 
> >> entities
> >> to be migrated. Alongside some comments explaining or indicating the 
> >> not
> >> migration of certain members are introduced too.
> >>
> >> No changes in behavior, we just added some dead code -- which should
> >> rise to life soon.
> >>
> >> Signed-off-by: Halil Pasic 
> >> ---
> >>  hw/s390x/css.c | 276 
> >> +
> >>  include/hw/s390x/css.h |  10 +-
> >>  2 files changed, 285 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/s390x/css.c b/hw/s390x/css.c
> >> index c03bb20..2bda7d0 100644
> >> --- a/hw/s390x/css.c
> >> +++ b/hw/s390x/css.c
> >> @@ -20,29 +20,231 @@
> >>  #include "hw/s390x/css.h"
> >>  #include "trace.h"
> >>  #include "hw/s390x/s390_flic.h"
> >> +#include "hw/s390x/s390-virtio-ccw.h"
> >>  
> 
>  [..]
> 
> >> +static int css_get_ind_addr(QEMUFile *f, void *pv, size_t size,
> >> +VMStateField *field)
> >> +{
> >> +int32_t len;
> >> +IndAddr **ind_addr = pv;
> >> +
> >> +len = qemu_get_be32(f);
> >> +if (len != 0) {
> >> +*ind_addr = get_indicator(qemu_get_be64(f), len);
> >> +} else {
> >> +qemu_get_be64(f);
> >> +*ind_addr = NULL;
> >> +}
> >> +return 0;
> >> +}
> >> +
> >> +static int css_put_ind_addr(QEMUFile *f, void *pv, size_t size,
> >> +VMStateField *field, QJSON *vmdesc)
> >> +{
> >> +IndAddr *ind_addr = *(IndAddr **) pv;
> >> +
> >> +if (ind_addr != NULL) {
> >> +qemu_put_be32(f, ind_addr->len);
> >> +qemu_put_be64(f, ind_addr->addr);
> >> +} else {
> >> +qemu_put_be32(f, 0);
> >> +qemu_put_be64(f, 0UL);
> >> +}
> >> +return 0;
> >> +}
> >> +
> >> +const VMStateInfo vmstate_info_ind_addr = {
> >> +.name = "s390_ind_addr",
> >> +.get = css_get_ind_addr,
> >> +.put = css_put_ind_addr
> >> +};
> >
> > You should be able to avoid this .get/.put by using VMSTATE_WITH_TMP,
> > declare a temporary struct something like:
> >   struct tmp_ind_addr {
> >  IndAddr *parent;
> >  uint32_t  len;
> >  uint64_t  addr;
> >   }
> >
> > and then your .get/.put routines turn into pre_save/post_load
> > routines to just setup the len/addr.
> >
> 
>  I don't think this is going to work -- unfortunately! You can see below,
>  how this IndAddr* migration stuff is supposed to be used:
>  the client code just uses the VMSTATE_PTR_TO_IND_ADDR macro as a
>  field when describing state which needs an IndAddr* migrated.
> 
>  The problem is, we do not know in what state will this field
>  be embedded, the pre_save/post_load called by put_tmp/get_tmp
>  is however copying the pointer to this state into the parent.
>  So instead of having a pointer to IndAddr* in those functions
>  and updating it accordingly, I would have to find the IndAddr*
>  in some arbitrary state (in our case VirtioCcwDevice) first,
>  and I lack information for that.
> 
>  If it's hard to follow I can give you the patch I was debugging
>  to come to this conclusion. (By the way I ended up with 10
>  lines of code more than in this version, and although I think
>  it looks nicer, it's simpler only if one knows how WITH_TMP
>  works. My plan was to ask you which version do you like more
>  and go with that before I realized it ain't gonna work.)
> 
> >>>
> >>> Yes, I see - I've got some similar other cases; the challenge
> >>> is it's a custom allocator - 'get_indicator' - and it's used
> >>> as fields in a few places.  Hmm.
> >>>
> >>>
> >>
> >> The problem can be worked around by wrapping the WITH_TMP into another
> >> vmsd and using VMSTATE_STRUCT for describing the field in question. It's
> >> quite some boilerplate (+16 lines). Should I post the patch here?
> > 
> > Yes please.
> > 
> 8<--
> 
> From a0b6c725b114745c8434f683133995c4e33d936e Mon Sep 17 00:00:00 2001
> From: Halil Pasic 
> Date: Tue, 9 May 2017 12:06:42 +0200
> Subject: [PATCH 1/1] s390x/css: repla

Re: [Qemu-devel] [PATCH 03/10] s390x/css: add vmstate entities for css

2017-05-19 Thread Dr. David Alan Gilbert

* Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> 
> 
> On 05/19/2017 04:55 PM, Dr. David Alan Gilbert wrote:
> >> We could also consider making WITH_TMP act as a normal field. 
> >> Working on the whole state looks a bit like a corner case:
> >> we have some stuff adjacent in the migration stream, and we have
> >> to map it on multiple fields (and vice-versa). Getting the whole
> >> state with a pointer to a certain field could work via container_of.
> > You do need to know which field you're working on to be able to safely
> > use container_of, so I'm not sure how it would work for you in this
> > case.
> 
> 
> Well, if you have to write to just one field you are good because you
> already have a pointer to that field (.offset was added).
> 
> If you need to write to multiple fields in post_load then you just pick
> one of the fields you are going to write to (probably the first) and use
> container_of to gain access to the whole state. The logic is specific to
> the bunch of the fields you are going to touch anyway.
> 
> In fact any member of the state struct will do; it's only important that
> you use the same when creating the VMStateField and when trying to get a
> pointer to the parent in pre_save and post_load.
> 
> I haven't tried, so I'm not 100% sure, but if you like I can try, and send
> you a patch if it's viable. 
> 
> I think the key to a good solution is really what is intended and typical
> usage, and what is corner case. My patch in the other reply shows that we
> can do without changing the ways of VMSTATE_WITH_TMP. I think we can make
> what I'm trying to do here a bit prettier at the expense of making what
> you are doing in virtio-net a bit uglier, but whether it's a good idea to
> do so, I can't tell.

Lets go with what you put in the other patch (I replied to it); I hadn't
realised that was possible (hence my comment below).
Once we have a bunch of different uses of VMSTATE_WITH_TMP in the code
base, I'll step back and see how to tidy them up.

Dave

> > 
> > The other thought I'd had was that perhaps we could change the temporary
> > structure in VMSTATE_WITH_TMP to:
> > 
> >   struct foo {
> >  struct whatever **parent;
> > 
> > so now you could write to *parent in cases like these.
> >
> 
> Sorry, I do not get your idea. If you have some WIP patch in this
> direction I would be happy to provide some feedback.
> 
>  
> >> Btw, I would rather call it get_indicator a factory method or even a
> >> constructor than an allocator, but I think we understand each-other
> >> anyway.
> > Yes; I'm not too worried about the actual name as long as it's short
> > and obvious.
> > 
> > I'd thought of 'allocator' since in most cases it's used where the
> > load-time code allocates memory for the object being loaded.
> > A constructor is normally something I think of as happening after
> > allocation; and a factory, hmm maybe.  However, if it does the right
> > thing I wouldn't object to any of those names.
> > 
> 
> I think we are on the same page.
> 
> Cheers,
> Halil
> 
> > Dave
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH 06/10] virtio-ccw: use vmstate way for config migration

2017-05-19 Thread Halil Pasic

On 05/19/2017 07:28 PM, Dr. David Alan Gilbert wrote:
>> To sum it up although I'm currently leaning towards abandoning my idea
>> of two sections for two devices, I'm not comfortable with making the
>> call myself. I'm hoping for some maintainer guidance (s390x, virtio
>> and migration). 
>  dhildenb etc>
> 
> OK, so I think:
>   a) First split the series into two separate series; one that
> VMStatifies the existing stuff without breaking compatibility;
> and one that adds the new stuff.  Lets get the first of those
> going in while we think about the second.
> 
> a.1) I'd do this with patches that convert one chunk into
>  vmstate and remove the corresponding C code at the same time.
> 
>   b) While the way PCI devices are done is weird, I think it'll
> be a lot simpler if you can stick to a structure that's similar
> to them while diverging.  It's hard enough for those of us
> who don't do Virtio every day to get our minds around virtio-pci
> without it being different for virtio-ccw/css.
> 
>   c) To do (b) I suggest:
>   c.1) you *don't* add a vmsd to your virtio_ccw_device
> 
>   c.2) but *do* add a VMSTATE_CCW_DEVICE to the start of any
>  non-virtio devices you migrate (like each of the PCI devices
>   have)
> 
>   c.3) You can add extra state for CSS to the ->save_extra_state
>handle on virtio devices or to the config
> 
>   d) vmstatifying the config is OK as well.
> 
> I should say I'm no virtio expert, so if any of that's truly
> mad say so.
> 
> Dave
> 

Agreed (a,b,c,d). Reorganizing my patch set according to a) is
going to require some effort, but it should not be too bad. 
About c.2): I don't think we have any migratable non virtio devices
yet, but I'll keep it in mind.

I do not understand what you mean by c.3) and extra_state. Maybe
it will settle with time. My idea of extending VirtioCcwDevice
is just adding subsections to its VMStateDescription. The
call vmstate_save_state(f, &vmstate_virtio_ccw_dev, dev, NULL)
in save_config should take care of compatibility. Maybe some
staring at virtio-pci is going to help, but right now I can't tell
what's the extra_state for, and how/why is it 'extra'.

Thanks for your patience!

Regards,
Halil

[Qemu-devel] [PATCH v2 0/5] Fix handling of IPv4/IPv6 dual stack

2017-05-19 Thread Daniel P. Berrange

This is a (much larger) followup to:

  v1: https://lists.nongnu.org/archive/html/qemu-devel/2017-04/msg05659.html

This series aims to fix a lot of bugs related to handling of IPv4 / IPv6
dual stack.

 - The VNC server mistakenly listened on two separate ports 5900+5901
   when the to= parameter was given
 - IPv6 sockets are accepting IPv4 clients even when IPv4 is set to
   be disabled
 - IPv6 sockets are failing to accept IPv4 clients when IPv4 is not set
   to be disabled
 - The VNC server was losing the ipv4=/ipv6= settings due to a bug
   in the DNS resolver

The behaviour of all this is really subtle and hard to get working correctly
across all the different network backends. Thus, the most important part of
this patch series is the last patch which adds a test case covering the
backends for -vnc, -chardev tcp, -net socket, and -incoming socket, with
a 120 entry matrix.

IOW, if you think any of the first 4 patches are applying the wrong logic,
then take a look at the last patch and indicate which test matrix entries
are believed to be defining wrong behaviour :-)

Daniel P. Berrange (5):
  sockets: ensure we can bind to both ipv4 & ipv6 separately
  sockets: don't block IPv4 clients when listening on "::"
  sockets: ensure we don't accept IPv4 clients when IPv4 is disabled
  io: preserve ipv4/ipv6 flags when resolving InetSocketAddress
  tests: add functional test validating ipv4/ipv6 address flag handling

 io/dns-resolver.c  |   6 +-
 tests/.gitignore   |   1 +
 tests/Makefile.include |   4 +
 tests/test-sockets-proto.c | 855 +
 util/qemu-sockets.c|  71 +++-
 5 files changed, 916 insertions(+), 21 deletions(-)
 create mode 100644 tests/test-sockets-proto.c

-- 
2.9.3

[Qemu-devel] [PATCH v2 1/5] sockets: ensure we can bind to both ipv4 & ipv6 separately

2017-05-19 Thread Daniel P. Berrange

When binding to an IPv6 socket we currently force the
IPV6_V6ONLY flag to off. This means that the IPv6 socket
will accept both IPv4 & IPv6 clients when QEMU is launched
with something like

  -vnc :::1

While this is good for that case, it is bad for other
cases. For example if an empty hostname is given,
getaddrinfo resolves it to 2 addresses 0.0.0.0 and ::,
in that order. We will thus bind to 0.0.0.0 first, and
then fail to bind to :: on the same port. The same
problem can happen if any other hostname lookup causes
the IPv4 address to be reported before the IPv6 address.

When we get an IPv6 bind failure, we should re-try the
same port, but with IPV6_V6ONLY turned on again, to
avoid clash with any IPv4 listener.

This ensures that

  -vnc :1

will bind successfully to both 0.0.0.0 and ::, and also
avoid

  -vnc :1,to=2

from mistakenly using a 2nd port for the :: listener.

Signed-off-by: Daniel P. Berrange 
---
 util/qemu-sockets.c | 31 +++
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index d8183f7..397212b 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -208,22 +208,37 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
 }
 
 socket_set_fast_reuse(slisten);
-#ifdef IPV6_V6ONLY
-if (e->ai_family == PF_INET6) {
-/* listen on both ipv4 and ipv6 */
-const int off = 0;
-qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY, &off,
-sizeof(off));
-}
-#endif
 
 port_min = inet_getport(e);
 port_max = saddr->has_to ? saddr->to + port_offset : port_min;
 for (p = port_min; p <= port_max; p++) {
+#ifdef IPV6_V6ONLY
+/* listen on both ipv4 and ipv6 */
+int v6only = 0;
+#endif
 inet_setport(e, p);
+#ifdef IPV6_V6ONLY
+rebind:
+if (e->ai_family == PF_INET6) {
+qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY, &v6only,
+sizeof(v6only));
+}
+#endif
 if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) {
 goto listen;
 }
+
+#ifdef IPV6_V6ONLY
+/* If we got EADDRINUSE from an IPv6 bind & V6ONLY is unset,
+ * it could be that the IPv4 port is already claimed, so retry
+ * with V6ONLY set
+ */
+if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) {
+v6only = 1;
+goto rebind;
+}
+#endif
+
 if (p == port_max) {
 if (!e->ai_next) {
 error_setg_errno(errp, errno, "Failed to bind socket");
-- 
2.9.3

[Qemu-devel] [PATCH v2 2/5] sockets: don't block IPv4 clients when listening on "::"

2017-05-19 Thread Daniel P. Berrange

When inet_parse() parses the hostname, it is forcing the
has_ipv6 && ipv6 flags if the address contains a ":". This
means that if the user had set the ipv4=on flag, to try to
restrict the listener to just ipv4, an error would not have
been raised.  eg

   -incoming tcp:[::]:9000,ipv4

should have raised an error because listening for IPv4
on "::" is a non-sensical combination. With this removed,
we now call getaddrinfo() on "::" passing PF_INET and
so getaddrinfo reports an error about the hostname being
incompatible with the requested protocol.

Likewise it is explicitly setting the has_ipv4 & ipv4
flags when the address contains only digits + '.'. This
has no ill-effect, but also has no benefit, so is removed.

Signed-off-by: Daniel P. Berrange 
---
 util/qemu-sockets.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index 397212b..b82412e 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -618,16 +618,12 @@ int inet_parse(InetSocketAddress *addr, const char *str, 
Error **errp)
 error_setg(errp, "error parsing IPv6 address '%s'", str);
 return -1;
 }
-addr->ipv6 = addr->has_ipv6 = true;
 } else {
 /* hostname or IPv4 addr */
 if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) {
 error_setg(errp, "error parsing address '%s'", str);
 return -1;
 }
-if (host[strspn(host, "0123456789.")] == '\0') {
-addr->ipv4 = addr->has_ipv4 = true;
-}
 }
 
 addr->host = g_strdup(host);
-- 
2.9.3

[Qemu-devel] [PATCH v2 4/5] io: preserve ipv4/ipv6 flags when resolving InetSocketAddress

2017-05-19 Thread Daniel P. Berrange

The original InetSocketAddress struct may have has_ipv4 and
has_ipv6 fields set, which will control both the ai_family
used during DNS resolution, and later use of the V6ONLY
flag.

Currently the standalone DNS resolver code drops the
has_ipv4 & has_ipv6 flags after resolving, which means
the later bind() code won't correctly set V6ONLY.

This fixes the following scenarios

  -vnc :0,ipv4=off
  -vnc :0,ipv6=on
  -vnc :::0,ipv4=off
  -vnc :::0,ipv6=on

which all mistakenly accepted IPv4 clients

Signed-off-by: Daniel P. Berrange 
---
 io/dns-resolver.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/io/dns-resolver.c b/io/dns-resolver.c
index 57a8896..c072d12 100644
--- a/io/dns-resolver.c
+++ b/io/dns-resolver.c
@@ -116,8 +116,10 @@ static int 
qio_dns_resolver_lookup_sync_inet(QIODNSResolver *resolver,
 .numeric = true,
 .has_to = iaddr->has_to,
 .to = iaddr->to,
-.has_ipv4 = false,
-.has_ipv6 = false,
+.has_ipv4 = iaddr->has_ipv4,
+.ipv4 = iaddr->ipv4,
+.has_ipv6 = iaddr->has_ipv6,
+.ipv6 = iaddr->ipv6,
 };
 
 (*addrs)[i] = newaddr;
-- 
2.9.3

[Qemu-devel] [PATCH v2 3/5] sockets: ensure we don't accept IPv4 clients when IPv4 is disabled

2017-05-19 Thread Daniel P. Berrange

Currently if you disable listening on IPv4 addresses, via the
CLI flag ipv4=off, we still mistakenly accept IPv4 clients via
the IPv6 listener socket due to IPV6_V6ONLY flag being unset.

We must ensure IPV6_V6ONLY is always set if ipv4=off

This fixes the following scenarios

  -incoming tcp::9000,ipv6=on
  -incoming tcp:[::]:9000,ipv6=on
  -chardev socket,id=cdev0,host=,port=9000,server,nowait,ipv4=off
  -chardev socket,id=cdev0,host=,port=9000,server,nowait,ipv6=on
  -chardev socket,id=cdev0,host=::,port=9000,server,nowait,ipv4=off
  -chardev socket,id=cdev0,host=::,port=9000,server,nowait,ipv6=on

which all mistakenly accepted IPv4 clients

Signed-off-by: Daniel P. Berrange 
---
 util/qemu-sockets.c | 40 +++-
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index b82412e..c0f2d92 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -104,17 +104,16 @@ NetworkAddressFamily inet_netfamily(int family)
  *   f t   PF_INET6
  *   t -   PF_INET
  *   t f   PF_INET
- *   t t   PF_INET6
+ *   t t   PF_INET6/PF_UNSPEC
  *
  * NB, this matrix is only about getting the necessary results
  * from getaddrinfo(). Some of the cases require further work
  * after reading results from getaddrinfo in order to fully
- * apply the logic the end user wants. eg with the last case
- * ipv4=t + ipv6=t + PF_INET6, getaddrinfo alone can only
- * guarantee the ipv6=t part of the request - we need more
- * checks to provide ipv4=t part of the guarantee. This is
- * outside scope of this method and not currently handled by
- * callers at all.
+ * apply the logic the end user wants.
+ *
+ * In the first and last cases, we must set IPV6_V6ONLY=0
+ * when binding, to allow a single listener to potentially
+ * accept both IPv4+6 addresses.
  */
 int inet_ai_family_from_address(InetSocketAddress *addr,
 Error **errp)
@@ -124,6 +123,23 @@ int inet_ai_family_from_address(InetSocketAddress *addr,
 error_setg(errp, "Cannot disable IPv4 and IPv6 at same time");
 return PF_UNSPEC;
 }
+if ((addr->has_ipv6 && addr->ipv6) && (addr->has_ipv4 && addr->ipv4)) {
+/*
+ * Some backends can only do a single listener. In that case
+ * we want empty hostname to resolve to "::" and then use the
+ * flag IPV6_V6ONLY==0 to get both protocols on 1 socket. This
+ * doesn't work for addresses other than "", so they're just
+ * inevitably broken until multiple listeners can be used,
+ * and thus we honour getaddrinfo automatic protocol detection
+ * Once all backends do multi-listener, remove the PF_INET6
+ * branch entirely.
+ */
+if (!addr->host || g_str_equal(addr->host, "")) {
+return PF_INET6;
+} else {
+return PF_UNSPEC;
+}
+}
 if ((addr->has_ipv6 && addr->ipv6) || (addr->has_ipv4 && !addr->ipv4)) {
 return PF_INET6;
 }
@@ -213,8 +229,14 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
 port_max = saddr->has_to ? saddr->to + port_offset : port_min;
 for (p = port_min; p <= port_max; p++) {
 #ifdef IPV6_V6ONLY
-/* listen on both ipv4 and ipv6 */
-int v6only = 0;
+/*
+ * Deals with first & last cases in matrix in comment
+ * for inet_ai_family_from_address().
+ */
+int v6only =
+((!saddr->has_ipv4 && !saddr->has_ipv6) ||
+ (saddr->has_ipv4 && saddr->ipv4 &&
+  saddr->has_ipv6 && saddr->ipv6)) ? 0 : 1;
 #endif
 inet_setport(e, p);
 #ifdef IPV6_V6ONLY
-- 
2.9.3

Re: [Qemu-devel] [PATCH 03/10] s390x/css: add vmstate entities for css

2017-05-19 Thread Halil Pasic



On 05/19/2017 07:47 PM, Dr. David Alan Gilbert wrote:
> * Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
>>
>>
>> On 05/19/2017 04:55 PM, Dr. David Alan Gilbert wrote:
 We could also consider making WITH_TMP act as a normal field. 
 Working on the whole state looks a bit like a corner case:
 we have some stuff adjacent in the migration stream, and we have
 to map it on multiple fields (and vice-versa). Getting the whole
 state with a pointer to a certain field could work via container_of.
>>> You do need to know which field you're working on to be able to safely
>>> use container_of, so I'm not sure how it would work for you in this
>>> case.
>>
>>
>> Well, if you have to write to just one field you are good because you
>> already have a pointer to that field (.offset was added).
>>
>> If you need to write to multiple fields in post_load then you just pick
>> one of the fields you are going to write to (probably the first) and use
>> container_of to gain access to the whole state. The logic is specific to
>> the bunch of the fields you are going to touch anyway.
>>
>> In fact any member of the state struct will do; it's only important that
>> you use the same when creating the VMStateField and when trying to get a
>> pointer to the parent in pre_save and post_load.
>>
>> I haven't tried, so I'm not 100% sure, but if you like I can try, and send
>> you a patch if it's viable. 
>>
>> I think the key to a good solution is really what is intended and typical
>> usage, and what is corner case. My patch in the other reply shows that we
>> can do without changing the ways of VMSTATE_WITH_TMP. I think we can make
>> what I'm trying to do here a bit prettier at the expense of making what
>> you are doing in virtio-net a bit uglier, but whether it's a good idea to
>> do so, I can't tell.
> 
> Lets go with what you put in the other patch (I replied to it); I hadn't
> realised that was possible (hence my comment below).
> Once we have a bunch of different uses of VMSTATE_WITH_TMP in the code
> base, I'll step back and see how to tidy them up.
> 
> Dave
> 

Sounds very reasonable. Let's do it like that!

Halil

>>>
>>> The other thought I'd had was that perhaps we could change the temporary
>>> structure in VMSTATE_WITH_TMP to:
>>>
>>>   struct foo {
>>>  struct whatever **parent;
>>>
>>> so now you could write to *parent in cases like these.
>>>
>>
>> Sorry, I do not get your idea. If you have some WIP patch in this
>> direction I would be happy to provide some feedback.
>>
>>  
 Btw, I would rather call it get_indicator a factory method or even a
 constructor than an allocator, but I think we understand each-other
 anyway.
>>> Yes; I'm not too worried about the actual name as long as it's short
>>> and obvious.
>>>
>>> I'd thought of 'allocator' since in most cases it's used where the
>>> load-time code allocates memory for the object being loaded.
>>> A constructor is normally something I think of as happening after
>>> allocation; and a factory, hmm maybe.  However, if it does the right
>>> thing I wouldn't object to any of those names.
>>>
>>
>> I think we are on the same page.
>>
>> Cheers,
>> Halil
>>
>>> Dave
>>
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
>

[Qemu-devel] [PATCH v2 5/5] tests: add functional test validating ipv4/ipv6 address flag handling

2017-05-19 Thread Daniel P. Berrange

The semantics around handling ipv4=on|off & ipv6=on|off are quite
subtle to understand in combination with the various hostname addresses
and backend types. Introduce a massive test matrix that launches QEMU
and validates the ability to connect a client on each protocol as
appropriate.

The test requires that the host has ability to bind to both :: and
0.0.0.0, on port 9000. If either protocol is not available, or if
something is already listening on that port the test will skip.

Although it isn't using the QTest APIs, it expects the
QTEST_QEMU_BINARY env variable to be set.

Signed-off-by: Daniel P. Berrange 
---
 tests/.gitignore   |   1 +
 tests/Makefile.include |   4 +
 tests/test-sockets-proto.c | 855 +
 3 files changed, 860 insertions(+)
 create mode 100644 tests/test-sockets-proto.c

diff --git a/tests/.gitignore b/tests/.gitignore
index 40c2e3e..9fa14e5 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -76,6 +76,7 @@ test-qobject-output-visitor
 test-rcu-list
 test-replication
 test-shift128
+test-sockets-proto
 test-string-input-visitor
 test-string-output-visitor
 test-thread-pool
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 16ff8f3..c3b487e 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -261,6 +261,7 @@ check-qtest-i386-y += tests/test-filter-redirector$(EXESUF)
 check-qtest-i386-y += tests/postcopy-test$(EXESUF)
 check-qtest-i386-y += tests/test-x86-cpuid-compat$(EXESUF)
 check-qtest-i386-y += tests/numa-test$(EXESUF)
+check-qtest-i386-y += tests/test-sockets-proto$(EXESUF)
 check-qtest-x86_64-y += $(check-qtest-i386-y)
 gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
 gcov-files-x86_64-y = $(subst 
i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@@ -287,6 +288,7 @@ check-qtest-ppc64-y += tests/prom-env-test$(EXESUF)
 check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF)
 check-qtest-ppc64-y += tests/drive_del-test$(EXESUF)
 check-qtest-ppc64-y += tests/postcopy-test$(EXESUF)
+check-qtest-ppc64-y += tests/test-sockets-proto$(EXESUF)
 check-qtest-ppc64-y += tests/boot-serial-test$(EXESUF)
 check-qtest-ppc64-y += tests/rtas-test$(EXESUF)
 check-qtest-ppc64-y += tests/pxe-test$(EXESUF)
@@ -741,6 +743,8 @@ tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o 
$(libqos-usb-obj-y)
 tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y)
 tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o
 tests/postcopy-test$(EXESUF): tests/postcopy-test.o
+tests/test-sockets-proto$(EXESUF): tests/test-sockets-proto.o \
+   $(test-io-obj-y)
 tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o $(test-util-obj-y) \
$(qtest-obj-y) $(test-io-obj-y) $(libqos-virtio-obj-y) 
$(libqos-pc-obj-y) \
$(chardev-obj-y)
diff --git a/tests/test-sockets-proto.c b/tests/test-sockets-proto.c
new file mode 100644
index 000..f058a50
--- /dev/null
+++ b/tests/test-sockets-proto.c
@@ -0,0 +1,855 @@
+/*
+ * QTest for IPv4/IPv6 protocol setup
+ *
+ * Copyright (c) 2017 Red Hat, Inc. and/or its affiliates
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "io/channel-socket.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+
+typedef struct {
+const char *name;
+const char *args;
+bool ipv4;
+bool ipv6;
+bool error;
+} QSocketsData;
+
+/*
+ * This is the giant matrix of combinations we need to consider.
+ * There are 3 axes we deal with
+ *
+ * Axis 1: Protocol flags:
+ *
+ *  ipv4=unset, ipv6=unset  -> v4 & v6 clients [1]
+ *  ipv4=unset, ipv6=off-> v4 clients only
+ *  ipv4=unset, ipv6=on -> v6 clients only
+ *  ipv4=off, ipv6=unset-> v6 clients only
+ *  ipv4=off, ipv6=off  -> error - can't disable both [2]
+ *  ipv4=off, ipv6=on   -> v6 clients only
+ *  ipv4=on, ipv6=unset -> v4 clients only
+ *  ipv4=on, ipv6=off   -> v4 clients only
+ *  ipv4=on, ipv6=on-> v4 & v6 clients [3]
+ *
+ * Depending on the listening address, some of those combinations
+ * may result in errors. eg ipv4=off,ipv6=on combined with 0.0.0.0
+ * is nonsensical.
+ *
+ * [1] Some backends only support a single socket listener, so
+ * will actually only allow v4 clients
+ * [2] QEMU should fail to startup in this case
+ * [3] If hostname is "" or "::", then we get a single listener
+ * on IPv6 and thus can also accept v4 clients. For all other
+ * hostnames, have same problem as [1].
+ *
+ * Axis 2: Listening address:
+ *
+ *  ""- resolves to 0.0.0.0 and ::, in that order
+ *  "0.0.0.0" - v4 clients only
+ *  "::"  - Mostly v6 clients only. Some scenarios should
+ *  permit v4 clients too.
+ *
+ * Axis 3: Backend type:
+ *
+ *  Migration - restricted to a single listener. Also relies
+ *  on buggy inet_parse() which can't accept
+ *  =off/=on parame

Re: [Qemu-devel] [PATCH 06/10] virtio-ccw: use vmstate way for config migration

2017-05-19 Thread Dr. David Alan Gilbert

* Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> 
> 
> On 05/19/2017 07:28 PM, Dr. David Alan Gilbert wrote:
> >> To sum it up although I'm currently leaning towards abandoning my idea
> >> of two sections for two devices, I'm not comfortable with making the
> >> call myself. I'm hoping for some maintainer guidance (s390x, virtio
> >> and migration). 
> >  > dhildenb etc>
> > 
> > OK, so I think:
> >   a) First split the series into two separate series; one that
> > VMStatifies the existing stuff without breaking compatibility;
> > and one that adds the new stuff.  Lets get the first of those
> > going in while we think about the second.
> > 
> > a.1) I'd do this with patches that convert one chunk into
> >  vmstate and remove the corresponding C code at the same time.
> > 
> >   b) While the way PCI devices are done is weird, I think it'll
> > be a lot simpler if you can stick to a structure that's similar
> > to them while diverging.  It's hard enough for those of us
> > who don't do Virtio every day to get our minds around virtio-pci
> > without it being different for virtio-ccw/css.
> > 
> >   c) To do (b) I suggest:
> >   c.1) you *don't* add a vmsd to your virtio_ccw_device
> > 
> >   c.2) but *do* add a VMSTATE_CCW_DEVICE to the start of any
> >  non-virtio devices you migrate (like each of the PCI devices
> >   have)
> > 
> >   c.3) You can add extra state for CSS to the ->save_extra_state
> >handle on virtio devices or to the config
> > 
> >   d) vmstatifying the config is OK as well.
> > 
> > I should say I'm no virtio expert, so if any of that's truly
> > mad say so.
> > 
> > Dave
> > 
> 
> Agreed (a,b,c,d). Reorganizing my patch set according to a) is
> going to require some effort, but it should not be too bad. 
> About c.2): I don't think we have any migratable non virtio devices
> yet, but I'll keep it in mind.
> 
> I do not understand what you mean by c.3) and extra_state. Maybe
> it will settle with time. My idea of extending VirtioCcwDevice
> is just adding subsections to its VMStateDescription. The
> call vmstate_save_state(f, &vmstate_virtio_ccw_dev, dev, NULL)
> in save_config should take care of compatibility. Maybe some
> staring at virtio-pci is going to help, but right now I can't tell
> what's the extra_state for, and how/why is it 'extra'.

Yes adding extra subsections into the 'config' is probably fine;
but there's also another hook that Jason added, see a6df8adf3,
it's an existing subsection at the end of the virtio state
that can be linked for transport specific data.

Dave

> Thanks for your patience!
> 
> Regards,
> Halil
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH RFC 1/6] io: only allow return path for socket typed

2017-05-19 Thread Dr. David Alan Gilbert

* Daniel P. Berrange (berra...@redhat.com) wrote:
> On Fri, May 19, 2017 at 03:33:12PM +0100, Dr. David Alan Gilbert wrote:
> > * Daniel P. Berrange (berra...@redhat.com) wrote:
> > > > shutdown() is safe, in that it stops any other threads accessing the fd
> > > > but doesn't allow its reallocation until the close;  We perform the
> > > > close only when we've joined all other threads that were using the fd.
> > > > Any of the threads that do new calls on the fd get an error and quickly
> > > > fall down their error paths.
> > > 
> > > Ahh that's certainly an interesting scenario. That would certainly be
> > > a problem with the migration code when this was originally written.
> > > It had two QEMUFile structs each with an 'int fd' field, so when you
> > > close 'fd' on one QEMUFile struct, it wouldn't update the other QEMUFile
> > > used by another thread.
> > > 
> > > Since we switched over to use QIOChannel though, I think the thread
> > > scenario you describe should be avoided entirely. When you have multiple
> > > QEMUFile objects, they each have a reference counted pointer to the same
> > > underlying QIOChannel object instance. So when QEMUFile triggers a call
> > > to qio_channel_close() in one thread, that'll set fd=-1 in the QIOChannel.
> > > Since the other threads have a reference to the same QIOChannel object,
> > > they'll now see this fd == -1 straightaway.
> > > 
> > > So, IIUC, this should make the need for shutdown() redundant (at least
> > > for the thread race conditions you describe).
> > 
> > That's not thread safe unless you're doing some very careful locking.
> > Consider:
> >   T1  T2   
> >  oldfd=fd   tmp=fd
> >  fd=-1
> >  close(oldfd)
> >  unrelated open()
> > read(tmp,...
> > 
> > In practice every use of fd will be a copy into a tmp and then the
> > syscall; the unrelated open() could happen in another thread.
> > (OK, the gap between the tmp and the read is tiny, although if we're
> > doing multiple operations chances are the compiler will optimise
> > it to the top of a loop).
> > 
> > There's no way to make that code safe.
> 
> Urgh, yes, I see what you mean.
> 
> Currently the QIOChannelCommand implementation uses a pair of anonymous
> pipes for stdin/out to the child process. I wonder if we could switch
> that to use socketpair() instead, thus letting us shutdown() on it too.
> 
> Though I guess it would be sufficient for qio_channel_shutdown() to
> merely kill the child PID, while leaving the FDs open, as then you'd
> get EOF and/or EPIPE on the read/writes.

Yes, I guess it's a question of which one is more likely to actually
kill the exec child off; the socketpair is more likely to cause the
source side migration code to cancel cleanly, although a kill -9 
should sort out a wayward exec child.

Dave

> Regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH V5 4/9] migration: split ufd_version_check onto receive/request features part

2017-05-19 Thread Dr. David Alan Gilbert

* Alexey (a.pereva...@samsung.com) wrote:
> On Tue, May 16, 2017 at 11:32:51AM +0100, Dr. David Alan Gilbert wrote:
> > * Alexey Perevalov (a.pereva...@samsung.com) wrote:
> > > This modification is necessary for userfault fd features which are
> > > required to be requested from userspace.
> > > UFFD_FEATURE_THREAD_ID is one such "on demand" feature, which will
> > > be introduced in the next patch.
> > > 
> > > QEMU needs to use a separate userfault file descriptor, because the
> > > userfault context has internal state, and after the first call of
> > > ioctl UFFD_API it changes its state to UFFD_STATE_RUNNING (in case of
> > > success), but
> > > kernel while handling ioctl UFFD_API expects UFFD_STATE_WAIT_API. So
> > > only one ioctl with UFFD_API is possible per ufd.
> > > 
> > > Signed-off-by: Alexey Perevalov 
> > > ---
> > >  migration/postcopy-ram.c | 82 
> > > ++--
> > >  1 file changed, 73 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > > index 0f75700..c96d5f5 100644
> > > --- a/migration/postcopy-ram.c
> > > +++ b/migration/postcopy-ram.c
> > > @@ -60,32 +60,96 @@ struct PostcopyDiscardState {
> > >  #include 
> > >  #include 
> > >  
> > > -static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
> > > +
> > > +/*
> > > + * Check userfault fd features, to request only supported features in
> > > + * future.
> > > + * __NR_userfaultfd - should be checked before
> > > + * Return obtained features
> > 
> > That's not quite right;
> >  * Returns: True on success, sets *features to supported features
> > False on failure or if kernel doesn't support ufd
> > 
> yes, obtained features is out parameter,
> but I want to keep false uncommented and just add error_report into
> syscall check, because the possible reason of failure is:
> 1. No syscall userfaultfd, but function expects that syscall, it reflects in
> comment
> 2. Within syscall: exhausted fd or out of memory (file in kernel
> is allocating)
> 3. Problem in ioctl due to internal state of UFFD, as example
> UFFDIO_API after UFFDIO_REGISTER

I don't think we're allowed to depend on error pointers, but either
way we should comment it to make sure it's clear, so if you have a
boolean return at least say it's true for success and explain features
etc.

> Also I would prefer follow migration/ram.c comment style.

Yes, that's fine - it's the content of the comment I was more
worried about (and the one below).

Dave

> > > + */
> > > +static bool receive_ufd_features(uint64_t *features)
> > >  {
> > > -struct uffdio_api api_struct;
> > > -uint64_t ioctl_mask;
> > > +struct uffdio_api api_struct = {0};
> > > +int ufd;
> > > +bool ret = true;
> > > +
> > > > +/* if we are here __NR_userfaultfd should exist */
> > > +ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
> > > +if (ufd == -1) {
> > > +return false;
> > > +}
> > >  
> > > +/* ask features */
> > >  api_struct.api = UFFD_API;
> > >  api_struct.features = 0;
> > >  if (ioctl(ufd, UFFDIO_API, &api_struct)) {
> > > -error_report("%s: UFFDIO_API failed: %s", __func__
> > > +error_report("%s: UFFDIO_API failed: %s", __func__,
> > >   strerror(errno));
> > > +ret = false;
> > > +goto release_ufd;
> > > +}
> > > +
> > > +*features = api_struct.features;
> > > +
> > > +release_ufd:
> > > +close(ufd);
> > > +return ret;
> > > +}
> > 
> > Needs a comment; perhaps something like:
> >   * Called once on a newly opened ufd, can request specific features.
> >   * Returns: True on success
> > 
> > > +static bool request_ufd_features(int ufd, uint64_t features)
> > > +{
> > > +struct uffdio_api api_struct = {0};
> > > +uint64_t ioctl_mask;
> > > +
> > > +api_struct.api = UFFD_API;
> > > +api_struct.features = features;
> > > +if (ioctl(ufd, UFFDIO_API, &api_struct)) {
> > > +error_report("%s failed: UFFDIO_API failed: %s", __func__,
> > > +strerror(errno));
> > >  return false;
> > >  }
> > >  
> > > -ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
> > > - (__u64)1 << _UFFDIO_UNREGISTER;
> > > +ioctl_mask = 1 << _UFFDIO_REGISTER |
> > > + 1 << _UFFDIO_UNREGISTER;
> > >  if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
> > >  error_report("Missing userfault features: %" PRIx64,
> > >   (uint64_t)(~api_struct.ioctls & ioctl_mask));
> > >  return false;
> > >  }
> > >  
> > > +return true;
> > > +}
> > > +
> > > +static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
> > > +{
> > > +uint64_t asked_features = 0;
> > > +uint64_t supported_features;
> > > +
> > > +/*
> > > + * it's not possible to
> > > + * request UFFD_API twice per one fd
> > > + */
> > > +if (!receive_ufd_features(&su

< 1 2 3 >

101 - 200 of 252 matches

Mail list logo