[Qemu-devel] [PATCH 3/6] intel_iommu: provide AddressSpaceOps.iommu_get instance

2017-10-30 Thread Liu, Yi L
From: Peter Xu 

Provide AddressSpaceOps.iommu_get() in Intel IOMMU emulator.

Signed-off-by: Peter Xu 
Signed-off-by: Liu, Yi L 
---
 hw/i386/intel_iommu.c | 7 +++
 include/hw/i386/intel_iommu.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e81c706..54343e5 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2687,6 +2687,12 @@ static const MemoryRegionOps vtd_mem_ir_ops = {
 },
 };
 
+static IOMMUObject *vtd_as_iommu_get(AddressSpace *as)
+{
+VTDAddressSpace *vtd_dev_as = container_of(as, VTDAddressSpace, as);
+return &vtd_dev_as->iommu_object;
+}
+
 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
 {
 uintptr_t key = (uintptr_t)bus;
@@ -2748,6 +2754,7 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, 
PCIBus *bus, int devfn)
 VTD_INTERRUPT_ADDR_FIRST,
 &vtd_dev_as->iommu_ir, 64);
 address_space_init(&vtd_dev_as->as, &vtd_dev_as->root, name);
+vtd_dev_as->as.as_ops.iommu_get = vtd_as_iommu_get;
 memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
 &vtd_dev_as->sys_alias, 1);
 memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index c85f9ff..a3c6d45 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -27,6 +27,7 @@
 #include "hw/i386/ioapic.h"
 #include "hw/pci/msi.h"
 #include "hw/sysbus.h"
+#include "hw/core/iommu.h"
 
 #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
 #define INTEL_IOMMU_DEVICE(obj) \
@@ -90,6 +91,7 @@ struct VTDAddressSpace {
 MemoryRegion sys_alias;
 MemoryRegion iommu_ir;  /* Interrupt region: 0xfeeX */
 IntelIOMMUState *iommu_state;
+IOMMUObject iommu_object;
 VTDContextCacheEntry context_cache_entry;
 };
 
-- 
1.9.1




[Qemu-devel] [PATCH 5/6] vfio/pci: add notify framework based on IOMMUObject

2017-10-30 Thread Liu, Yi L
This patch introduces a notify framework for IOMMUObject.iommu_notifiers.
VFIOGuestIOMMUObject is introduced to link the VFIO container and the new
IOMMUObject notifiers.

VFIOGuestIOMMUObject instance is allocated when device is assigned and
meanwhile vIOMMU is exposed to guest.

If there is an IOMMUObject behind the device AddressSpace (a.k.a. a vIOMMU is
exposed), the VFIOGuestIOMMUObject instance is allocated and inserted into
VFIOContainer.giommu_object_list.

Signed-off-by: Liu, Yi L 
---
 hw/vfio/pci.c | 39 ++-
 include/hw/vfio/vfio-common.h |  8 
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c977ee3..5b77c7e 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2642,6 +2642,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
 VFIODevice *vbasedev_iter;
 VFIOGroup *group;
+AddressSpace *as;
+IOMMUObject *iommu;
 char *tmp, group_path[PATH_MAX], *group_name;
 Error *err = NULL;
 ssize_t len;
@@ -2694,7 +2696,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 
 trace_vfio_realize(vdev->vbasedev.name, groupid);
 
-group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), 
errp);
+as = pci_device_iommu_address_space(pdev);
+group = vfio_get_group(groupid, as, errp);
 if (!group) {
 goto error;
 }
@@ -2877,6 +2880,17 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 vfio_register_req_notifier(vdev);
 vfio_setup_resetfn_quirk(vdev);
 
+iommu = address_space_iommu_get(as);
+if (iommu != NULL) {
+VFIOGuestIOMMUObject *giommu;
+giommu = g_malloc0(sizeof(*giommu));
+giommu->iommu = iommu;
+giommu->container = group->container;
+QLIST_INSERT_HEAD(&group->container->giommu_object_list,
+  giommu,
+  giommu_next);
+}
+
 return;
 
 out_teardown:
@@ -2907,6 +2921,28 @@ static void vfio_instance_finalize(Object *obj)
 vfio_put_group(group);
 }
 
+static void vfio_release_iommu_object(PCIDevice *pdev)
+{
+VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+AddressSpace *as;
+IOMMUObject *iommu;
+
+as = pci_device_iommu_address_space(pdev);
+iommu = address_space_iommu_get(as);
+if (iommu != NULL) {
+VFIOGuestIOMMUObject *giommu, *tmp;
+VFIOGroup *group;
+group = vdev->vbasedev.group;
+
+QLIST_FOREACH_SAFE(giommu,
+   &group->container->giommu_object_list,
+   giommu_next, tmp) {
+QLIST_REMOVE(giommu, giommu_next);
+g_free(giommu);
+}
+}
+return;
+}
 static void vfio_exitfn(PCIDevice *pdev)
 {
 VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
@@ -2915,6 +2951,7 @@ static void vfio_exitfn(PCIDevice *pdev)
 vfio_unregister_err_notifier(vdev);
 pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
 vfio_disable_interrupts(vdev);
+vfio_release_iommu_object(pdev);
 if (vdev->intx.mmap_timer) {
 timer_free(vdev->intx.mmap_timer);
 }
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 702a085..e4963cc 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -88,6 +88,7 @@ typedef struct VFIOContainer {
  * future
  */
 QLIST_HEAD(, VFIOGuestIOMMUMR) giommu_mr_list;
+QLIST_HEAD(, VFIOGuestIOMMUObject) giommu_object_list;
 QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
 QLIST_HEAD(, VFIOGroup) group_list;
 QLIST_ENTRY(VFIOContainer) next;
@@ -101,6 +102,13 @@ typedef struct VFIOGuestIOMMUMR {
 QLIST_ENTRY(VFIOGuestIOMMUMR) giommu_next;
 } VFIOGuestIOMMUMR;
 
+typedef struct VFIOGuestIOMMUObject {
+VFIOContainer *container;
+IOMMUObject *iommu;
+IOMMUNotifier n;
+QLIST_ENTRY(VFIOGuestIOMMUObject) giommu_next;
+} VFIOGuestIOMMUObject;
+
 typedef struct VFIOHostDMAWindow {
 hwaddr min_iova;
 hwaddr max_iova;
-- 
1.9.1




[Qemu-devel] [PATCH 2/6] memory: introduce AddressSpaceOps and IOMMUObject

2017-10-30 Thread Liu, Yi L
From: Peter Xu 

AddressSpaceOps is similar to MemoryRegionOps, it's just for address
spaces to store arch-specific hooks.

The first hook I would like to introduce is iommu_get(). Return an
IOMMUObject behind the AddressSpace.

For systems that have IOMMUs, we will create a special address
space per device which is different from system default address
space for it (please refer to pci_device_iommu_address_space()).
Normally when that happens, there will be one specific IOMMU (or
say, translation unit) stands right behind that new address space.

This iommu_get() fetches that guy behind the address space. Here,
the guy is defined as IOMMUObject, which includes a notifier_list
so far, may extend in future. Along with IOMMUObject, a new iommu
notifier mechanism is introduced. It would be used for virt-svm.
Also IOMMUObject can further have an IOMMUObjectOps which is similar
to MemoryRegionOps. The difference is that IOMMUObjectOps does not rely
on MemoryRegion.

Signed-off-by: Peter Xu 
Signed-off-by: Liu, Yi L 
---
 hw/core/Makefile.objs   |  1 +
 hw/core/iommu.c | 58 +++
 include/exec/memory.h   | 22 +++
 include/hw/core/iommu.h | 73 +
 memory.c|  8 ++
 5 files changed, 162 insertions(+)
 create mode 100644 hw/core/iommu.c
 create mode 100644 include/hw/core/iommu.h

diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
index f8d7a4a..d688412 100644
--- a/hw/core/Makefile.objs
+++ b/hw/core/Makefile.objs
@@ -5,6 +5,7 @@ common-obj-y += fw-path-provider.o
 # irq.o needed for qdev GPIO handling:
 common-obj-y += irq.o
 common-obj-y += hotplug.o
+common-obj-y += iommu.o
 common-obj-y += nmi.o
 
 common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o
diff --git a/hw/core/iommu.c b/hw/core/iommu.c
new file mode 100644
index 000..7c4fcfe
--- /dev/null
+++ b/hw/core/iommu.c
@@ -0,0 +1,58 @@
+/*
+ * QEMU emulation of IOMMU logic
+ *
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ * Authors: Peter Xu ,
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/core/iommu.h"
+
+void iommu_notifier_register(IOMMUObject *iommu,
+ IOMMUNotifier *n,
+ IOMMUNotifyFn fn,
+ IOMMUEvent event)
+{
+n->event = event;
+n->iommu_notify = fn;
+QLIST_INSERT_HEAD(&iommu->iommu_notifiers, n, node);
+return;
+}
+
+void iommu_notifier_unregister(IOMMUObject *iommu,
+   IOMMUNotifier *notifier)
+{
+IOMMUNotifier *cur, *next;
+
+QLIST_FOREACH_SAFE(cur, &iommu->iommu_notifiers, node, next) {
+if (cur == notifier) {
+QLIST_REMOVE(cur, node);
+break;
+}
+}
+}
+
+void iommu_notify(IOMMUObject *iommu, IOMMUEventData *event_data)
+{
+IOMMUNotifier *cur;
+
+QLIST_FOREACH(cur, &iommu->iommu_notifiers, node) {
+if ((cur->event == event_data->event) && cur->iommu_notify) {
+cur->iommu_notify(cur, event_data);
+}
+}
+}
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 03595e3..8350973 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -26,6 +26,7 @@
 #include "qom/object.h"
 #include "qemu/rcu.h"
 #include "hw/qdev-core.h"
+#include "hw/core/iommu.h"
 
 #define RAM_ADDR_INVALID (~(ram_addr_t)0)
 
@@ -301,6 +302,19 @@ struct MemoryListener {
 };
 
 /**
+ * AddressSpaceOps: callbacks structure for address space specific operations
+ *
+ * @iommu_get: returns an IOMMU object that backs the address space.
+ * Normally this should be NULL for generic address
+ * spaces, and it's only used when there is one
+ * translation unit behind this address space.
+ */
+struct AddressSpaceOps {
+IOMMUObject *(*iommu_get)(AddressSpace *as);
+};
+typedef struct AddressSpaceOps AddressSpaceOps;
+
+/**
  * AddressSpace: describes a mapping of addresses to #MemoryRegion objects
  */
 struct AddressSpace {
@@ -316,6 +330,7 @@ struct AddressSpace {
 struct MemoryRegionIoeventfd *ioeventfds;
 QTAILQ_HEAD(memory_listeners_as, MemoryListener) listeners;
 QTAILQ_ENTRY(AddressSpace) address_spaces_link;
+AddressSpaceOps as_ops;
 };
 
 FlatView 

[Qemu-devel] [PATCH 6/6] vfio/pci: register vfio_iommu_bind_pasidtbl_notify notifier

2017-10-30 Thread Liu, Yi L
This is an example to show the usage of IOMMUObject based notifier.

For passthru devices, if there is a vIOMMU exposed to guest, guest
would issue iommu operation on the devices. And the iommu operations
needs to be propagated to host iommu driver.

In future, the IOMMUObject notifiers may include:
* notifier for guest pasid table binding
* notifier for guest iommu tlb invalidation
Both of the two notifiers would be included in the future virt-SVM patchset.

In virt-SVM patchset, this notifier would be fulfilled.

Signed-off-by: Liu, Yi L 
---
 hw/vfio/pci.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 5b77c7e..3ed521e 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2637,6 +2637,14 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice 
*vdev)
 vdev->req_enabled = false;
 }
 
+static void vfio_iommu_bind_pasidtbl_notify(IOMMUNotifier *n,
+IOMMUEventData *event_data)
+{
+/*  Sample code, would be detailed in coming virt-SVM patchset.
+VFIOGuestIOMMUObject *giommu = container_of(n, VFIOGuestIOMMUObject, n);
+VFIOContainer *container = giommu->container;
+*/
+}
 static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
 VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
@@ -2889,6 +2897,12 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 QLIST_INSERT_HEAD(&group->container->giommu_object_list,
   giommu,
   giommu_next);
+/* Register vfio_iommu_bind_pasidtbl_notify with event flag
+   IOMMU_EVENT_BIND_PASIDT */
+iommu_notifier_register(iommu,
+&giommu->n,
+vfio_iommu_bind_pasidtbl_notify,
+IOMMU_EVENT_BIND_PASIDT);
 }
 
 return;
-- 
1.9.1




[Qemu-devel] [PATCH 0/6] Introduce new iommu notifier framework

2017-10-30 Thread Liu, Yi L
This patchset is a follow-up of Peter Xu's patchset as the link below.
In brief, Peter's patchset is to introduce a common IOMMU object which
is not depending on platform (x86/ppc/...), or bus (PCI/...). And based
on it, a iommu object based notifier framework is introduced and also
AddressSpaceOps is added to provide methods like getting IOMMUObject
behind an AddressSpace. It could be used to detect the exposure of
vIOMMU.

https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg05360.html

Here let me try to address why we need such change.

I'm working on virt-SVM enabling for passthru devices on Intel platform.
This work is to extend the existing intel iommu emulator in Qemu. Among
the extensions, there are two requirements which are related to the topic
we are discussing here.

* intel iommu emulator needs to propagate a guest pasid table pointer
  to host through VFIO. So that host intel iommu driver could set it to
  its ctx table. With guest pasid table pointer set, host would be able
  to get guest CR3 table after guest calls intel_svm_bind_mm(). Then HW
  iommu could do nested translation to get GVA->GPA GPA->HPA. Thus enables
  Shared Virtual Memory in guest.

* intel iommu emulator needs to propagate guest's iotlb(1st level cache)
  flush to host through VFIO.

Since the two requirements need to talk with VFIO, so notifiers are
needed. Meanwhile, the notifiers should be registered as long as there
is vIOMMU exposed to guest.

Qemu has an existing notifier framework based on MemoryRegion. And we
are using it for MAP/UNMAP. However, we cannot use it here. Reason is
as below:

* IOMMU MemoryRegion notifiers depend on an IOMMU MemoryRegion. If the guest
  iommu driver is configured to bypass IOVA address translation, the address
  space would be the system RAM address space. The MemoryRegion would be the
  RAM MemoryRegion. Details can be got in Peter's patch to allow dynamic
  switch of IOMMU region.
  https://lists.gnu.org/archive/html/qemu-devel/2016-12/msg02690.html

* virt-SVM requires guest to config to bypass the IOVA address translation
  With such config, we can make sure host would have a GPA->HPA mapping,
  and meanwhile intel iommu emulator could propagate the guest CR3 table
  (GVA->GPA) to host. With nested translation, we are able to achieve
  GVA->GPA and then GPA->HPA translation. However, if so, the IOMMU
  MemoryRegion notifiers would not be registered. It means for virt-SVM,
  we need another notifier framework.

Based on Peter's patch, I did some clean up and fulfill the notifier
framework based on IOMMUObject and also provide an example of the newly
introduced notifier framework. The notifier framework introduced here
is going to be used in my virt-SVM patchset.

For virt-SVM design details, you may refer to svm RFC patch.
https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg04925.html

Liu, Yi L (3):
  vfio: rename GuestIOMMU to be GuestIOMMUMR
  vfio/pci: add notify framework based on IOMMUObject
  vfio/pci: register vfio_iommu_bind_pasidtbl_notify notifier

Peter Xu (3):
  memory: rename existing iommu notifier to be iommu mr notifier
  memory: introduce AddressSpaceOps and IOMMUObject
  intel_iommu: provide AddressSpaceOps.iommu_get instance

 hw/core/Makefile.objs |  1 +
 hw/core/iommu.c   | 58 
 hw/i386/amd_iommu.c   |  6 ++--
 hw/i386/intel_iommu.c | 41 +--
 hw/ppc/spapr_iommu.c  |  8 ++---
 hw/s390x/s390-pci-bus.c   |  2 +-
 hw/vfio/common.c  | 25 +++---
 hw/vfio/pci.c | 53 -
 hw/virtio/vhost.c | 10 +++---
 include/exec/memory.h | 77 ---
 include/hw/core/iommu.h   | 73 
 include/hw/i386/intel_iommu.h | 10 +++---
 include/hw/vfio/vfio-common.h | 16 ++---
 include/hw/virtio/vhost.h |  4 +--
 memory.c  | 45 +++--
 15 files changed, 331 insertions(+), 98 deletions(-)
 create mode 100644 hw/core/iommu.c
 create mode 100644 include/hw/core/iommu.h

-- 
1.9.1




[Qemu-devel] [PATCH 1/6] memory: rename existing iommu notifier to be iommu mr notifier

2017-10-30 Thread Liu, Yi L
From: Peter Xu 

IOMMU notifiers before are mostly used for [dev-]IOTLB stuffs. It is not
suitable for other kind of notifiers (one example would be the future
virt-svm support). Considering that current notifiers are targeted for
per memory region, renaming the iommu notifier definitions.

* all the notifier types from IOMMU_NOTIFIER_* prefix into IOMMU_MR_EVENT_*
  to better show its usage (for memory regions).
* rename IOMMUNotifier to IOMMUMRNotifier
* rename iommu_notifier to iommu_mr_notifier

Signed-off-by: Peter Xu 
Signed-off-by: Liu, Yi L 
---
 hw/i386/amd_iommu.c   |  6 ++---
 hw/i386/intel_iommu.c | 34 +-
 hw/ppc/spapr_iommu.c  |  8 +++
 hw/s390x/s390-pci-bus.c   |  2 +-
 hw/vfio/common.c  | 10 
 hw/virtio/vhost.c | 10 
 include/exec/memory.h | 55 ++-
 include/hw/i386/intel_iommu.h |  8 +++
 include/hw/vfio/vfio-common.h |  2 +-
 include/hw/virtio/vhost.h |  4 ++--
 memory.c  | 37 +++--
 11 files changed, 89 insertions(+), 87 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index ad8155c..8f756e8 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1072,12 +1072,12 @@ static const MemoryRegionOps mmio_mem_ops = {
 };
 
 static void amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
-IOMMUNotifierFlag old,
-IOMMUNotifierFlag new)
+IOMMUMREventFlag old,
+IOMMUMREventFlag new)
 {
 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
 
-if (new & IOMMU_NOTIFIER_MAP) {
+if (new & IOMMU_MR_EVENT_MAP) {
 error_report("device %02x.%02x.%x requires iommu notifier which is not 
"
  "currently supported", as->bus_num, PCI_SLOT(as->devfn),
  PCI_FUNC(as->devfn));
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3a5bb0b..e81c706 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1234,7 +1234,7 @@ static void 
vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
 
 static void vtd_iommu_replay_all(IntelIOMMUState *s)
 {
-IntelIOMMUNotifierNode *node;
+IntelIOMMUMRNotifierNode *node;
 
 QLIST_FOREACH(node, >notifiers_list, next) {
 memory_region_iommu_replay_all(>vtd_as->iommu);
@@ -1308,7 +1308,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState 
*s,
 /*
  * So a device is moving out of (or moving into) a
  * domain, a replay() suites here to notify all the
- * IOMMU_NOTIFIER_MAP registers about this change.
+ * IOMMU_MR_EVENT_MAP registers about this change.
  * This won't bring bad even if we have no such
  * notifier registered - the IOMMU notification
  * framework will skip MAP notifications if that
@@ -1358,7 +1358,7 @@ static void vtd_iotlb_global_invalidate(IntelIOMMUState 
*s)
 
 static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
 {
-IntelIOMMUNotifierNode *node;
+IntelIOMMUMRNotifierNode *node;
 VTDContextEntry ce;
 VTDAddressSpace *vtd_as;
 
@@ -1388,7 +1388,7 @@ static void 
vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
uint16_t domain_id, hwaddr addr,
uint8_t am)
 {
-IntelIOMMUNotifierNode *node;
+IntelIOMMUMRNotifierNode *node;
 VTDContextEntry ce;
 int ret;
 
@@ -2318,21 +2318,21 @@ static IOMMUTLBEntry 
vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
 }
 
 static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
-  IOMMUNotifierFlag old,
-  IOMMUNotifierFlag new)
+  IOMMUMREventFlag old,
+  IOMMUMREventFlag new)
 {
 VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
 IntelIOMMUState *s = vtd_as->iommu_state;
-IntelIOMMUNotifierNode *node = NULL;
-IntelIOMMUNotifierNode *next_node = NULL;
+IntelIOMMUMRNotifierNode *node = NULL;
+IntelIOMMUMRNotifierNode *next_node = NULL;
 
-if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
+if (!s->caching_mode && new & IOMMU_MR_EVENT_MAP) {
 error_report("We need to set cache_mode=1 for intel-iommu to enable "
  "device assignment with IOMMU protection.");
 exit(1);
 }
 
-if (old == IOMMU_NOTIFIER_NONE) {
+if (old == IOMMU_MR_EVENT_NONE) {
 node = g_malloc0(sizeof(*node));
 

[Qemu-devel] [PATCH 4/6] vfio: rename GuestIOMMU to be GuestIOMMUMR

2017-10-30 Thread Liu, Yi L
Rename GuestIOMMU to GuestIOMMUMR as the existing GuestIOMMU is
for MemoryRegion related notifiers.

Signed-off-by: Liu, Yi L 
---
 hw/vfio/common.c  | 15 ---
 include/hw/vfio/vfio-common.h |  8 
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 1f7d516..3d40bec 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -348,7 +348,7 @@ static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void 
**vaddr,
 
 static void vfio_iommu_map_notify(IOMMUMRNotifier *n, IOMMUTLBEntry *iotlb)
 {
-VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
+VFIOGuestIOMMUMR *giommu = container_of(n, VFIOGuestIOMMUMR, n);
 VFIOContainer *container = giommu->container;
 hwaddr iova = iotlb->iova + giommu->iommu_offset;
 bool read_only;
@@ -478,7 +478,7 @@ static void vfio_listener_region_add(MemoryListener 
*listener,
 memory_region_ref(section->mr);
 
 if (memory_region_is_iommu(section->mr)) {
-VFIOGuestIOMMU *giommu;
+VFIOGuestIOMMUMR *giommu;
 IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
 
 trace_vfio_listener_region_add_iommu(iova, end);
@@ -500,7 +500,7 @@ static void vfio_listener_region_add(MemoryListener 
*listener,
IOMMU_MR_EVENT_ALL,
section->offset_within_region,
int128_get64(llend));
-QLIST_INSERT_HEAD(>giommu_list, giommu, giommu_next);
+QLIST_INSERT_HEAD(>giommu_mr_list, giommu, giommu_next);
 
 memory_region_register_iommu_notifier(section->mr, >n);
 memory_region_iommu_replay(giommu->iommu, >n);
@@ -567,9 +567,9 @@ static void vfio_listener_region_del(MemoryListener 
*listener,
 }
 
 if (memory_region_is_iommu(section->mr)) {
-VFIOGuestIOMMU *giommu;
+VFIOGuestIOMMUMR *giommu;
 
-QLIST_FOREACH(giommu, >giommu_list, giommu_next) {
+QLIST_FOREACH(giommu, >giommu_mr_list, giommu_next) {
 if (MEMORY_REGION(giommu->iommu) == section->mr &&
 giommu->n.start == section->offset_within_region) {
 memory_region_unregister_iommu_notifier(section->mr,
@@ -1163,12 +1163,13 @@ static void vfio_disconnect_container(VFIOGroup *group)
 
 if (QLIST_EMPTY(>group_list)) {
 VFIOAddressSpace *space = container->space;
-VFIOGuestIOMMU *giommu, *tmp;
+VFIOGuestIOMMUMR *giommu, *tmp;
 
 vfio_listener_release(container);
 QLIST_REMOVE(container, next);
 
-QLIST_FOREACH_SAFE(giommu, >giommu_list, giommu_next, tmp) {
+QLIST_FOREACH_SAFE(giommu, >giommu_mr_list,
+   giommu_next, tmp) {
 memory_region_unregister_iommu_notifier(
 MEMORY_REGION(giommu->iommu), >n);
 QLIST_REMOVE(giommu, giommu_next);
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 865e3e7..702a085 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -87,19 +87,19 @@ typedef struct VFIOContainer {
  * contiguous IOVA window.  We may need to generalize that in
  * future
  */
-QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
+QLIST_HEAD(, VFIOGuestIOMMUMR) giommu_mr_list;
 QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
 QLIST_HEAD(, VFIOGroup) group_list;
 QLIST_ENTRY(VFIOContainer) next;
 } VFIOContainer;
 
-typedef struct VFIOGuestIOMMU {
+typedef struct VFIOGuestIOMMUMR {
 VFIOContainer *container;
 IOMMUMemoryRegion *iommu;
 hwaddr iommu_offset;
 IOMMUMRNotifier n;
-QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
-} VFIOGuestIOMMU;
+QLIST_ENTRY(VFIOGuestIOMMUMR) giommu_next;
+} VFIOGuestIOMMUMR;
 
 typedef struct VFIOHostDMAWindow {
 hwaddr min_iova;
-- 
1.9.1




Re: [Qemu-devel] Qemu start VM in huge page without '-mem-prealloc' will get memory leak?

2017-10-30 Thread Sam
Why could I not find in ovs-dpdk maillist? could some one give the bug
number?

2017-10-27 15:08 GMT+08:00 Daniel P. Berrange :

> On Fri, Oct 27, 2017 at 11:28:33AM +0800, Sam wrote:
> > After restart ovs-dpdk(which is openvswitch with dpdk lib), memory is
> > released.
> >
> > But problem is in product environment, I could not restart ovs-dpdk..
>
> This is a clear bug in ovs-dpdk then - it is failing to release memory
> when QEMU exits.
>
>
> Regards,
> Daniel
> --
> |: https://berrange.com  -o-https://www.flickr.com/photos/
> dberrange :|
> |: https://libvirt.org -o-
> https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/
> dberrange :|
>


[Qemu-devel] [Bug] virtio-blk: qemu will crash if hotplug virtio-blk device failed

2017-10-30 Thread linzhecheng
I found that hotplug virtio-blk device will lead to qemu crash.

Re-production steps:

1.   Run VM named vm001

2.   Create a virtio-blk.xml which contains wrong configurations:

  
  
  


3.   Run command : virsh attach-device vm001 vm001

Libvirt will return err msg:

error: Failed to attach device from blk-scsi.xml

error: internal error: unable to execute QEMU command 'device_add': Please set 
scsi=off for virtio-blk devices in order to use virtio 1.0

it means hotplug virtio-blk device failed.

4.   Suspending or shutting down the VM will lead to a qemu crash



from gdb:


(gdb) bt
#0  object_get_class (obj=obj@entry=0x0) at qom/object.c:750
#1  0x7f9a72582e01 in virtio_vmstate_change (opaque=0x7f9a73d10960, 
running=0, state=) at 
/mnt/sdb/lzc/code/open/qemu/hw/virtio/virtio.c:2203
#2  0x7f9a7261ef52 in vm_state_notify (running=running@entry=0, 
state=state@entry=RUN_STATE_PAUSED) at vl.c:1685
#3  0x7f9a7252603a in do_vm_stop (state=RUN_STATE_PAUSED) at 
/mnt/sdb/lzc/code/open/qemu/cpus.c:941
#4  vm_stop (state=state@entry=RUN_STATE_PAUSED) at 
/mnt/sdb/lzc/code/open/qemu/cpus.c:1807
#5  0x7f9a7262eb1b in qmp_stop (errp=errp@entry=0x7ffe63e25590) at qmp.c:102
#6  0x7f9a7262c70a in qmp_marshal_stop (args=, 
ret=, errp=0x7ffe63e255d8) at qmp-marshal.c:5854
#7  0x7f9a72897e79 in do_qmp_dispatch (errp=0x7ffe63e255d0, 
request=0x7f9a76510120, cmds=0x7f9a72ee7980 ) at 
qapi/qmp-dispatch.c:104
#8  qmp_dispatch (cmds=0x7f9a72ee7980 , 
request=request@entry=0x7f9a76510120) at qapi/qmp-dispatch.c:131
#9  0x7f9a725288d5 in handle_qmp_command (parser=, 
tokens=) at /mnt/sdb/lzc/code/open/qemu/monitor.c:3852
#10 0x7f9a7289d514 in json_message_process_token (lexer=0x7f9a73ce4498, 
input=0x7f9a73cc6880, type=JSON_RCURLY, x=36, y=17) at 
qobject/json-streamer.c:105
#11 0x7f9a728bb69b in json_lexer_feed_char 
(lexer=lexer@entry=0x7f9a73ce4498, ch=125 '}', flush=flush@entry=false) at 
qobject/json-lexer.c:323
#12 0x7f9a728bb75e in json_lexer_feed (lexer=0x7f9a73ce4498, 
buffer=, size=) at qobject/json-lexer.c:373
#13 0x7f9a7289d5d9 in json_message_parser_feed (parser=, 
buffer=, size=) at qobject/json-streamer.c:124
#14 0x7f9a7252722e in monitor_qmp_read (opaque=, 
buf=, size=) at 
/mnt/sdb/lzc/code/open/qemu/monitor.c:3894
#15 0x7f9a7284ee1b in tcp_chr_read (chan=, cond=, opaque=) at chardev/char-socket.c:441
#16 0x7f9a6e03e99a in g_main_context_dispatch () from 
/usr/lib64/libglib-2.0.so.0
#17 0x7f9a728a342c in glib_pollfds_poll () at util/main-loop.c:214
#18 os_host_main_loop_wait (timeout=) at util/main-loop.c:261
#19 main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:515
#20 0x7f9a724e7547 in main_loop () at vl.c:1999
#21 main (argc=, argv=, envp=) at 
vl.c:4877

Problem happens in virtio_vmstate_change which is called by vm_state_notify,
static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
VirtIODevice *vdev = opaque;
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
vdev->vm_running = running;

if (backend_run) {
virtio_set_status(vdev, vdev->status);
}

if (k->vmstate_change) {
k->vmstate_change(qbus->parent, backend_run);
}

if (!backend_run) {
virtio_set_status(vdev, vdev->status);
}
}

Vdev's parent_bus is NULL, so qdev_get_parent_bus(DEVICE(vdev)) will crash.
virtio_vmstate_change is added to the list vm_change_state_head at 
virtio_blk_device_realize(virtio_init),
but after hotplug virtio-blk failed, virtio_vmstate_change will not be removed 
from vm_change_state_head.


I applied a patch as follows:

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 5884ce3..ea532dc 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2491,6 +2491,7 @@ static void virtio_device_realize(DeviceState *dev, Error 
**errp)
 virtio_bus_device_plugged(vdev, );
 if (err != NULL) {
 error_propagate(errp, err);
+vdc->unrealize(dev, NULL);
 return;
 }


[Qemu-devel] [Bug 997631] Re: Windows 2008R2 very slow cold boot when 4 CPUs

2017-10-30 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/997631

Title:
  Windows 2008R2 very slow cold boot when 4 CPUs

Status in QEMU:
  Expired

Bug description:
  Hi,

  well, I'm in a similar boat as the one in #992067. But regardless any 
memory-settings.
  It takes "ages" in a cold-boot Windows 2008R2 with qemu-1.0.1, qemu-1.0.50 
and latest-n-greatest from today ( 1.0.50 /qemu-1b3e76e ). It eats up 400% 
host-cpu-load until login-prompt is shown on the console.

  Meanwhile I tried couple of settings with "-cpu features (hv_spinlocks), 
hv_relaxed and hv_vapic. ".
  Due to some Clock-glitches I start qemu-system-x86_64 with "-no-hpet".

  With 2 processors the system is up after 2 minutes, with 4 procs
  almost 10 minutes... After a reset ( warmstart) the 4 proc-system is
  up after a couple of 20 secs.

  Hints welcome, though once started, the system seems to operate
  "normally".

  Thnx in@vance,

  Oliver.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/997631/+subscriptions



Re: [Qemu-devel] [PATCH v2 1/4] build: allow setting a custom GIT binary for transparent proxying

2017-10-30 Thread Alexey Kardashevskiy
On 30/10/17 18:52, Daniel P. Berrange wrote:
> On Mon, Oct 30, 2017 at 10:49:01AM +1100, Alexey Kardashevskiy wrote:
>> On 30/10/17 03:29, Daniel P. Berrange wrote:
>>> On Mon, Oct 30, 2017 at 01:08:56AM +1100, Alexey Kardashevskiy wrote:

 I run ./scripts/git-submodule.sh on a server (where source directory is
 writeable), then "rm .git-submodule-status", then run "Makefile" on a build
 machine (which has the source directory mounted as read-only). I basically
 recreate the situation where I was when I just discovered this brand new
 ./scripts/git-submodule.sh.
>>>
>>> Don't rm the .git-submodule-status. That means 'make' thinks the modules
>>> are out of date.
>>>
>>> Just run 'scripts/git-submodules.sh ...mods..' on the writable source
>>> dir, and then run 'make' on the build machine.
>>>
 I know that now, all I am asking is an error message to print exact command
 to run...
>>>
>>> If you hadn't deleted the .git-submodule-status, it would have worked fine.
>>
>> No.
>>
>> I do this on a server:
>>
>> [vpl1 qemu]$ git co v2.10.0
>> At this point no .git-submodule-status is expected/required/exist.
>>
>> [vpl1 qemu]$ ssh aikhostos2 make -C /home/aik/pbuild/qemu-aikhostos2-ppc64/
>> -j24
>>
>> Configure succeeds, compiles just fine, it has been working like this for
>> years.
>>
>>
>> Now:
>>
>> [vpl1 qemu]$ git co git-submodule  (this is your stuff)
>> [vpl1 qemu]$ ssh aikhostos2 make -C /home/aik/pbuild/qemu-aikhostos2-ppc64/
>> -j24
>> touch: cannot touch ‘.git-submodule-status’: Read-only file system
>> make: Entering directory `/home/aik/pbuild/qemu-aikhostos2-ppc64'
>>   GEN ppc64-softmmu/config-devices.mak.tmp
>> [...]
>>
>> ./scripts/git-submodule.sh: line 74: .git-submodule-status: Read-only file
>> system
>> ./scripts/git-submodule.sh: failed to save git submodule status
>> make: *** [git-submodule-update] Error 1
> 
> Oh I see, the git submodules list is empty because you have a previously
> built source tree, so it's short-circuiting the extra check I added. This
> is easy enough to address


btw why is the name "git-submodule.sh", not update-submodule.sh or
update-git-submodule.sh on a par with update-linux-headers.sh?




-- 
Alexey



Re: [Qemu-devel] [PATCH v2] arm: implement cache/shareability attribute bits for PAR registers

2017-10-30 Thread Andrew Baumann via Qemu-devel
> From: Peter Maydell [mailto:peter.mayd...@linaro.org]
> Sent: Tuesday, 31 October 2017 03:25
> 
> On 20 October 2017 at 22:49, Andrew Baumann
>  wrote:
> > On a successful address translation instruction, PAR is supposed to
> > contain cacheability and shareability attributes determined by the
> > translation. We previously returned 0 for these bits (in line with the
> > general strategy of ignoring caches and memory attributes), but some
> > guest OSes may depend on them.
> >
> > This patch collects the attribute bits in the page-table walk, and
> > updates PAR with the correct attributes for all LPAE
> > translations. Short descriptor formats still return 0 for these bits,
> > as in the prior implementation, but now log an unimplemented message.
> >
> > Signed-off-by: Andrew Baumann 
> > ---
> > v2:
> >  * return attrs via out parameter from get_phys_addr, rather than
> MemTxAttrs
> >  * move MAIR lookup/index inline, since it turned out to be simple
> >  * implement attributes for stage 2 translations
> >  * combine attributes from stages 1 and 2 when required
> 
> Hi. This is looking pretty good, but I have a few comments below,
> and we're pretty much at the softfreeze date (KVM Forum last week
> meant I didn't get much code review done, unfortunately). Would
> you be too sad if this missed 2.11 ?

No worries. It would be nice to have a stable release that we can tell people 
supports arm64 Windows, but I recognise that this is a non-trivial change, so 
if we have to wait for 2.12 to get that, then fair enough.

> > Attributes for short PTE formats remain unimplemented; there's a
> LOG_UNIMP for
> > this case, but it's likely to be noisy for guests that trigger it -- do we 
> > need
> > a one-shot mechanism for the log statement?
> 
> I think we should just drop that LOG_UNIMP.

Ok.

> > @@ -8929,6 +8939,28 @@ static bool get_phys_addr_lpae(CPUARMState
> *env, target_ulong address,
> >   */
> >  txattrs->secure = false;
> >  }
> > +
> > +if (cacheattrs != NULL) {
> > +if (mmu_idx == ARMMMUIdx_S2NS) {
> > +/* Translate from the 4-bit stage 2 representation of
> > + * memory attributes (without cache-allocation hints) to
> > + * the 8-bit representation of the stage 1 MAIR registers
> > + * (which includes allocation hints).
> > + */
> > +uint8_t memattr = extract32(attrs, 0, 4);
> > +cacheattrs->attrs = (extract32(memattr, 2, 2) << 4)
> > +  | (extract32(memattr, 0, 2) << 2);
> 
> Pseudocode S2ConvertAttrsHints() specifies some hint bit defaults
> (no-allocate for NC; RW-allocate for WT or WB) -- do we want to
> follow that?

Thanks for the pointer. Yes, I think we do.

> 
> > +cacheattrs->shareability = extract32(attrs, 4, 2);
> 
> Are you sure this is the right bit offset for the shareability bits?
> I think 4,2 is the S2AP (access) bits, and the SH bits are in 6,2, same
> as for stage 1 descriptors.

You're right. I was convinced it differed, but I don't recall why. Thanks for 
catching this.

> > +} else {
> > +/* Index into MAIR registers for cache attributes */
> > +uint8_t attrindx = extract32(attrs, 0, 3);
> > +uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
> > +assert(attrindx <= 7);
> > +cacheattrs->attrs = extract64(mair, attrindx * 8, 8);
> > +cacheattrs->shareability = extract32(attrs, 6, 2);
> > +}
> > +}
> > +
> >  *phys_ptr = descaddr;
> >  *page_size_ptr = page_size;
> >  return false;
> > @@ -9490,6 +9522,89 @@ static bool
> get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
> >  return false;
> >  }
> >
> > +/* Combine either inner or outer cacheability attributes for normal
> > + * memory, according to table D4-42 of ARM DDI 0487B.b (the ARMv8
> ARM).
> > + *
> > + * NB: only stage 1 includes allocation hints (RW bits), leading to
> > + * some asymmetry.
> > + */
> > +static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2)
> > +{
> > +if (s1 == 4 || s2 == 4) {
> > +/* non-cacheable has precedence */
> > +return 4;
> > +} else if (extract32(s1, 2, 2) == 0 || extract32(s1, 2, 2) == 2) {
> > +/* stage 1 write-through takes precedence */
> > +return s1;
> > +} else if (extract32(s2, 2, 2) == 2) {
> > +/* stage 2 write-through takes precedence */
> > +return s2;
> > +} else { /* write-back */
> > +return s1;
> > +}
> 
> The v8A ARM ARM pseudocode CombineS1S2AttrHints() says that the hint
> bits always come from s1 regardless of whose attrs won.

Aha, I was wondering about this. Thanks for the pointer to the pseudocode... it 
isn't referenced anywhere in the relevant section! It's reassuring to see that, 
aside from the hints (where the English was ambiguous IIRC), I 

[Qemu-devel] Retrocomputing devroom at FOSDEM 2018

2017-10-30 Thread François Revol
Hi,
I just found out someone managed to get a retrocomputing devroom
accepted at FOSDEM:

https://lists.fosdem.org/pipermail/retrocomputing-devroom/2017-October/00.html

https://www.elpauer.org/2017/10/retrocomputing-devroom-call-for-participation/comment-page-1/

Surely there are a lot of subjects in QEMU to talk about in such a
devroom ;-)

François.



Re: [Qemu-devel] [Qemu-ppc] [PATCH v1 0/3] ppc: adding some RTAS calls in tests/libqos

2017-10-30 Thread Daniel Henrique Barboza



On 10/30/2017 06:29 PM, Daniel Henrique Barboza wrote:



On 10/30/2017 06:12 PM, David Gibson wrote:

On Thu, Oct 26, 2017 at 06:22:47PM -0200, Daniel Henrique Barboza wrote:

This series implements a few RTAS hypercalls in tests/libqos
that, used together, implement the DRC state transition described
in PAPR 2.7+, 13.4.

This started as an attempt of implementing hot unplug qtests for the
sPAPR machine but I've found a few issues that will require more time
solving:

- CPU hot unplug: for some reason the machine freezes after the
callback is returned.

- LMB hot unplug: not supported by the sPAPR machine if not
set in CAS.

I have a feeling that the CPU hot unplug issue might be related
to the lack of a CAS negotiation step as well, but the only way to be
sure is to further understand how the CAS negotiation interferes
with device hot unplug. If needed we'll have to implement the
client architecture support hypercall as well in the future.

Until then, I believe these hypercalls have a value of their own and
are worth being pushed upstream.

Unfortunately, these changes break the Travis build on MacOS.


Hmpf  how can I run this Travis build to see the errors? I've 
searched

here and found out something about making a Github pull request and
then https://travis-ci.org/qemu/qemu runs the Travis build in the 
request.

Is this a valid way of running it?



Just managed to run the build and saw the error. I'll fix it in v2.


Daniel



Thanks,

Daniel



Daniel Henrique Barboza (3):
   tests: adding 'check_exception' RTAS implementation
   tests: adding 'set_indicator' RTAS call
   tests: ibm,configure-connector RTAS call implementation

  tests/libqos/rtas.c | 105 +
  tests/libqos/rtas.h |   5 ++
  tests/rtas-test.c   | 218 


  3 files changed, 328 insertions(+)









Re: [Qemu-devel] [PATCH v2] don't hardcode EL1 in extended_addresses_enabled

2017-10-30 Thread Stefano Stabellini
On Mon, 30 Oct 2017, Peter Maydell wrote:
> On 26 October 2017 at 00:28, Stefano Stabellini  
> wrote:
> > extended_addresses_enabled calls arm_el_is_aa64, hardcoding exception
> > level 1. Instead, add an additional "el" argument to
> > extended_addresses_enabled.
> >
> > The caller will pass the right value. In most cases, it will be
> > arm_current_el(env). However, arm_debug_excp_handler will
> > use arm_debug_target_el(env), as the target el for a debug trap can be
> > different from the current el.
> >
> > Signed-off-by: Stefano Stabellini 
> 
> I have some longer comments below about what a mess this whole
> area is. Fixing some of that requires some heavy refactoring,
> which I don't want to do just now since we're about to go into
> softfreeze for the next release.
> 
> What's the specific situation/bug that you're trying to fix with
> this patch? You don't say in the commit message.
> We should be able to put in a point fix to deal with whatever it is,
> but it's hard to suggest what that would be without the detail
> of what exactly we're getting wrong. (It's the PAR format stuff,
> right? But which ATS instruction are you using, from which
> exception level, with which register width, for which stage
> 1 page table format and stage 1 guest register width?)

Thank you for understanding, I am not really up for heavy refactoring
in QEMU right now :-)

Yes, I am trying to fix the AT instruction, which is used by Xen for
address translations. Xen always runs at EL2. do_ats_write takes the
wrong path because extended_addresses_enabled assumes EL1.

To go more into details, virt_to_maddr translates a Xen virtual address
into a physical address. Xen implements virt_to_maddr as:

  static inline paddr_t __virt_to_maddr(vaddr_t va)
  {
  uint64_t par = va_to_par(va);
  return (par & PADDR_MASK & PAGE_MASK) | (va & ~PAGE_MASK);
  }

Where va_to_par is:

  #define ATS1HR  p15,4,c7,c8,0   /* Address Translation Stage 1 Hyp. 
Read */
  static inline uint64_t __va_to_par(vaddr_t va)
  {
  uint64_t par, tmp;
  tmp = READ_CP64(PAR);
  WRITE_CP32(va, ATS1HR);
  isb(); /* Ensure result is available. */
  par = READ_CP64(PAR);
  WRITE_CP64(tmp, PAR);
  return par;
  }

This is what breaks Xen 64-bit booting on qemu-system-aarch64.

For completeness, I'll also point out other uses of ATS instructions in
Xen.

Xen uses the following to translate a guest virtual address into a
physical address (Xen has no say in the guest pagetable format or
register width):

  #define ATS12NSOPR  p15,0,c7,c8,4   /* Address Translation Stage 1+2 
Non-Secure Kernel Read */
  #define ATS12NSOPW  p15,0,c7,c8,5   /* Address Translation Stage 1+2 
Non-Secure Kernel Write */
  static inline uint64_t gva_to_ma_par(vaddr_t va, unsigned int flags)
  {
  uint64_t par, tmp;
  tmp = READ_CP64(PAR);
  if ( (flags & GV2M_WRITE) == GV2M_WRITE )
  WRITE_CP32(va, ATS12NSOPW);
  else
  WRITE_CP32(va, ATS12NSOPR);
  isb(); /* Ensure result is available. */
  par = READ_CP64(PAR);
  WRITE_CP64(tmp, PAR);
  return par;
  }

Finally, Xen uses the following to translate guest virtual addresses
into pseudo-physical addresses:

  #define ATS1CPR p15,0,c7,c8,0   /* Address Translation Stage 1. 
Non-Secure Kernel Read */
  #define ATS1CPW p15,0,c7,c8,1   /* Address Translation Stage 1. 
Non-Secure Kernel Write */
  
  static inline uint64_t gva_to_ipa_par(vaddr_t va, unsigned int flags)
  {
  uint64_t par, tmp;
  tmp = READ_CP64(PAR);
  if ( (flags & GV2M_WRITE) == GV2M_WRITE )
  WRITE_CP32(va, ATS1CPW);
  else
  WRITE_CP32(va, ATS1CPR);
  isb(); /* Ensure result is available. */
  par = READ_CP64(PAR);
  WRITE_CP64(tmp, PAR);
  return par;
  }



[Qemu-devel] [PULL 10/12] nbd/client: prepare nbd_receive_reply for structured reply

2017-10-30 Thread Eric Blake
From: Vladimir Sementsov-Ogievskiy 

In the following patch, nbd_receive_reply will be used for receiving both
simple and structured reply headers.
NBDReply is altered into a union of the simple reply header and the structured
reply chunk header; simple error translation is moved to block/nbd-client
to be consistent with further structured reply error translation.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-11-ebl...@redhat.com>
---
 include/block/nbd.h |  30 ---
 block/nbd-client.c  |   8 ++--
 nbd/client.c| 104 +---
 nbd/trace-events|   3 +-
 4 files changed, 113 insertions(+), 32 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 225e9575e4..2ee1578420 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -57,12 +57,6 @@ struct NBDRequest {
 };
 typedef struct NBDRequest NBDRequest;

-struct NBDReply {
-uint64_t handle;
-uint32_t error;
-};
-typedef struct NBDReply NBDReply;
-
 typedef struct NBDSimpleReply {
 uint32_t magic;  /* NBD_SIMPLE_REPLY_MAGIC */
 uint32_t error;
@@ -78,6 +72,20 @@ typedef struct NBDStructuredReplyChunk {
 uint32_t length; /* length of payload */
 } QEMU_PACKED NBDStructuredReplyChunk;

+typedef union NBDReply {
+NBDSimpleReply simple;
+NBDStructuredReplyChunk structured;
+struct {
+/* @magic and @handle fields have the same offset and size both in
+ * simple reply and structured reply chunk, so let them be accessible
+ * without ".simple." or ".structured." specification
+ */
+uint32_t magic;
+uint32_t _skip;
+uint64_t handle;
+} QEMU_PACKED;
+} NBDReply;
+
 /* Header of NBD_REPLY_TYPE_OFFSET_DATA, complete NBD_REPLY_TYPE_OFFSET_HOLE */
 typedef struct NBDStructuredRead {
 NBDStructuredReplyChunk h;
@@ -256,4 +264,14 @@ void nbd_client_put(NBDClient *client);
 void nbd_server_start(SocketAddress *addr, const char *tls_creds,
   Error **errp);

+static inline bool nbd_reply_is_simple(NBDReply *reply)
+{
+return reply->magic == NBD_SIMPLE_REPLY_MAGIC;
+}
+
+static inline bool nbd_reply_is_structured(NBDReply *reply)
+{
+return reply->magic == NBD_STRUCTURED_REPLY_MAGIC;
+}
+
 #endif
diff --git a/block/nbd-client.c b/block/nbd-client.c
index c0683c3c83..58493b7ac4 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -92,7 +92,9 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 i = HANDLE_TO_INDEX(s, s->reply.handle);
 if (i >= MAX_NBD_REQUESTS ||
 !s->requests[i].coroutine ||
-!s->requests[i].receiving) {
+!s->requests[i].receiving ||
+nbd_reply_is_structured(>reply))
+{
 break;
 }

@@ -194,8 +196,8 @@ static int nbd_co_receive_reply(NBDClientSession *s,
 ret = -EIO;
 } else {
 assert(s->reply.handle == handle);
-ret = -s->reply.error;
-if (qiov && s->reply.error == 0) {
+ret = -nbd_errno_to_system_errno(s->reply.simple.error);
+if (qiov && ret == 0) {
 if (qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
   NULL) < 0) {
 ret = -EIO;
diff --git a/nbd/client.c b/nbd/client.c
index 9acf745b79..4f0745f601 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -908,6 +908,57 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
 return nbd_write(ioc, buf, sizeof(buf), NULL);
 }

+/* nbd_receive_simple_reply
+ * Read simple reply except magic field (which should be already read).
+ * Payload is not read (payload is possible for CMD_READ, but here we even
+ * don't know whether it take place or not).
+ */
+static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
+Error **errp)
+{
+int ret;
+
+assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC);
+
+ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic),
+   sizeof(*reply) - sizeof(reply->magic), errp);
+if (ret < 0) {
+return ret;
+}
+
+be32_to_cpus(>error);
+be64_to_cpus(>handle);
+
+return 0;
+}
+
+/* nbd_receive_structured_reply_chunk
+ * Read structured reply chunk except magic field (which should be already
+ * read).
+ * Payload is not read.
+ */
+static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
+  NBDStructuredReplyChunk *chunk,
+  Error **errp)
+{
+int ret;
+
+assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
+
+ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
+   sizeof(*chunk) - sizeof(chunk->magic), errp);
+if (ret < 0) {
+return ret;
+}
+
+be16_to_cpus(>flags);
+

[Qemu-devel] [PULL 08/12] nbd/server: Include human-readable message in structured errors

2017-10-30 Thread Eric Blake
The NBD spec permits including a human-readable error string if
structured replies are in force, so we might as well send the
client the message that we logged on any error.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-9-ebl...@redhat.com>
---
 nbd/server.c | 20 +---
 nbd/trace-events |  2 +-
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 3261fd1bd7..70b40ed27e 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1296,24 +1296,25 @@ static int coroutine_fn 
nbd_co_send_structured_read(NBDClient *client,
 static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
  uint64_t handle,
  uint32_t error,
+ const char *msg,
  Error **errp)
 {
 NBDStructuredError chunk;
 int nbd_err = system_errno_to_nbd_errno(error);
 struct iovec iov[] = {
 {.iov_base = , .iov_len = sizeof(chunk)},
-/* FIXME: Support human-readable error message */
+{.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
 };

 assert(nbd_err);
 trace_nbd_co_send_structured_error(handle, nbd_err,
-   nbd_err_lookup(nbd_err));
+   nbd_err_lookup(nbd_err), msg ? msg : 
"");
 set_be_chunk(, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
- sizeof(chunk) - sizeof(chunk.h));
+ sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
 stl_be_p(, nbd_err);
-stw_be_p(_length, 0);
+stw_be_p(_length, iov[1].iov_len);

-return nbd_co_send_iov(client, iov, 1, errp);
+return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
 }

 /* nbd_co_receive_request
@@ -1414,6 +1415,7 @@ static coroutine_fn void nbd_trip(void *opaque)
 int flags;
 int reply_data_len = 0;
 Error *local_err = NULL;
+char *msg = NULL;

 trace_nbd_trip();
 if (client->closing) {
@@ -1530,14 +1532,17 @@ reply:
 if (local_err) {
 /* If we get here, local_err was not a fatal error, and should be sent
  * to the client. */
+assert(ret < 0);
+msg = g_strdup(error_get_pretty(local_err));
 error_report_err(local_err);
 local_err = NULL;
 }

-if (client->structured_reply && request.type == NBD_CMD_READ) {
+if (client->structured_reply &&
+(ret < 0 || request.type == NBD_CMD_READ)) {
 if (ret < 0) {
 ret = nbd_co_send_structured_error(req->client, request.handle,
-   -ret, _err);
+   -ret, msg, _err);
 } else {
 ret = nbd_co_send_structured_read(req->client, request.handle,
   request.from, req->data,
@@ -1548,6 +1553,7 @@ reply:
ret < 0 ? -ret : 0,
req->data, reply_data_len, _err);
 }
+g_free(msg);
 if (ret < 0) {
 error_prepend(_err, "Failed to send reply: ");
 goto disconnect;
diff --git a/nbd/trace-events b/nbd/trace-events
index 6894f8bbb4..52150bd738 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -57,7 +57,7 @@ nbd_blk_aio_attached(const char *name, void *ctx) "Export %s: 
Attaching clients
 nbd_blk_aio_detach(const char *name, void *ctx) "Export %s: Detaching clients 
from AIO context %p\n"
 nbd_co_send_simple_reply(uint64_t handle, uint32_t error, const char *errname, 
int len) "Send simple reply: handle = %" PRIu64 ", error = %" PRIu32 " (%s), 
len = %d"
 nbd_co_send_structured_read(uint64_t handle, uint64_t offset, void *data, 
size_t size) "Send structured read data reply: handle = %" PRIu64 ", offset = 
%" PRIu64 ", data = %p, len = %zu"
-nbd_co_send_structured_error(uint64_t handle, int err, const char *errname) 
"Send structured error reply: handle = %" PRIu64 ", error = %d (%s)"
+nbd_co_send_structured_error(uint64_t handle, int err, const char *errname, 
const char *msg) "Send structured error reply: handle = %" PRIu64 ", error = %d 
(%s), msg = '%s'"
 nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char 
*name) "Decoding type: handle = %" PRIu64 ", type = %" PRIu16 " (%s)"
 nbd_co_receive_request_payload_received(uint64_t handle, uint32_t len) 
"Payload received: handle = %" PRIu64 ", len = %" PRIu32
 nbd_co_receive_request_cmd_write(uint32_t len) "Reading %" PRIu32 " byte(s)"
-- 
2.13.6




[Qemu-devel] [PULL 04/12] nbd/server: Report error for write to read-only export

2017-10-30 Thread Eric Blake
When the server is read-only, we were already reporting an error
message for NBD_CMD_WRITE_ZEROES, but failed to set errp for a
similar NBD_CMD_WRITE.  This will matter more once structured
replies allow the server to propagate the errp information back
to the client.  While at it, use an error message that makes a
bit more sense if viewed on the client side.

Note that when using qemu-io to test qemu-nbd behavior, it is
rather difficult to convince qemu-io to send protocol violations
(such as a read beyond bounds), because we have a lot of active
checking on the client side that a qemu-io request makes sense
before it ever goes over the wire to the server.  The case of a
client attempting a write when the server is started as
'qemu-nbd -r' is one of the few places where we can easily test
error path handling, without having to resort to hacking in known
temporary bugs to either the server or client.  [Maybe we want a
future patch to the client to do up-front checking on writes to a
read-only export, the way it does up-front bounds checking; but I
don't see anything in the NBD spec that points to a protocol
violation in our current behavior.]

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-5-ebl...@redhat.com>
---
 nbd/server.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nbd/server.c b/nbd/server.c
index efb6003364..05ff7470d5 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1381,6 +1381,7 @@ static coroutine_fn void nbd_trip(void *opaque)
 break;
 case NBD_CMD_WRITE:
 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
+error_setg(_err, "Export is read-only");
 ret = -EROFS;
 break;
 }
@@ -1398,7 +1399,7 @@ static coroutine_fn void nbd_trip(void *opaque)
 break;
 case NBD_CMD_WRITE_ZEROES:
 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
-error_setg(_err, "Server is read-only, return error");
+error_setg(_err, "Export is read-only");
 ret = -EROFS;
 break;
 }
-- 
2.13.6




[Qemu-devel] [PULL 07/12] nbd: Minimal structured read for server

2017-10-30 Thread Eric Blake
From: Vladimir Sementsov-Ogievskiy 

Minimal implementation of structured read: one structured reply chunk,
no segmentation.
Minimal structured error implementation: no text message.
Support DF flag, but just ignore it, as there is no segmentation any
way.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-8-ebl...@redhat.com>
---
 nbd/server.c | 107 +--
 nbd/trace-events |   2 ++
 2 files changed, 99 insertions(+), 10 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index cf815603a6..3261fd1bd7 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -100,6 +100,8 @@ struct NBDClient {
 QTAILQ_ENTRY(NBDClient) next;
 int nb_requests;
 bool closing;
+
+bool structured_reply;
 };

 /* That's all folks */
@@ -769,6 +771,23 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
  "TLS not configured");
 }
 break;
+
+case NBD_OPT_STRUCTURED_REPLY:
+if (length) {
+ret = nbd_reject_length(client, length, option, false,
+errp);
+} else if (client->structured_reply) {
+ret = nbd_negotiate_send_rep_err(
+client->ioc, NBD_REP_ERR_INVALID, option, errp,
+"structured reply already negotiated");
+} else {
+ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
+ option, errp);
+client->structured_reply = true;
+myflags |= NBD_FLAG_SEND_DF;
+}
+break;
+
 default:
 if (nbd_drop(client->ioc, length, errp) < 0) {
 return -EIO;
@@ -1243,6 +1262,60 @@ static int nbd_co_send_simple_reply(NBDClient *client,
 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
 }

+static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
+uint16_t type, uint64_t handle, uint32_t 
length)
+{
+stl_be_p(>magic, NBD_STRUCTURED_REPLY_MAGIC);
+stw_be_p(>flags, flags);
+stw_be_p(>type, type);
+stq_be_p(>handle, handle);
+stl_be_p(>length, length);
+}
+
+static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
+uint64_t handle,
+uint64_t offset,
+void *data,
+size_t size,
+Error **errp)
+{
+NBDStructuredRead chunk;
+struct iovec iov[] = {
+{.iov_base = , .iov_len = sizeof(chunk)},
+{.iov_base = data, .iov_len = size}
+};
+
+trace_nbd_co_send_structured_read(handle, offset, data, size);
+set_be_chunk(, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_OFFSET_DATA,
+ handle, sizeof(chunk) - sizeof(chunk.h) + size);
+stq_be_p(, offset);
+
+return nbd_co_send_iov(client, iov, 2, errp);
+}
+
+static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
+ uint64_t handle,
+ uint32_t error,
+ Error **errp)
+{
+NBDStructuredError chunk;
+int nbd_err = system_errno_to_nbd_errno(error);
+struct iovec iov[] = {
+{.iov_base = , .iov_len = sizeof(chunk)},
+/* FIXME: Support human-readable error message */
+};
+
+assert(nbd_err);
+trace_nbd_co_send_structured_error(handle, nbd_err,
+   nbd_err_lookup(nbd_err));
+set_be_chunk(, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
+ sizeof(chunk) - sizeof(chunk.h));
+stl_be_p(, nbd_err);
+stw_be_p(_length, 0);
+
+return nbd_co_send_iov(client, iov, 1, errp);
+}
+
 /* nbd_co_receive_request
  * Collect a client request. Return 0 if request looks valid, -EIO to drop
  * connection right away, and any other negative value to report an error to
@@ -1253,6 +1326,7 @@ static int nbd_co_receive_request(NBDRequestData *req, 
NBDRequest *request,
   Error **errp)
 {
 NBDClient *client = req->client;
+int valid_flags;

 g_assert(qemu_in_coroutine());
 assert(client->recv_coroutine == qemu_coroutine_self());
@@ -1314,13 +1388,15 @@ static int nbd_co_receive_request(NBDRequestData *req, 
NBDRequest *request,
(uint64_t)client->exp->size);
 return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
 }
-if (request->flags 

[Qemu-devel] [PULL 12/12] nbd: Minimal structured read for client

2017-10-30 Thread Eric Blake
From: Vladimir Sementsov-Ogievskiy 

Minimal implementation: for structured error only error_report error
message.

Note that test 83 is now more verbose, because the implementation
prints more warnings about unexpected communication errors; perhaps
future patches should tone things down by using trace messages
instead of warnings, but the common case of successful communication
is no noisier than before.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-13-ebl...@redhat.com>
---
 block/nbd-client.h |   1 +
 include/block/nbd.h|  12 ++
 nbd/nbd-internal.h |   1 -
 block/nbd-client.c | 490 ++---
 nbd/client.c   |  12 ++
 tests/qemu-iotests/083.out |  15 ++
 6 files changed, 498 insertions(+), 33 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index b435754b82..612c4c21a0 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -19,6 +19,7 @@

 typedef struct {
 Coroutine *coroutine;
+uint64_t offset;/* original offset of the request */
 bool receiving; /* waiting for read_reply_co? */
 } NBDClientRequest;

diff --git a/include/block/nbd.h b/include/block/nbd.h
index da6e305dd5..92d1723d7c 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -197,6 +197,11 @@ enum {
 #define NBD_REPLY_TYPE_ERROR NBD_REPLY_ERR(1)
 #define NBD_REPLY_TYPE_ERROR_OFFSET  NBD_REPLY_ERR(2)

+static inline bool nbd_reply_type_is_error(int type)
+{
+return type & (1 << 15);
+}
+
 /* NBD errors are based on errno numbers, so there is a 1:1 mapping,
  * but only a limited set of errno values is specified in the protocol.
  * Everything else is squashed to EINVAL.
@@ -214,6 +219,11 @@ enum {
 struct NBDExportInfo {
 /* Set by client before nbd_receive_negotiate() */
 bool request_sizes;
+
+/* In-out fields, set by client before nbd_receive_negotiate() and
+ * updated by server results during nbd_receive_negotiate() */
+bool structured_reply;
+
 /* Set by server results during nbd_receive_negotiate() */
 uint64_t size;
 uint16_t flags;
@@ -284,4 +294,6 @@ static inline bool nbd_reply_is_structured(NBDReply *reply)
 return reply->magic == NBD_STRUCTURED_REPLY_MAGIC;
 }

+const char *nbd_reply_type_lookup(uint16_t type);
+
 #endif
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index b64eb1cc9b..eeff78d3c9 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -104,7 +104,6 @@ const char *nbd_opt_lookup(uint32_t opt);
 const char *nbd_rep_lookup(uint32_t rep);
 const char *nbd_info_lookup(uint16_t info);
 const char *nbd_cmd_lookup(uint16_t info);
-const char *nbd_reply_type_lookup(uint16_t type);
 const char *nbd_err_lookup(int err);

 int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 58493b7ac4..b44d4d4a01 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -93,7 +93,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 if (i >= MAX_NBD_REQUESTS ||
 !s->requests[i].coroutine ||
 !s->requests[i].receiving ||
-nbd_reply_is_structured(>reply))
+(nbd_reply_is_structured(>reply) && !s->info.structured_reply))
 {
 break;
 }
@@ -141,6 +141,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
 assert(i < MAX_NBD_REQUESTS);

 s->requests[i].coroutine = qemu_coroutine_self();
+s->requests[i].offset = request->from;
 s->requests[i].receiving = false;

 request->handle = INDEX_TO_HANDLE(s, i);
@@ -181,75 +182,489 @@ err:
 return rc;
 }

-static int nbd_co_receive_reply(NBDClientSession *s,
-uint64_t handle,
-QEMUIOVector *qiov)
+static inline uint16_t payload_advance16(uint8_t **payload)
+{
+*payload += 2;
+return lduw_be_p(*payload - 2);
+}
+
+static inline uint32_t payload_advance32(uint8_t **payload)
+{
+*payload += 4;
+return ldl_be_p(*payload - 4);
+}
+
+static inline uint64_t payload_advance64(uint8_t **payload)
+{
+*payload += 8;
+return ldq_be_p(*payload - 8);
+}
+
+static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
+ uint8_t *payload, uint64_t 
orig_offset,
+ QEMUIOVector *qiov, Error **errp)
+{
+uint64_t offset;
+uint32_t hole_size;
+
+if (chunk->length != sizeof(offset) + sizeof(hole_size)) {
+error_setg(errp, "Protocol error: invalid payload for "
+ "NBD_REPLY_TYPE_OFFSET_HOLE");
+return -EINVAL;
+}
+
+offset = payload_advance64();
+hole_size = payload_advance32();
+
+if (offset < orig_offset || hole_size > qiov->size ||
+offset > orig_offset + qiov->size - 

[Qemu-devel] [PULL 06/12] nbd/server: Refactor zero-length option check

2017-10-30 Thread Eric Blake
Consolidate the response for a non-zero-length option payload
into a new function, nbd_reject_length().  This check will
also be used when introducing support for structured replies.

Note that STARTTLS response differs based on time: if the connection
is still unencrypted, we set fatal to true (a client that can't
request TLS correctly may still think that we are ready to start
the TLS handshake, so we must disconnect); while if the connection
is already encrypted, the client is sending a bogus request but
is no longer at risk of being confused by continuing the connection.

Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-7-ebl...@redhat.com>
[eblake: correct return value on STARTTLS]
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 nbd/server.c | 74 +---
 1 file changed, 46 insertions(+), 28 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 2f03059b4c..cf815603a6 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -253,21 +253,10 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, 
NBDExport *exp,

 /* Process the NBD_OPT_LIST command, with a potential series of replies.
  * Return -errno on error, 0 on success. */
-static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length,
- Error **errp)
+static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
 {
 NBDExport *exp;

-if (length) {
-if (nbd_drop(client->ioc, length, errp) < 0) {
-return -EIO;
-}
-return nbd_negotiate_send_rep_err(client->ioc,
-  NBD_REP_ERR_INVALID, NBD_OPT_LIST,
-  errp,
-  "OPT_LIST should not have length");
-}
-
 /* For each export, send a NBD_REP_SERVER reply. */
 QTAILQ_FOREACH(exp, , next) {
 if (nbd_negotiate_send_rep_list(client->ioc, exp, errp)) {
@@ -531,7 +520,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, 
uint32_t length,
 /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
  * new channel for all further (now-encrypted) communication. */
 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
- uint32_t length,
  Error **errp)
 {
 QIOChannel *ioc;
@@ -540,15 +528,6 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient 
*client,

 trace_nbd_negotiate_handle_starttls();
 ioc = client->ioc;
-if (length) {
-if (nbd_drop(ioc, length, errp) < 0) {
-return NULL;
-}
-nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
-   errp,
-   "OPT_STARTTLS should not have length");
-return NULL;
-}

 if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
NBD_OPT_STARTTLS, errp) < 0) {
@@ -584,6 +563,34 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient 
*client,
 return QIO_CHANNEL(tioc);
 }

+/* nbd_reject_length: Handle any unexpected payload.
+ * @fatal requests that we quit talking to the client, even if we are able
+ * to successfully send an error to the guest.
+ * Return:
+ * -errno  transmission error occurred or @fatal was requested, errp is set
+ * 0   error message successfully sent to client, errp is not set
+ */
+static int nbd_reject_length(NBDClient *client, uint32_t length,
+ uint32_t option, bool fatal, Error **errp)
+{
+int ret;
+
+assert(length);
+if (nbd_drop(client->ioc, length, errp) < 0) {
+return -EIO;
+}
+ret = nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID,
+ option, errp,
+ "option '%s' should have zero length",
+ nbd_opt_lookup(option));
+if (fatal && !ret) {
+error_setg(errp, "option '%s' should have zero length",
+   nbd_opt_lookup(option));
+return -EINVAL;
+}
+return ret;
+}
+
 /* nbd_negotiate_options
  * Process all NBD_OPT_* client option commands, during fixed newstyle
  * negotiation.
@@ -674,7 +681,13 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
 }
 switch (option) {
 case NBD_OPT_STARTTLS:
-tioc = nbd_negotiate_handle_starttls(client, length, errp);
+if (length) {
+/* Unconditionally drop the connection if the client
+ * can't start a TLS negotiation correctly */
+return nbd_reject_length(client, length, option, true,
+ errp);
+}
+tioc = 

[Qemu-devel] [PULL 03/12] nbd: Expose constants and structs for structured read

2017-10-30 Thread Eric Blake
Upcoming patches will implement the NBD structured reply
extension [1] for both client and server roles.  Declare the
constants, structs, and lookup routines that will be valuable
whether the server or client code is backported in isolation.

This includes moving one constant from an internal header to
the public header, as part of the structured read processing
will be done in block/nbd-client.c rather than nbd/client.c.

[1]https://github.com/NetworkBlockDevice/nbd/blob/extension-structured-reply/doc/proto.md

Based on patches from Vladimir Sementsov-Ogievskiy.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-4-ebl...@redhat.com>
---
 include/block/nbd.h | 41 +
 nbd/nbd-internal.h  |  2 +-
 nbd/common.c| 27 +++
 nbd/server.c|  2 ++
 4 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index dc62b5cd19..225e9575e4 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -69,6 +69,28 @@ typedef struct NBDSimpleReply {
 uint64_t handle;
 } QEMU_PACKED NBDSimpleReply;

+/* Header of all structured replies */
+typedef struct NBDStructuredReplyChunk {
+uint32_t magic;  /* NBD_STRUCTURED_REPLY_MAGIC */
+uint16_t flags;  /* combination of NBD_REPLY_FLAG_* */
+uint16_t type;   /* NBD_REPLY_TYPE_* */
+uint64_t handle; /* request handle */
+uint32_t length; /* length of payload */
+} QEMU_PACKED NBDStructuredReplyChunk;
+
+/* Header of NBD_REPLY_TYPE_OFFSET_DATA, complete NBD_REPLY_TYPE_OFFSET_HOLE */
+typedef struct NBDStructuredRead {
+NBDStructuredReplyChunk h;
+uint64_t offset;
+} QEMU_PACKED NBDStructuredRead;
+
+/* Header of all NBD_REPLY_TYPE_ERROR* errors */
+typedef struct NBDStructuredError {
+NBDStructuredReplyChunk h;
+uint32_t error;
+uint16_t message_length;
+} QEMU_PACKED NBDStructuredError;
+
 /* Transmission (export) flags: sent from server to client during handshake,
but describe what will happen during transmission */
 #define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */
@@ -79,6 +101,7 @@ typedef struct NBDSimpleReply {
rotational media */
 #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */
 #define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
+#define NBD_FLAG_SEND_DF   (1 << 7) /* Send DF (Do not Fragment) */

 /* New-style handshake (global) flags, sent from server to client, and
control what will happen during handshake phase. */
@@ -125,6 +148,7 @@ typedef struct NBDSimpleReply {
 /* Request flags, sent from client to server during transmission phase */
 #define NBD_CMD_FLAG_FUA(1 << 0) /* 'force unit access' during write */
 #define NBD_CMD_FLAG_NO_HOLE(1 << 1) /* don't punch hole on zero run */
+#define NBD_CMD_FLAG_DF (1 << 2) /* don't fragment structured read */

 /* Supported request types */
 enum {
@@ -149,6 +173,22 @@ enum {
  * aren't overflowing some other buffer. */
 #define NBD_MAX_NAME_SIZE 256

+/* Two types of reply structures */
+#define NBD_SIMPLE_REPLY_MAGIC  0x67446698
+#define NBD_STRUCTURED_REPLY_MAGIC  0x668e33ef
+
+/* Structured reply flags */
+#define NBD_REPLY_FLAG_DONE  (1 << 0) /* This reply-chunk is last */
+
+/* Structured reply types */
+#define NBD_REPLY_ERR(value) ((1 << 15) | (value))
+
+#define NBD_REPLY_TYPE_NONE  0
+#define NBD_REPLY_TYPE_OFFSET_DATA   1
+#define NBD_REPLY_TYPE_OFFSET_HOLE   2
+#define NBD_REPLY_TYPE_ERROR NBD_REPLY_ERR(1)
+#define NBD_REPLY_TYPE_ERROR_OFFSET  NBD_REPLY_ERR(2)
+
 /* NBD errors are based on errno numbers, so there is a 1:1 mapping,
  * but only a limited set of errno values is specified in the protocol.
  * Everything else is squashed to EINVAL.
@@ -159,6 +199,7 @@ enum {
 #define NBD_ENOMEM 12
 #define NBD_EINVAL 22
 #define NBD_ENOSPC 28
+#define NBD_EOVERFLOW  75
 #define NBD_ESHUTDOWN  108

 /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index df6c8b2f24..4f24d6e57d 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -47,7 +47,6 @@
 #define NBD_OLDSTYLE_NEGOTIATE_SIZE (8 + 8 + 8 + 4 + 124)

 #define NBD_REQUEST_MAGIC   0x25609513
-#define NBD_SIMPLE_REPLY_MAGIC  0x67446698
 #define NBD_OPTS_MAGIC  0x49484156454F5054LL
 #define NBD_CLIENT_MAGIC0x420281861253LL
 #define NBD_REP_MAGIC   0x0003e889045565a9LL
@@ -114,6 +113,7 @@ const char *nbd_opt_lookup(uint32_t opt);
 const char *nbd_rep_lookup(uint32_t rep);
 const char *nbd_info_lookup(uint16_t info);
 const char *nbd_cmd_lookup(uint16_t info);
+const char *nbd_reply_type_lookup(uint16_t type);
 const char *nbd_err_lookup(int err);

 int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);

[Qemu-devel] [PULL 11/12] nbd: Move nbd_read() to common header

2017-10-30 Thread Eric Blake
An upcoming change to block/nbd-client.c will want to read the
tail of a structured reply chunk directly from the wire.  Move
this function to make it easier.

Based on a patch from Vladimir Sementsov-Ogievskiy.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-12-ebl...@redhat.com>
---
 include/block/nbd.h | 10 ++
 nbd/nbd-internal.h  |  9 -
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 2ee1578420..da6e305dd5 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -264,6 +264,16 @@ void nbd_client_put(NBDClient *client);
 void nbd_server_start(SocketAddress *addr, const char *tls_creds,
   Error **errp);

+
+/* nbd_read
+ * Reads @size bytes from @ioc. Returns 0 on success.
+ */
+static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
+   Error **errp)
+{
+return qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
+}
+
 static inline bool nbd_reply_is_simple(NBDReply *reply)
 {
 return reply->magic == NBD_SIMPLE_REPLY_MAGIC;
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 4f24d6e57d..b64eb1cc9b 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -82,15 +82,6 @@ static inline int nbd_read_eof(QIOChannel *ioc, void 
*buffer, size_t size,
 return ret;
 }

-/* nbd_read
- * Reads @size bytes from @ioc. Returns 0 on success.
- */
-static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
-   Error **errp)
-{
-return qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
-}
-
 /* nbd_write
  * Writes @size bytes to @ioc. Returns 0 on success.
  */
-- 
2.13.6




[Qemu-devel] [PULL 05/12] nbd/server: Simplify nbd_negotiate_options loop

2017-10-30 Thread Eric Blake
Instead of making each caller check whether a transmission error
occurred, we can sink a common error check to the end of the loop.

Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-6-ebl...@redhat.com>
[eblake: squash in compiler warning fix]
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 nbd/server.c | 19 ---
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 05ff7470d5..2f03059b4c 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -678,6 +678,7 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
 if (!tioc) {
 return -EIO;
 }
+ret = 0;
 object_unref(OBJECT(client->ioc));
 client->ioc = QIO_CHANNEL(tioc);
 break;
@@ -698,9 +699,6 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
  "Option 0x%" PRIx32
  "not permitted before TLS",
  option);
-if (ret < 0) {
-return ret;
-}
 /* Let the client keep trying, unless they asked to
  * quit. In this mode, we've already sent an error, so
  * we can't ack the abort.  */
@@ -713,9 +711,6 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
 switch (option) {
 case NBD_OPT_LIST:
 ret = nbd_negotiate_handle_list(client, length, errp);
-if (ret < 0) {
-return ret;
-}
 break;

 case NBD_OPT_ABORT:
@@ -738,9 +733,6 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
 assert(option == NBD_OPT_GO);
 return 0;
 }
-if (ret) {
-return ret;
-}
 break;

 case NBD_OPT_STARTTLS:
@@ -758,9 +750,6 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
  option, errp,
  "TLS not configured");
 }
-if (ret < 0) {
-return ret;
-}
 break;
 default:
 if (nbd_drop(client->ioc, length, errp) < 0) {
@@ -772,9 +761,6 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
  "Unsupported option 0x%"
  PRIx32 " (%s)", option,
  nbd_opt_lookup(option));
-if (ret < 0) {
-return ret;
-}
 break;
 }
 } else {
@@ -794,6 +780,9 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
 return -EINVAL;
 }
 }
+if (ret < 0) {
+return ret;
+}
 }
 }

-- 
2.13.6




[Qemu-devel] [PULL 01/12] nbd: Include error names in trace messages

2017-10-30 Thread Eric Blake
NBD errors were originally sent over the wire based on Linux errno
values; but not all the world is Linux, and not all platforms share
the same values.  Since a number isn't very easy to decipher on all
platforms, update the trace messages to include the name of NBD
errors being sent/received over the wire.  Tweak the trace messages
to be at the point where we are using the NBD error, not the
translation to the host errno values.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-2-ebl...@redhat.com>
---
 nbd/nbd-internal.h |  1 +
 nbd/client.c   |  3 ++-
 nbd/common.c   | 23 +++
 nbd/server.c   |  3 ++-
 nbd/trace-events   |  4 ++--
 5 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 11a130d050..4bfe5be884 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -126,6 +126,7 @@ const char *nbd_opt_lookup(uint32_t opt);
 const char *nbd_rep_lookup(uint32_t rep);
 const char *nbd_info_lookup(uint16_t info);
 const char *nbd_cmd_lookup(uint16_t info);
+const char *nbd_err_lookup(int err);

 int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);

diff --git a/nbd/client.c b/nbd/client.c
index cd5a2c80ac..59d7c9d49f 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -940,6 +940,8 @@ int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, 
Error **errp)
 reply->error  = ldl_be_p(buf + 4);
 reply->handle = ldq_be_p(buf + 8);

+trace_nbd_receive_reply(magic, reply->error, nbd_err_lookup(reply->error),
+reply->handle);
 reply->error = nbd_errno_to_system_errno(reply->error);

 if (reply->error == ESHUTDOWN) {
@@ -947,7 +949,6 @@ int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, 
Error **errp)
 error_setg(errp, "server shutting down");
 return -EINVAL;
 }
-trace_nbd_receive_reply(magic, reply->error, reply->handle);

 if (magic != NBD_SIMPLE_REPLY_MAGIC) {
 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
diff --git a/nbd/common.c b/nbd/common.c
index 59a5316be9..7456021f7e 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -148,3 +148,26 @@ const char *nbd_cmd_lookup(uint16_t cmd)
 return "";
 }
 }
+
+
+const char *nbd_err_lookup(int err)
+{
+switch (err) {
+case NBD_SUCCESS:
+return "success";
+case NBD_EPERM:
+return "EPERM";
+case NBD_EIO:
+return "EIO";
+case NBD_ENOMEM:
+return "ENOMEM";
+case NBD_EINVAL:
+return "EINVAL";
+case NBD_ENOSPC:
+return "ENOSPC";
+case NBD_ESHUTDOWN:
+return "ESHUTDOWN";
+default:
+return "";
+}
+}
diff --git a/nbd/server.c b/nbd/server.c
index 3df3548d6d..459e00c553 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1227,7 +1227,8 @@ static int nbd_co_send_simple_reply(NBDClient *client,
 {.iov_base = data, .iov_len = len}
 };

-trace_nbd_co_send_simple_reply(handle, nbd_err, len);
+trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
+   len);
 set_be_simple_reply(, nbd_err, handle);

 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
diff --git a/nbd/trace-events b/nbd/trace-events
index e27614f050..920c8a0e5e 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -29,7 +29,7 @@ nbd_client_loop_ret(int ret, const char *error) "NBD loop 
returned %d: %s"
 nbd_client_clear_queue(void) "Clearing NBD queue"
 nbd_client_clear_socket(void) "Clearing NBD socket"
 nbd_send_request(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, 
uint16_t type, const char *name) "Sending request to server: { .from = %" 
PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", 
.type = %" PRIu16 " (%s) }"
-nbd_receive_reply(uint32_t magic, int32_t error, uint64_t handle) "Got reply: 
{ magic = 0x%" PRIx32 ", .error = % " PRId32 ", handle = %" PRIu64" }"
+nbd_receive_reply(uint32_t magic, int32_t error, const char *errname, uint64_t 
handle) "Got reply: { magic = 0x%" PRIx32 ", .error = %" PRId32 " (%s), handle 
= %" PRIu64" }"

 # nbd/server.c
 nbd_negotiate_send_rep_len(uint32_t opt, const char *optname, uint32_t type, 
const char *typename, uint32_t len) "Reply opt=0x%" PRIx32 " (%s), type=0x%" 
PRIx32 " (%s), len=%" PRIu32
@@ -53,7 +53,7 @@ nbd_negotiate_success(void) "Negotiation succeeded"
 nbd_receive_request(uint32_t magic, uint16_t flags, uint16_t type, uint64_t 
from, uint32_t len) "Got request: { magic = 0x%" PRIx32 ", .flags = 0x%" PRIx16 
", .type = 0x%" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }"
 nbd_blk_aio_attached(const char *name, void *ctx) "Export %s: Attaching 
clients to AIO context %p\n"
 nbd_blk_aio_detach(const char *name, void *ctx) "Export %s: Detaching clients 
from AIO context %p\n"
-nbd_co_send_simple_reply(uint64_t handle, uint32_t error, int len) 

[Qemu-devel] [PULL 02/12] nbd: Move nbd_errno_to_system_errno() to public header

2017-10-30 Thread Eric Blake
This is needed in preparation for structured reply handling,
as we will be performing the translation from NBD error to
system errno value higher in the stack at block/nbd-client.c.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-3-ebl...@redhat.com>
---
 include/block/nbd.h | 13 +
 nbd/nbd-internal.h  | 12 
 nbd/client.c| 32 
 nbd/common.c| 34 ++
 nbd/trace-events|  4 +++-
 5 files changed, 50 insertions(+), 45 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index a6df5ce8b5..dc62b5cd19 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -149,6 +149,18 @@ enum {
  * aren't overflowing some other buffer. */
 #define NBD_MAX_NAME_SIZE 256

+/* NBD errors are based on errno numbers, so there is a 1:1 mapping,
+ * but only a limited set of errno values is specified in the protocol.
+ * Everything else is squashed to EINVAL.
+ */
+#define NBD_SUCCESS0
+#define NBD_EPERM  1
+#define NBD_EIO5
+#define NBD_ENOMEM 12
+#define NBD_EINVAL 22
+#define NBD_ENOSPC 28
+#define NBD_ESHUTDOWN  108
+
 /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
 struct NBDExportInfo {
 /* Set by client before nbd_receive_negotiate() */
@@ -172,6 +184,7 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request);
 int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp);
 int nbd_client(int fd);
 int nbd_disconnect(int fd);
+int nbd_errno_to_system_errno(int err);

 typedef struct NBDExport NBDExport;
 typedef struct NBDClient NBDClient;
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 4bfe5be884..df6c8b2f24 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -64,18 +64,6 @@
 #define NBD_SET_TIMEOUT _IO(0xab, 9)
 #define NBD_SET_FLAGS   _IO(0xab, 10)

-/* NBD errors are based on errno numbers, so there is a 1:1 mapping,
- * but only a limited set of errno values is specified in the protocol.
- * Everything else is squashed to EINVAL.
- */
-#define NBD_SUCCESS0
-#define NBD_EPERM  1
-#define NBD_EIO5
-#define NBD_ENOMEM 12
-#define NBD_EINVAL 22
-#define NBD_ENOSPC 28
-#define NBD_ESHUTDOWN  108
-
 /* nbd_read_eof
  * Tries to read @size bytes from @ioc.
  * Returns 1 on success
diff --git a/nbd/client.c b/nbd/client.c
index 59d7c9d49f..50f36b511e 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -22,38 +22,6 @@
 #include "trace.h"
 #include "nbd-internal.h"

-static int nbd_errno_to_system_errno(int err)
-{
-int ret;
-switch (err) {
-case NBD_SUCCESS:
-ret = 0;
-break;
-case NBD_EPERM:
-ret = EPERM;
-break;
-case NBD_EIO:
-ret = EIO;
-break;
-case NBD_ENOMEM:
-ret = ENOMEM;
-break;
-case NBD_ENOSPC:
-ret = ENOSPC;
-break;
-case NBD_ESHUTDOWN:
-ret = ESHUTDOWN;
-break;
-default:
-trace_nbd_unknown_error(err);
-/* fallthrough */
-case NBD_EINVAL:
-ret = EINVAL;
-break;
-}
-return ret;
-}
-
 /* Definitions for opaque data types */

 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
diff --git a/nbd/common.c b/nbd/common.c
index 7456021f7e..593904f148 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -18,6 +18,7 @@

 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "trace.h"
 #include "nbd-internal.h"

 /* Discard length bytes from channel.  Return -errno on failure and 0 on
@@ -171,3 +172,36 @@ const char *nbd_err_lookup(int err)
 return "";
 }
 }
+
+
+int nbd_errno_to_system_errno(int err)
+{
+int ret;
+switch (err) {
+case NBD_SUCCESS:
+ret = 0;
+break;
+case NBD_EPERM:
+ret = EPERM;
+break;
+case NBD_EIO:
+ret = EIO;
+break;
+case NBD_ENOMEM:
+ret = ENOMEM;
+break;
+case NBD_ENOSPC:
+ret = ENOSPC;
+break;
+case NBD_ESHUTDOWN:
+ret = ESHUTDOWN;
+break;
+default:
+trace_nbd_unknown_error(err);
+/* fallthrough */
+case NBD_EINVAL:
+ret = EINVAL;
+break;
+}
+return ret;
+}
diff --git a/nbd/trace-events b/nbd/trace-events
index 920c8a0e5e..ab3d7dad4f 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -1,5 +1,4 @@
 # nbd/client.c
-nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL"
 nbd_send_option_request(uint32_t opt, const char *name, uint32_t len) "Sending 
option request %" PRIu32" (%s), len %" PRIu32
 nbd_receive_option_reply(uint32_t option, const char *optname, uint32_t type, 
const char *typename, uint32_t length) "Received option reply 0x%" PRIx32" 
(%s), type 0x%" PRIx32" (%s), len %" PRIu32
 nbd_reply_err_unsup(uint32_t option, const char *name) "server 

[Qemu-devel] [PULL 09/12] nbd/client: refactor nbd_receive_starttls

2017-10-30 Thread Eric Blake
From: Vladimir Sementsov-Ogievskiy 

Split out nbd_request_simple_option to be reused for structured reply
option.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-10-ebl...@redhat.com>
---
 nbd/client.c | 70 ++--
 nbd/trace-events |  4 +---
 2 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index 50f36b511e..9acf745b79 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -508,35 +508,61 @@ static int nbd_receive_query_exports(QIOChannel *ioc,
 }
 }

+/* nbd_request_simple_option: Send an option request, and parse the reply
+ * return 1 for successful negotiation,
+ *0 if operation is unsupported,
+ *-1 with errp set for any other error
+ */
+static int nbd_request_simple_option(QIOChannel *ioc, int opt, Error **errp)
+{
+nbd_opt_reply reply;
+int error;
+
+if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) {
+return -1;
+}
+
+if (nbd_receive_option_reply(ioc, opt, , errp) < 0) {
+return -1;
+}
+error = nbd_handle_reply_err(ioc, , errp);
+if (error <= 0) {
+return error;
+}
+
+if (reply.type != NBD_REP_ACK) {
+error_setg(errp, "Server answered option %d (%s) with unexpected "
+   "reply %" PRIx32 " (%s)", opt, nbd_opt_lookup(opt),
+   reply.type, nbd_rep_lookup(reply.type));
+nbd_send_opt_abort(ioc);
+return -1;
+}
+
+if (reply.length != 0) {
+error_setg(errp, "Option %d ('%s') response length is %" PRIu32
+   " (it should be zero)", opt, nbd_opt_lookup(opt),
+   reply.length);
+nbd_send_opt_abort(ioc);
+return -1;
+}
+
+return 1;
+}
+
 static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
 QCryptoTLSCreds *tlscreds,
 const char *hostname, Error **errp)
 {
-nbd_opt_reply reply;
+int ret;
 QIOChannelTLS *tioc;
 struct NBDTLSHandshakeData data = { 0 };

-trace_nbd_receive_starttls_request();
-if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) {
-return NULL;
-}
-
-trace_nbd_receive_starttls_reply();
-if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, , errp) < 0) {
-return NULL;
-}
-
-if (reply.type != NBD_REP_ACK) {
-error_setg(errp, "Server rejected request to start TLS %" PRIx32,
-   reply.type);
-nbd_send_opt_abort(ioc);
-return NULL;
-}
-
-if (reply.length != 0) {
-error_setg(errp, "Start TLS response was not zero %" PRIu32,
-   reply.length);
-nbd_send_opt_abort(ioc);
+ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, errp);
+if (ret <= 0) {
+if (ret == 0) {
+error_setg(errp, "Server don't support STARTTLS option");
+nbd_send_opt_abort(ioc);
+}
 return NULL;
 }

diff --git a/nbd/trace-events b/nbd/trace-events
index 52150bd738..596df96575 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -8,9 +8,7 @@ nbd_opt_go_info_unknown(int info, const char *name) "Ignoring 
unknown info %d (%
 nbd_opt_go_info_block_size(uint32_t minimum, uint32_t preferred, uint32_t 
maximum) "Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" PRIx32
 nbd_receive_query_exports_start(const char *wantname) "Querying export list 
for '%s'"
 nbd_receive_query_exports_success(const char *wantname) "Found desired export 
name '%s'"
-nbd_receive_starttls_request(void) "Requesting TLS from server"
-nbd_receive_starttls_reply(void) "Getting TLS reply from server"
-nbd_receive_starttls_new_client(void) "TLS request approved, setting up TLS"
+nbd_receive_starttls_new_client(void) "Setting up TLS"
 nbd_receive_starttls_tls_handshake(void) "Starting TLS handshake"
 nbd_receive_negotiate(void *tlscreds, const char *hostname) "Receiving 
negotiation tlscreds=%p hostname=%s"
 nbd_receive_negotiate_magic(uint64_t magic) "Magic is 0x%" PRIx64
-- 
2.13.6




[Qemu-devel] [PULL 00/12] NBD patches prior to 2.11 soft freeze

2017-10-30 Thread Eric Blake
The following changes since commit abf6e752e55b2f5afb48303429dea2db7c3a62de:

  Merge remote-tracking branch 'remotes/borntraeger/tags/s390x-20171030' into 
staging (2017-10-30 13:02:45 +)

are available in the git repository at:

  git://repo.or.cz/qemu/ericb.git tags/pull-nbd-2017-10-30

for you to fetch changes up to f140e3000371e67ff4e00df3213e2d576d9c91be:

  nbd: Minimal structured read for client (2017-10-30 21:48:41 +0100)


nbd patches for 2017-10-30

Vladimir Sementsov-Ogievskiy (some patches co-authored by Eric Blake):
00/12 nbd minimal structured read


Eric Blake (8):
  nbd: Include error names in trace messages
  nbd: Move nbd_errno_to_system_errno() to public header
  nbd: Expose constants and structs for structured read
  nbd/server: Report error for write to read-only export
  nbd/server: Simplify nbd_negotiate_options loop
  nbd/server: Refactor zero-length option check
  nbd/server: Include human-readable message in structured errors
  nbd: Move nbd_read() to common header

Vladimir Sementsov-Ogievskiy (4):
  nbd: Minimal structured read for server
  nbd/client: refactor nbd_receive_starttls
  nbd/client: prepare nbd_receive_reply for structured reply
  nbd: Minimal structured read for client

 block/nbd-client.h |   1 +
 include/block/nbd.h| 106 +-
 nbd/nbd-internal.h |  23 +--
 block/nbd-client.c | 492 ++---
 nbd/client.c   | 217 +---
 nbd/common.c   |  84 
 nbd/server.c   | 210 ++-
 nbd/trace-events   |  15 +-
 tests/qemu-iotests/083.out |  15 ++
 9 files changed, 969 insertions(+), 194 deletions(-)

-- 
2.13.6




Re: [Qemu-devel] [PATCH 1/6] qio: Make port 0 work for qio

2017-10-30 Thread Daniel P. Berrange
On Mon, Oct 30, 2017 at 12:21:07PM +0100, Juan Quintela wrote:
> For tcp sockets we read back what is the socket/address.  So we know
> what is the port that we are listening into.
> 
> Looked all callers of the function, and they just create the addr, use
> it, and drop it, so no problem that we always update the port in the
> address.

Can you explain more why you need this ?

Nothing should be using the socket_listen() method directly any more IIRC,
and for the QIOChannelSocket classes, you should not rely on the address that
you pass in, being the same as the one that ultimately gets passed into the
socket_listen() method.

Patches that I have pending change things so that listening happens in two
phases. First we take the SocketAddress and do DNS resolution to create
multiple new SocketAddress structs. These are then passed into the low-level
socket_listen() method. So with that happening, even if you update the address
in socket_listen() that info won't get back up to the caller.

If you have a QIOChannelSocket instance, and you want to know what port it
ended up listening on, you should call qio_channel_socket_get_local_address()
method instead, which returns a dynamically populated SocketAddress struct.
This should mean socket_listen() never needs to update the address that it
binds on.

IOW, I think this patch is redundant.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|



Re: [Qemu-devel] [PATCH v6 06/12] nbd/server: Refactor zero-length option check

2017-10-30 Thread Eric Blake
On 10/30/2017 09:11 PM, Eric Blake wrote:
> On 10/30/2017 06:22 PM, Vladimir Sementsov-Ogievskiy wrote:
>> 27.10.2017 13:40, Eric Blake wrote:
>>> Consolidate the response for a non-zero-length option payload
>>> into a new function, nbd_reject_length().  This check will
>>> also be used when introducing support for structured replies.
>>>

>>> +    if (length) {
>>> +    /* Unconditionally drop the connection if the client
>>> + * can't start a TLS negotiation correctly */
>>> +    nbd_reject_length(client, length, option, true,
>>> errp);
>>> +    return -EINVAL;
>>
>> why not to return nbd_reject_length's result? this EINVAL may not
>> correspond to errp (about EIO for example)..
> 
> Somewhat true, if nbd_reject_length() fails. But nbd_reject_length() may
> also return 0 without setting errp, in which case, maybe this code
> should set errp rather than just blindly returning -EINVAL.
> 
>>
>> with or without this fixed:
>>
>> Reviewed-by: Vladimir Sementsov-Ogievskiy 
>>
> 
> Okay, I'll squash this in, and include it in my pull request to be sent
> shortly.

D'oh. Long week for me. The whole reason I added a 'bool fatal'
parameter was so that I don't have to worry about nbd_reject_length()
returning 0.  So it is instead better to just do:

> --- i/nbd/server.c
> +++ w/nbd/server.c
> @@ -684,8 +684,13 @@ static int nbd_negotiate_options(NBDClient *client,
> uint16_t myflags,
>  if (length) {
>  /* Unconditionally drop the connection if the client
>   * can't start a TLS negotiation correctly */
> -nbd_reject_length(client, length, option, true, errp);
> -return -EINVAL;
> +return nbd_reject_length(client, length, option, true,
> + errp);

rather than repeating an error message.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [Qemu-ppc] [PATCH v1 0/3] ppc: adding some RTAS calls in tests/libqos

2017-10-30 Thread Daniel Henrique Barboza



On 10/30/2017 06:12 PM, David Gibson wrote:

On Thu, Oct 26, 2017 at 06:22:47PM -0200, Daniel Henrique Barboza wrote:

This series implements a few RTAS hypercalls in tests/libqos
that, used together, implement the DRC state transition described
in PAPR 2.7+, 13.4.

This started as an attempt at implementing hot unplug qtests for the
sPAPR machine but I've found a few issues that will require more time
to solve:

- CPU hot unplug: for some reason the machine freezes after the
callback is returned.

- LMB hot unplug: not supported by the sPAPR machine if not
set in CAS.

I have a feeling that the CPU hot unplug issue might be related
to the lack of a CAS negotiation step as well, but the only way to be
sure is to further understand how the CAS negotiation interferes
with device hot unplug. If needed we'll have to implement the
client architecture support hypercall as well in the future.

Until then, I believe these hypercalls have a value of their own and
are worth being pushed upstream.

Unfortunately, these changes break the Travis build on MacOS.


Hmpf  how can I run this Travis build to see the errors? I've searched
here and found out something about making a Github pull request and
then https://travis-ci.org/qemu/qemu runs the Travis build in the request.
Is this a valid way of running it?


Thanks,

Daniel



Daniel Henrique Barboza (3):
   tests: adding 'check_exception' RTAS implementation
   tests: adding 'set_indicator' RTAS call
   tests: ibm,configure-connector RTAS call implementation

  tests/libqos/rtas.c | 105 +
  tests/libqos/rtas.h |   5 ++
  tests/rtas-test.c   | 218 
  3 files changed, 328 insertions(+)






[Qemu-devel] [PULL 1/2] hmp: Replace error_report_err

2017-10-30 Thread Dr. David Alan Gilbert (git)
From: ZhiPeng Lu 

Use hmp_handle_error instead of error_report_err to set error.

Signed-off-by: ZhiPeng Lu 
Reviewed-by: Jiyun Fan 
Message-Id: <1508411793-22868-1-git-send-email-lu.zhip...@zte.com.cn>
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Dr. David Alan Gilbert 
---
 hmp.c | 26 --
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/hmp.c b/hmp.c
index a01be50daa..35a7041824 100644
--- a/hmp.c
+++ b/hmp.c
@@ -670,7 +670,7 @@ void hmp_info_vnc(Monitor *mon, const QDict *qdict)
 
 info2l = qmp_query_vnc_servers();
 if (err) {
-error_report_err(err);
+hmp_handle_error(mon, );
 return;
 }
 if (!info2l) {
@@ -785,7 +785,7 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
 
 info = qmp_query_balloon();
 if (err) {
-error_report_err(err);
+hmp_handle_error(mon, );
 return;
 }
 
@@ -1128,7 +1128,7 @@ void hmp_ringbuf_read(Monitor *mon, const QDict *qdict)
 
 data = qmp_ringbuf_read(chardev, size, false, 0, );
 if (err) {
-error_report_err(err);
+hmp_handle_error(mon, );
 return;
 }
 
@@ -1195,9 +1195,7 @@ void hmp_balloon(Monitor *mon, const QDict *qdict)
 Error *err = NULL;
 
 qmp_balloon(value, );
-if (err) {
-error_report_err(err);
-}
+hmp_handle_error(mon, );
 }
 
 void hmp_block_resize(Monitor *mon, const QDict *qdict)
@@ -1534,10 +1532,7 @@ void hmp_migrate_set_cache_size(Monitor *mon, const 
QDict *qdict)
 Error *err = NULL;
 
 qmp_migrate_set_cache_size(value, );
-if (err) {
-error_report_err(err);
-return;
-}
+hmp_handle_error(mon, );
 }
 
 /* Kept for backwards compatibility */
@@ -1568,10 +1563,7 @@ void hmp_migrate_set_capability(Monitor *mon, const 
QDict *qdict)
 
 end:
 qapi_free_MigrationCapabilityStatusList(caps);
-
-if (err) {
-error_report_err(err);
-}
+hmp_handle_error(mon, );
 }
 
 void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
@@ -1680,9 +1672,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
  cleanup:
 qapi_free_MigrateSetParameters(p);
 visit_free(v);
-if (err) {
-error_report_err(err);
-}
+hmp_handle_error(mon, );
 }
 
 void hmp_client_migrate_info(Monitor *mon, const QDict *qdict)
@@ -1936,7 +1926,7 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
 
 qmp_migrate(uri, !!blk, blk, !!inc, inc, false, false, );
 if (err) {
-error_report_err(err);
+hmp_handle_error(mon, );
 return;
 }
 
-- 
2.14.3




[Qemu-devel] [PULL 2/2] monitor: fix dangling CPU pointer

2017-10-30 Thread Dr. David Alan Gilbert (git)
From: Greg Kurz 

If a CPU selected with the "cpu" command is hot-unplugged then "info cpus"
causes QEMU to exit:

(qemu) device_del cpu1
(qemu) info cpus
qemu:qemu_cpu_kick_thread: No such process

This happens because "cpu" stores the pointer to the selected CPU into
the monitor structure. When the CPU is hot-unplugged, we end up with a
dangling pointer. The "info cpus" command then does:

hmp_info_cpus()
 monitor_get_cpu_index()
  mon_get_cpu()
   cpu_synchronize_state() <--- called with dangling pointer

This could cause a QEMU crash as well.

This patch switches the monitor to store the QOM path instead of a
pointer to the current CPU. The path is then resolved when needed.
If the resolution fails, we assume that the CPU was removed and the
path is reset to the default (i.e., path of first_cpu).

Reported-by: Satheesh Rajendran 
Suggested-by: Igor Mammedov 
Signed-off-by: Greg Kurz 
Message-Id: <150822818243.26242.12993827911736928961.st...@bahia.lan>
Reviewed-by: Igor Mammedov 
Signed-off-by: Dr. David Alan Gilbert 
---
 monitor.c | 23 ++-
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/monitor.c b/monitor.c
index 7a802a345e..e36fb5308d 100644
--- a/monitor.c
+++ b/monitor.c
@@ -200,7 +200,7 @@ struct Monitor {
 
 ReadLineState *rs;
 MonitorQMP qmp;
-CPUState *mon_cpu;
+gchar *mon_cpu_path;
 BlockCompletionFunc *password_completion_cb;
 void *password_opaque;
 mon_cmd_t *cmd_table;
@@ -579,6 +579,7 @@ static void monitor_data_init(Monitor *mon)
 
 static void monitor_data_destroy(Monitor *mon)
 {
+g_free(mon->mon_cpu_path);
 qemu_chr_fe_deinit(>chr, false);
 if (monitor_is_qmp(mon)) {
 json_message_parser_destroy(>qmp.parser);
@@ -1047,20 +1048,32 @@ int monitor_set_cpu(int cpu_index)
 if (cpu == NULL) {
 return -1;
 }
-cur_mon->mon_cpu = cpu;
+g_free(cur_mon->mon_cpu_path);
+cur_mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu));
 return 0;
 }
 
 CPUState *mon_get_cpu(void)
 {
-if (!cur_mon->mon_cpu) {
+CPUState *cpu;
+
+if (cur_mon->mon_cpu_path) {
+cpu = (CPUState *) object_resolve_path_type(cur_mon->mon_cpu_path,
+TYPE_CPU, NULL);
+if (!cpu) {
+g_free(cur_mon->mon_cpu_path);
+cur_mon->mon_cpu_path = NULL;
+}
+}
+if (!cur_mon->mon_cpu_path) {
 if (!first_cpu) {
 return NULL;
 }
 monitor_set_cpu(first_cpu->cpu_index);
+cpu = first_cpu;
 }
-cpu_synchronize_state(cur_mon->mon_cpu);
-return cur_mon->mon_cpu;
+cpu_synchronize_state(cpu);
+return cpu;
 }
 
 CPUArchState *mon_get_cpu_env(void)
-- 
2.14.3




[Qemu-devel] [PULL 0/2] hmp queue

2017-10-30 Thread Dr. David Alan Gilbert (git)
From: "Dr. David Alan Gilbert" <dgilb...@redhat.com>

The following changes since commit abf6e752e55b2f5afb48303429dea2db7c3a62de:

  Merge remote-tracking branch 'remotes/borntraeger/tags/s390x-20171030' into 
staging (2017-10-30 13:02:45 +)

are available in the Git repository at:

  git://github.com/dagrh/qemu.git tags/pull-hmp-20171030

for you to fetch changes up to 751f8cfe2a556b3ef49f6af2860e2d1d2a1ec66a:

  monitor: fix dangling CPU pointer (2017-10-30 18:46:32 +)


hmp pull 2017-10-30


Greg Kurz (1):
  monitor: fix dangling CPU pointer

ZhiPeng Lu (1):
  hmp: Replace error_report_err

 hmp.c | 26 --
 monitor.c | 23 ++-
 2 files changed, 26 insertions(+), 23 deletions(-)



Re: [Qemu-devel] drive_add: file names with spaces

2017-10-30 Thread Dr. David Alan Gilbert
* Eric Blake (ebl...@redhat.com) wrote:
> On 10/30/2017 03:32 PM, Dr. David Alan Gilbert wrote:
> 
> > Now, the real challenge is how to deal with a filename with a comma in;
> >drive_add 1 "file=foo,bar"
> >Could not open 'foo': No such file or directory
> > 
> 
> Does HMP use the QemuOpts parser?  If so, try:
> 
> drive_add 1 "file=foo,,bar"
> 
> (that is, reproduce the double-comma escaping mechanism that we use on
> the command line).
> 
> But I haven't personally tried this, so I have no idea if it will help.

Ah yes it does;  I never knew that worked.

Dave

> -- 
> Eric Blake, Principal Software Engineer
> Red Hat, Inc.   +1-919-301-3266
> Virtualization:  qemu.org | libvirt.org
> 



--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-devel] [PATCH v1 0/3] ppc: adding some RTAS calls in tests/libqos

2017-10-30 Thread David Gibson
On Thu, Oct 26, 2017 at 06:22:47PM -0200, Daniel Henrique Barboza wrote:
> This series implements a few RTAS hypercalls in tests/libqos
> that, used together, implement the DRC state transition described
> in PAPR 2.7+, 13.4.
> 
> This started as an attempt of implementing hot unplug qtests for the
> sPAPR machine but I've found a few issues that will require more time
> solving:
> 
> - CPU hot unplug: for some reason the machine freezes after the
> callback is returned.
> 
> - LMB hot unplug: not supported by the sPAPR machine if not
> set in CAS.
> 
> I have a feeling that the CPU hot unplug issue might be related
> to the lack of a CAS negotiation step as well, but the only way to be
> sure is to further understand how the CAS negotiation interferes
> with the device hot unplug. If needed we'll have to implement the
> client architecture support hypercall as well in the future.
> 
> Until then, I believe these hypercalls have a value of their own and
> are worth being pushed upstream.

Unfortunately, these changes break the Travis build on MacOS.
> 
> 
> Daniel Henrique Barboza (3):
>   tests: adding 'check_exception' RTAS implementation
>   tests: adding 'set_indicator' RTAS call
>   tests: ibm,configure-connector RTAS call implementation
> 
>  tests/libqos/rtas.c | 105 +
>  tests/libqos/rtas.h |   5 ++
>  tests/rtas-test.c   | 218 
> 
>  3 files changed, 328 insertions(+)
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v6 06/12] nbd/server: Refactor zero-length option check

2017-10-30 Thread Eric Blake
On 10/30/2017 06:22 PM, Vladimir Sementsov-Ogievskiy wrote:
> 27.10.2017 13:40, Eric Blake wrote:
>> Consolidate the response for a non-zero-length option payload
>> into a new function, nbd_reject_length().  This check will
>> also be used when introducing support for structured replies.
>>
>> Note that STARTTLS response differs based on time: if the connection
>> is still unencrypted, we set fatal to true (a client that can't
>> request TLS correctly may still think that we are ready to start
>> the TLS handshake, so we must disconnect); while if the connection
>> is already encrypted, the client is sending a bogus request but
>> is no longer at risk of being confused by continuing the connection.
>>

>>   switch (option) {
>>   case NBD_OPT_STARTTLS:
>> -    tioc = nbd_negotiate_handle_starttls(client, length,
>> errp);
>> +    if (length) {
>> +    /* Unconditionally drop the connection if the client
>> + * can't start a TLS negotiation correctly */
>> +    nbd_reject_length(client, length, option, true,
>> errp);
>> +    return -EINVAL;
> 
> Why not return nbd_reject_length's result? This EINVAL may not
> correspond to errp (which may be about EIO, for example).

Somewhat true, if nbd_reject_length() fails. But nbd_reject_length() may
also return 0 without setting errp, in which case, maybe this code
should set errp rather than just blindly returning -EINVAL.

> 
> with or without this fixed:
> 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 
> 

Okay, I'll squash this in, and include it in my pull request to be sent
shortly.

diff --git i/nbd/server.c w/nbd/server.c
index a9480e42cd..91f81a0f19 100644
--- i/nbd/server.c
+++ w/nbd/server.c
@@ -684,8 +684,13 @@ static int nbd_negotiate_options(NBDClient *client,
uint16_t myflags,
 if (length) {
 /* Unconditionally drop the connection if the client
  * can't start a TLS negotiation correctly */
-nbd_reject_length(client, length, option, true, errp);
-return -EINVAL;
+ret = nbd_reject_length(client, length, option,
true, errp);
+if (!ret) {
+error_setg(errp, "option '%s' should have zero
length",
+   nbd_opt_lookup(option));
+ret = -EINVAL;
+}
+return ret;
 }
 tioc = nbd_negotiate_handle_starttls(client, errp);
 if (!tioc) {


-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] drive_add: file names with spaces

2017-10-30 Thread Eric Blake
On 10/30/2017 03:32 PM, Dr. David Alan Gilbert wrote:

> Now, the real challenge is how to deal with a filename with a comma in;
>drive_add 1 "file=foo,bar"
>Could not open 'foo': No such file or directory
> 

Does HMP use the QemuOpts parser?  If so, try:

drive_add 1 "file=foo,,bar"

(that is, reproduce the double-comma escaping mechanism that we use on
the command line).

But I haven't personally tried this, so I have no idea if it will help.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v2] arm: implement cache/shareability attribute bits for PAR registers

2017-10-30 Thread Peter Maydell
On 20 October 2017 at 22:49, Andrew Baumann
 wrote:
> On a successful address translation instruction, PAR is supposed to
> contain cacheability and shareability attributes determined by the
> translation. We previously returned 0 for these bits (in line with the
> general strategy of ignoring caches and memory attributes), but some
> guest OSes may depend on them.
>
> This patch collects the attribute bits in the page-table walk, and
> updates PAR with the correct attributes for all LPAE
> translations. Short descriptor formats still return 0 for these bits,
> as in the prior implementation, but now log an unimplemented message.
>
> Signed-off-by: Andrew Baumann 
> ---
> v2:
>  * return attrs via out parameter from get_phys_addr, rather than MemTxAttrs
>  * move MAIR lookup/index inline, since it turned out to be simple
>  * implement attributes for stage 2 translations
>  * combine attributes from stages 1 and 2 when required

Hi. This is looking pretty good, but I have a few comments below,
and we're pretty much at the softfreeze date (KVM Forum last week
meant I didn't get much code review done, unfortunately). Would
you be too sad if this missed 2.11 ?

> Attributes for short PTE formats remain unimplemented; there's a LOG_UNIMP for
> this case, but it's likely to be noisy for guests that trigger it -- do we 
> need
> a one-shot mechanism for the log statement?

I think we should just drop that LOG_UNIMP.

> @@ -8929,6 +8939,28 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
> target_ulong address,
>   */
>  txattrs->secure = false;
>  }
> +
> +if (cacheattrs != NULL) {
> +if (mmu_idx == ARMMMUIdx_S2NS) {
> +/* Translate from the 4-bit stage 2 representation of
> + * memory attributes (without cache-allocation hints) to
> + * the 8-bit representation of the stage 1 MAIR registers
> + * (which includes allocation hints).
> + */
> +uint8_t memattr = extract32(attrs, 0, 4);
> +cacheattrs->attrs = (extract32(memattr, 2, 2) << 4)
> +  | (extract32(memattr, 0, 2) << 2);

Pseudocode S2ConvertAttrsHints() specifies some hint bit defaults
(no-allocate for NC; RW-allocate for WT or WB) -- do we want to
follow that?

> +cacheattrs->shareability = extract32(attrs, 4, 2);

Are you sure this is the right bit offset for the shareability bits?
I think 4,2 is the S2AP (access) bits, and the SH bits are in 6,2, same
as for stage 1 descriptors.

> +} else {
> +/* Index into MAIR registers for cache attributes */
> +uint8_t attrindx = extract32(attrs, 0, 3);
> +uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
> +assert(attrindx <= 7);
> +cacheattrs->attrs = extract64(mair, attrindx * 8, 8);
> +cacheattrs->shareability = extract32(attrs, 6, 2);
> +}
> +}
> +
>  *phys_ptr = descaddr;
>  *page_size_ptr = page_size;
>  return false;
> @@ -9490,6 +9522,89 @@ static bool get_phys_addr_pmsav5(CPUARMState *env, 
> uint32_t address,
>  return false;
>  }
>
> +/* Combine either inner or outer cacheability attributes for normal
> + * memory, according to table D4-42 of ARM DDI 0487B.b (the ARMv8 ARM).
> + *
> + * NB: only stage 1 includes allocation hints (RW bits), leading to
> + * some asymmetry.
> + */
> +static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2)
> +{
> +if (s1 == 4 || s2 == 4) {
> +/* non-cacheable has precedence */
> +return 4;
> +} else if (extract32(s1, 2, 2) == 0 || extract32(s1, 2, 2) == 2) {
> +/* stage 1 write-through takes precedence */
> +return s1;
> +} else if (extract32(s2, 2, 2) == 2) {
> +/* stage 2 write-through takes precedence */
> +return s2;
> +} else { /* write-back */
> +return s1;
> +}

The v8A ARM ARM pseudocode CombineS1S2AttrHints() says that the hint
bits always come from s1 regardless of whose attrs won.

(I was hoping you could write this function as something like a
MAX or MIN, but the complexities of the writethrough-transient
encoding and the hint bits mean it doesn't work out.)

> +}
> +
> +/* Combine S1 and S2 cacheability/shareability attributes, per D4.5.4
> + *
> + * @s1:  Attributes from stage 1 walk
> + * @s2:  Attributes from stage 2 walk
> + */
> +static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2)
> +{
> +uint8_t s1lo = extract32(s1.attrs, 0, 4), s2lo = extract32(s2.attrs, 0, 
> 4);
> +uint8_t s1hi = extract32(s1.attrs, 4, 4), s2hi = extract32(s2.attrs, 4, 
> 4);
> +ARMCacheAttrs ret;
> +
> +/* Combine shareability attributes (table D4-43) */
> +if (s1.shareability == 2 || s2.shareability == 2) {
> +/* if either are outer-shareable, the result is outer-shareable */
> +ret.shareability = 2;

Re: [Qemu-devel] [PATCHv4 00/13] sun4m: sparc32_dma tidy-ups

2017-10-30 Thread Philippe Mathieu-Daudé
On Mon, Oct 30, 2017 at 4:00 PM, Mark Cave-Ayland
 wrote:
> On 27/10/17 17:42, Philippe Mathieu-Daudé wrote:
[...]
>> If you don't accept my comments (or don't have time) about keeping
>> "hw/sparc/sparc32_dma.h" generic and moving network/scsi parts in
>> "hw/sparc/sun4m.h" you can still add to your series:
>>
>> Acked-by: Philippe Mathieu-Daudé 
>
> Thanks for the review, I've added your R-B tags to the individual
> patches. Note that while potentially I could move the network/scsi parts
> to hw/sparc/sun4m.h I feel that it's a slightly better match for the
> SPARC32 DMA container device to remain in sparc32_dma.c. So for these
> patches I've just added your A-B tag.

OK.

>> Also while testing your series on a Debian image, I noted your series
>> results faster, I timed:
>>
>> master: 104s
>> your series: 85s (>20% faster!)
>
> Really? Is that for just this patchset or also with the v2 IOMMU
> patchset applied on top? I can't immediately see how moving the logic
> into sparc32_dma.c could make a difference here...

Yes, I was trying with both series applied, so this comment belongs to
the other series (IOMMU).

Regards,

Phil.



Re: [Qemu-devel] [PATCHv4 09/13] lance: move TYPE_LANCE and SysBusPCNetState from lance.c to lance.h

2017-10-30 Thread Mark Cave-Ayland
On 30/10/17 18:45, Philippe Mathieu-Daudé wrote:

>>> +/*
>>> + * QEMU AMD PC-Net II (Am79C970A) emulation
>>
>> Filename says this is Lance ethernet, but the comment says it's PC-Net ?
>
> According to the datasheet for Am79C970A, the original Lance is an
> Am7990 device and the Am79C970A aka PCNet-PCI II as emulated by QEMU is
> register-compatible with it.
>
> I guess the comment above is more technically correct, but I'm happy to
> adjust it in my local tree if you still feel it needs to change?
>>>
>>> I think we should have a comment that says what the file is
>>> for. Since this is a different file from pcnet.h, we should
>>> have a comment that isn't the same as the pcnet.h one.
>>> I don't particularly mind what it says, as long as it briefly
>>> explains what's in the file (and by implication what distinguishes
>>> things in this file from things in the other).
>>
>> Okay then how about something along the lines of:
>>
>>  * QEMU Lance (Am7990) device emulation
>>  *
>>  * Copyright (c) 2004 Antony T Curtis
>>  * Copyright (c) 2017 Mark Cave-Ayland
>>  *
>>  * This represents the Sparc32 lance (Am7990) ethernet device which is
>>  * an earlier register-compatible member of the AMD PC-Net II
>>  * (Am79C970A) family.
>>
>> In reality pcnet.c/pcnet.h are just the inner workings of the pcnet-pci
>> and lance devices. The comments in pcnet-pci.c and lance.c are identical
>> except for the Sparc32 reference and it was the same header from
>> pcnet-pci.c that I used as the basis for my last patch.
> 
> You are right the PCnet family is based on the Lance one, but it seems
> nobody remembers the Lance origin, the "PC-Net" took over.
> 
> Maybe we can agree with the different families having an unique
> "hw/net/pcnet_lance.h" header with all Lance/PC-Net related XXX_TYPEs,
> what do you think?

This is definitely a good idea in principle, however the lance device in
its current form cannot work for anything other than SPARC32 because of
the word-size byte swaps for FIFO transfers which occur for DMA
transfers (see the code for ledma_memory_read/ledma_memory_write).

I think in its current form the patch allows for someone to potentially
implement this later for other architectures (e.g x86) so while I don't
want to disallow this in future, it isn't really within the scope of
this particular patchset.


ATB,

Mark.



Re: [Qemu-devel] [RFC 00/19] KVM: s390/crypto/vfio: guest dedicated crypto adapters

2017-10-30 Thread Tony Krowiak

On 10/30/2017 04:57 AM, Christian Borntraeger wrote:

adding qemu devel and add Daniel and Erik from libvirt to keep them in the loop.

On 10/29/2017 12:11 PM, Cornelia Huck wrote:

On Fri, 13 Oct 2017 13:38:45 -0400
Tony Krowiak  wrote:


Tony Krowiak (19):
   KVM: s390: SIE considerations for AP Queue virtualization
   KVM: s390: refactor crypto initialization
   s390/zcrypt: new AP matrix bus
   s390/zcrypt: create an AP matrix device on the AP matrix bus
   s390/zcrypt: base implementation of AP matrix device driver
   s390/zcrypt: register matrix device with VFIO mediated device
 framework
   KVM: s390: introduce AP matrix configuration interface
   s390/zcrypt: support for assigning adapters to matrix mdev
   s390/zcrypt: validate adapter assignment
   s390/zcrypt: sysfs interfaces supporting AP domain assignment
   s390/zcrypt: validate domain assignment
   s390/zcrypt: sysfs support for control domain assignment
   s390/zcrypt: validate control domain assignment
   KVM: s390: Connect the AP mediated matrix device to KVM
   s390/zcrypt: introduce ioctl access to VFIO AP Matrix driver
   KVM: s390: interface to configure KVM guest's AP matrix
   KVM: s390: validate input to AP matrix config interface
   KVM: s390: New ioctl to configure KVM guest's AP matrix
   s390/facilities: enable AP facilities needed by guest

I'll try to summarize all of this in my own words, both to make sure I
understand the design correctly and to give others a different view on
this.

[I'm completely disregarding control domains here.]

On s390, we have cryptographic coprocessor cards, which are modeled on
Linux as devices on the AP bus. There's also a concept called domains,
which means an individual queue of a crypto device is basically a
(card,domain) tuple. We model this something like the following
(assuming we have access to cards 3 and 4 and domains 1 and 2):

AP -> card3 -> queue (3,1)
 -> queue (3,2)
-> card4 -> queue (4,1)
 -> queue (4,2)

(The AP bus is a bit different for backwards compat.)

If we want to virtualize this, we can use a feature provided by the
hardware. We basically attach a satellite control block to our main
hardware virtualization control block and the hardware takes care of
(mostly) everything.

For this control block, we don't specify explicit tuples, but a list of
cards and a list of domains. The guest will get access to the cross
product.

Because of this, we need to take care that the lists provided to
different guests don't overlap; i.e., we need to enforce sane
configurations. Otherwise, one guest may get access to things like
secret keys for another guest.

The idea of this patch set is to introduce a new device, the matrix
device. This matrix device hangs off a different root and acts as the
node where mdev devices hang off.

If you now want to give the tuples (4,1) and (4,2), you need to do the
following:

- Unbind the (4,1) and (4,2) tuples from their ap bus driver.
- Bind the (4,1) and (4,2) tuples to the ap matrix driver.
- Create the mediated device.
- Assign card 4 and domains 1 and 2.

QEMU will now simply consume the mediated device and things should work.


This is probably the shortest possible summary I can imagine.
Tony can you double check if it matches your understanding as well?

This is a concise and accurate summary.







Re: [Qemu-devel] [PATCHv4 00/13] sun4m: sparc32_dma tidy-ups

2017-10-30 Thread Mark Cave-Ayland
On 27/10/17 17:42, Philippe Mathieu-Daudé wrote:

> Hi Mark,
> 
> On 10/25/2017 12:59 PM, Mark Cave-Ayland wrote:
>> This patchset aims to tidy-up the sparc32_dma code by improving the
>> modelling of the espdma/ledma devices using both QOM and the memory
>> API which didn't exist when the code was first written.
>>
>> The result is that it is now possible to remove both the iommu_opaque
>> and is_ledma workarounds from the code, and the code for wiring up
>> the espdma/ledma and respective devices is also a lot more readable.
>>
>> Signed-off-by: Mark Cave-Ayland 
> 
> The whole series:
> 
> Tested-by: Philippe Mathieu-Daudé 
> 
> If you don't accept my comments (or don't have time) about keeping
> "hw/sparc/sparc32_dma.h" generic and moving network/scsi parts in
> "hw/sparc/sun4m.h" you can still add to your series:
> 
> Acked-by: Philippe Mathieu-Daudé 

Thanks for the review, I've added your R-B tags to the individual
patches. Note that while potentially I could move the network/scsi parts
to hw/sparc/sun4m.h I feel that it's a slightly better match for the
SPARC32 DMA container device to remain in sparc32_dma.c. So for these
patches I've just added your A-B tag.

> Also while testing your series on a Debian image, I noted your series
> results faster, I timed:
> 
> master: 104s
> your series: 85s (>20% faster!)

Really? Is that for just this patchset or also with the v2 IOMMU
patchset applied on top? I can't immediately see how moving the logic
into sparc32_dma.c could make a difference here...


ATB,

Mark.



Re: [Qemu-devel] [PATCH v1 0/5][RFC] Refactoring of AIS support

2017-10-30 Thread Halil Pasic
  

On 10/30/2017 06:38 PM, Pierre Morel wrote:
> On 30/10/2017 18:08, Christian Borntraeger wrote:
>>
>> On 10/30/2017 05:59 PM, Cornelia Huck wrote:
>>> On Mon, 30 Oct 2017 14:48:23 +0100
>>> Christian Borntraeger  wrote:
>>>
>>>
 FWIW, I am testing a guest patch that enables zPCI without AIS. It's as 
 simple as


 diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
 index 7b30af5..9b24836 100644
 --- a/arch/s390/pci/pci.c
 +++ b/arch/s390/pci/pci.c
 @@ -953,7 +953,7 @@ static int __init pci_base_init(void)
  if (!s390_pci_probe)
  return 0;
   -   if (!test_facility(69) || !test_facility(71) || 
 !test_facility(72))
 +   if (!test_facility(69) || !test_facility(71))
  return 0;
    rc = zpci_debug_init();
 diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
 index ea34086..61f8c82 100644
 --- a/arch/s390/pci/pci_insn.c
 +++ b/arch/s390/pci/pci_insn.c
 @@ -7,6 +7,7 @@
   #include 
   #include 
   #include 
 +#include 
   #include 
   #include 
   #include 
 @@ -93,6 +94,8 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
   /* Set Interruption Controls */
   void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
   {
 +   if (!test_facility(72))
 +   return;
  asm volatile (
  "   .insn   rsy,0xebd1,%[ctl],%[isc],%[u]\n"
  : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" 
 (*unused));

>>>
>>> Sounds good. Presumably this makes the adapter interrupt handling work
>>> as for virtio (and qdio)? Is there any ais-less pci hardware out in the
>>> wild?

@Connie:
Do you have something in mind, or is curiosity the only reason for asking?

>>>
>>
>> ais is z specific, not PCI specific. So PCI cards should not care as far as 
>> I can tell.
>>
>>
> 
> I bet Conny meant PCI hardware globally, including zPCI adapters.
> I bet there is none, but I had better ask Sebastian or Gerald if they know.
> 

I side with Christian: it is a machine thing and not a card thing. So
the question is if there is a z machine which supports zpci but does
not support AIS. My guess is that such a machine was never produced.

Halil




Re: [Qemu-devel] [PATCHv4 11/13] sparc32_dma: introduce new SPARC32_DMA type container object

2017-10-30 Thread Philippe Mathieu-Daudé
On Mon, Oct 30, 2017 at 3:51 PM, Mark Cave-Ayland
 wrote:
> On 27/10/17 17:18, Philippe Mathieu-Daudé wrote:
>> On 10/25/2017 12:59 PM, Mark Cave-Ayland wrote:
>>> Create a new SPARC32_DMA container object (including an appropriate 
>>> container
>>> memory region) and add instances of the SPARC32_ESPDMA_DEVICE and
>>> SPARC32_LEDMA_DEVICE as child objects. The benefit is that most of the gpio
>>> wiring complexity between esp/espdma and lance/ledma is now hidden within 
>>> the
>>> SPARC32_DMA realize function.
>>>
>>> Since the sun4m IOMMU is already QOMified we can find a reference to
>>> it using object_resolve_path_type() allowing us to completely remove all 
>>> external
>>> references to the iommu pointer.
>>>
>>> Finally we rework sun4m's sparc32_dma_init() to invoke the new SPARC32_DMA 
>>> object
>>> and wire up the remaining board memory regions/IRQs.
>>>
>>> Signed-off-by: Mark Cave-Ayland 
>>> Reviewed-by: Artyom Tarasenko 
>>> ---
>>>  hw/dma/sparc32_dma.c   |   70 
>>> 
>>>  hw/sparc/sun4m.c   |   66 ++---
>>>  include/hw/sparc/sparc32_dma.h |   12 +++
>>>  3 files changed, 114 insertions(+), 34 deletions(-)
>>>
>>> diff --git a/hw/dma/sparc32_dma.c b/hw/dma/sparc32_dma.c
>>> index d4cff74..582b7cc 100644
>>> --- a/hw/dma/sparc32_dma.c
>>> +++ b/hw/dma/sparc32_dma.c
>>> @@ -30,6 +30,7 @@
>>>  #include "hw/sparc/sparc32_dma.h"
>>>  #include "hw/sparc/sun4m.h"
>>>  #include "hw/sysbus.h"
>>> +#include "qapi/error.h"
>>>  #include "trace.h"
>>>
>>>  /*
>>> @@ -369,11 +370,80 @@ static const TypeInfo sparc32_ledma_device_info = {
>>>  .class_init= sparc32_ledma_device_class_init,
>>>  };
>>>
>>> +static void sparc32_dma_realize(DeviceState *dev, Error **errp)
>>> +{
>>> +SPARC32DMAState *s = SPARC32_DMA(dev);
>>> +DeviceState *espdma, *esp, *ledma, *lance;
>>> +SysBusDevice *sbd;
>>> +Object *iommu;
>>> +
>>> +iommu = object_resolve_path_type("", TYPE_SUN4M_IOMMU, NULL);
>>> +if (!iommu) {
>>> +error_setg(errp, "unable to locate sun4m IOMMU device");
>>> +return;
>>> +}
>>> +
>>> +espdma = qdev_create(NULL, TYPE_SPARC32_ESPDMA_DEVICE);
>>> +object_property_set_link(OBJECT(espdma), iommu, "iommu", errp);
>>> +object_property_add_child(OBJECT(s), "espdma", OBJECT(espdma), errp);
>>> +qdev_init_nofail(espdma);
>>> +
>>> +esp = DEVICE(object_resolve_path_component(OBJECT(espdma), "esp"));
>>
>> TYPE_ESP?
>>
>>> +sbd = SYS_BUS_DEVICE(esp);
>>> +sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(espdma, 0));
>>> +qdev_connect_gpio_out(espdma, 0, qdev_get_gpio_in(esp, 0));
>>> +qdev_connect_gpio_out(espdma, 1, qdev_get_gpio_in(esp, 1));
>>> +
>>> +sbd = SYS_BUS_DEVICE(espdma);
>>> +memory_region_add_subregion(&s->dmamem, 0x0,
>>> +sysbus_mmio_get_region(sbd, 0));
>>> +
>>> +ledma = qdev_create(NULL, TYPE_SPARC32_LEDMA_DEVICE);
>>> +object_property_set_link(OBJECT(ledma), iommu, "iommu", errp);
>>> +object_property_add_child(OBJECT(s), "ledma", OBJECT(ledma), errp);
>>> +qdev_init_nofail(ledma);
>>> +
>>> +lance = DEVICE(object_resolve_path_component(OBJECT(ledma), "lance"));
>>
>> TYPE_LANCE?
>>
>>> +sbd = SYS_BUS_DEVICE(lance);
>>> +sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(ledma, 0));
>>> +qdev_connect_gpio_out(ledma, 0, qdev_get_gpio_in(lance, 0));
>>> +
>>> +sbd = SYS_BUS_DEVICE(ledma);
>>> +memory_region_add_subregion(&s->dmamem, 0x10,
>>> +sysbus_mmio_get_region(sbd, 0));
>>> +}
>>> +
>>> +static void sparc32_dma_init(Object *obj)
>>> +{
>>> +SPARC32DMAState *s = SPARC32_DMA(obj);
>>> +SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
>>> +
>>> +memory_region_init(&s->dmamem, OBJECT(s), "dma", DMA_SIZE + 
>>> DMA_ETH_SIZE);
>>> +sysbus_init_mmio(sbd, &s->dmamem);
>>> +}
>>> +
>>> +static void sparc32_dma_class_init(ObjectClass *klass, void *data)
>>> +{
>>> +DeviceClass *dc = DEVICE_CLASS(klass);
>>> +
>>> +dc->realize = sparc32_dma_realize;
>>> +}
>>> +
>>> +static const TypeInfo sparc32_dma_info = {
>>> +.name  = TYPE_SPARC32_DMA,
>>> +.parent= TYPE_SYS_BUS_DEVICE,
>>> +.instance_size = sizeof(SPARC32DMAState),
>>> +.instance_init = sparc32_dma_init,
>>> +.class_init= sparc32_dma_class_init,
>>> +};
>>> +
>>> +
>>>  static void sparc32_dma_register_types(void)
>>>  {
>>>  type_register_static(&sparc32_dma_device_info);
>>>  type_register_static(&sparc32_espdma_device_info);
>>>  type_register_static(&sparc32_ledma_device_info);
>>> +type_register_static(&sparc32_dma_info);
>>>  }
>>>
>>>  type_init(sparc32_dma_register_types)
>>> diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
>>> index ae486a4..5017ae5 100644
>>> --- a/hw/sparc/sun4m.c
>>> +++ b/hw/sparc/sun4m.c
>>> @@ -307,18 +307,36 @@ static 

Re: [Qemu-devel] [PATCHv4 11/13] sparc32_dma: introduce new SPARC32_DMA type container object

2017-10-30 Thread Mark Cave-Ayland
On 27/10/17 17:18, Philippe Mathieu-Daudé wrote:
> On 10/25/2017 12:59 PM, Mark Cave-Ayland wrote:
>> Create a new SPARC32_DMA container object (including an appropriate container
>> memory region) and add instances of the SPARC32_ESPDMA_DEVICE and
>> SPARC32_LEDMA_DEVICE as child objects. The benefit is that most of the gpio
>> wiring complexity between esp/espdma and lance/ledma is now hidden within the
>> SPARC32_DMA realize function.
>>
>> Since the sun4m IOMMU is already QOMified we can find a reference to
>> it using object_resolve_path_type() allowing us to completely remove all 
>> external
>> references to the iommu pointer.
>>
>> Finally we rework sun4m's sparc32_dma_init() to invoke the new SPARC32_DMA 
>> object
>> and wire up the remaining board memory regions/IRQs.
>>
>> Signed-off-by: Mark Cave-Ayland 
>> Reviewed-by: Artyom Tarasenko 
>> ---
>>  hw/dma/sparc32_dma.c   |   70 
>> 
>>  hw/sparc/sun4m.c   |   66 ++---
>>  include/hw/sparc/sparc32_dma.h |   12 +++
>>  3 files changed, 114 insertions(+), 34 deletions(-)
>>
>> diff --git a/hw/dma/sparc32_dma.c b/hw/dma/sparc32_dma.c
>> index d4cff74..582b7cc 100644
>> --- a/hw/dma/sparc32_dma.c
>> +++ b/hw/dma/sparc32_dma.c
>> @@ -30,6 +30,7 @@
>>  #include "hw/sparc/sparc32_dma.h"
>>  #include "hw/sparc/sun4m.h"
>>  #include "hw/sysbus.h"
>> +#include "qapi/error.h"
>>  #include "trace.h"
>>  
>>  /*
>> @@ -369,11 +370,80 @@ static const TypeInfo sparc32_ledma_device_info = {
>>  .class_init= sparc32_ledma_device_class_init,
>>  };
>>  
>> +static void sparc32_dma_realize(DeviceState *dev, Error **errp)
>> +{
>> +SPARC32DMAState *s = SPARC32_DMA(dev);
>> +DeviceState *espdma, *esp, *ledma, *lance;
>> +SysBusDevice *sbd;
>> +Object *iommu;
>> +
>> +iommu = object_resolve_path_type("", TYPE_SUN4M_IOMMU, NULL);
>> +if (!iommu) {
>> +error_setg(errp, "unable to locate sun4m IOMMU device");
>> +return;
>> +}
>> +
>> +espdma = qdev_create(NULL, TYPE_SPARC32_ESPDMA_DEVICE);
>> +object_property_set_link(OBJECT(espdma), iommu, "iommu", errp);
>> +object_property_add_child(OBJECT(s), "espdma", OBJECT(espdma), errp);
>> +qdev_init_nofail(espdma);
>> +
>> +esp = DEVICE(object_resolve_path_component(OBJECT(espdma), "esp"));
> 
> TYPE_ESP?
> 
>> +sbd = SYS_BUS_DEVICE(esp);
>> +sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(espdma, 0));
>> +qdev_connect_gpio_out(espdma, 0, qdev_get_gpio_in(esp, 0));
>> +qdev_connect_gpio_out(espdma, 1, qdev_get_gpio_in(esp, 1));
>> +
>> +sbd = SYS_BUS_DEVICE(espdma);
>> +memory_region_add_subregion(&s->dmamem, 0x0,
>> +sysbus_mmio_get_region(sbd, 0));
>> +
>> +ledma = qdev_create(NULL, TYPE_SPARC32_LEDMA_DEVICE);
>> +object_property_set_link(OBJECT(ledma), iommu, "iommu", errp);
>> +object_property_add_child(OBJECT(s), "ledma", OBJECT(ledma), errp);
>> +qdev_init_nofail(ledma);
>> +
>> +lance = DEVICE(object_resolve_path_component(OBJECT(ledma), "lance"));
> 
> TYPE_LANCE?
> 
>> +sbd = SYS_BUS_DEVICE(lance);
>> +sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(ledma, 0));
>> +qdev_connect_gpio_out(ledma, 0, qdev_get_gpio_in(lance, 0));
>> +
>> +sbd = SYS_BUS_DEVICE(ledma);
>> +memory_region_add_subregion(&s->dmamem, 0x10,
>> +sysbus_mmio_get_region(sbd, 0));
>> +}
>> +
>> +static void sparc32_dma_init(Object *obj)
>> +{
>> +SPARC32DMAState *s = SPARC32_DMA(obj);
>> +SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
>> +
>> +memory_region_init(&s->dmamem, OBJECT(s), "dma", DMA_SIZE + 
>> DMA_ETH_SIZE);
>> +sysbus_init_mmio(sbd, &s->dmamem);
>> +}
>> +
>> +static void sparc32_dma_class_init(ObjectClass *klass, void *data)
>> +{
>> +DeviceClass *dc = DEVICE_CLASS(klass);
>> +
>> +dc->realize = sparc32_dma_realize;
>> +}
>> +
>> +static const TypeInfo sparc32_dma_info = {
>> +.name  = TYPE_SPARC32_DMA,
>> +.parent= TYPE_SYS_BUS_DEVICE,
>> +.instance_size = sizeof(SPARC32DMAState),
>> +.instance_init = sparc32_dma_init,
>> +.class_init= sparc32_dma_class_init,
>> +};
>> +
>> +
>>  static void sparc32_dma_register_types(void)
>>  {
>>  type_register_static(&sparc32_dma_device_info);
>>  type_register_static(&sparc32_espdma_device_info);
>>  type_register_static(&sparc32_ledma_device_info);
>> +type_register_static(&sparc32_dma_info);
>>  }
>>  
>>  type_init(sparc32_dma_register_types)
>> diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
>> index ae486a4..5017ae5 100644
>> --- a/hw/sparc/sun4m.c
>> +++ b/hw/sparc/sun4m.c
>> @@ -307,18 +307,36 @@ static void *iommu_init(hwaddr addr, uint32_t version, 
>> qemu_irq irq)
>>  return s;
>>  }
>>  
>> -static void *sparc32_dma_init(hwaddr daddr, void *iommu, int is_ledma)
>> +static void *sparc32_dma_init(hwaddr dma_base,
>> 

Re: [Qemu-devel] [PATCH v3] monitor: fix dangling CPU pointer

2017-10-30 Thread Dr. David Alan Gilbert
* Greg Kurz (gr...@kaod.org) wrote:
> If a CPU selected with the "cpu" command is hot-unplugged then "info cpus"
> causes QEMU to exit:
> 
> (qemu) device_del cpu1
> (qemu) info cpus
> qemu:qemu_cpu_kick_thread: No such process
> 
> This happens because "cpu" stores the pointer to the selected CPU into
> the monitor structure. When the CPU is hot-unplugged, we end up with a
> dangling pointer. The "info cpus" command then does:
> 
> hmp_info_cpus()
>  monitor_get_cpu_index()
>   mon_get_cpu()
>cpu_synchronize_state() <--- called with dangling pointer
> 
> This could cause a QEMU crash as well.
> 
> This patch switches the monitor to store the QOM path instead of a
> pointer to the current CPU. The path is then resolved when needed.
> If the resolution fails, we assume that the CPU was removed and the
> path is reset to the default (i.e., the path of first_cpu).
> 
> Reported-by: Satheesh Rajendran 
> Suggested-by: Igor Mammedov 
> Signed-off-by: Greg Kurz 

Queued for HMP

Dave

> ---
> v3: - drop irrelevant paragraph about object_resolve_path() from the
>   changelog
> 
> v2: - use object_resolve_path_type()
> - add Reported-by tag
> ---
>  monitor.c |   23 ++-
>  1 file changed, 18 insertions(+), 5 deletions(-)
> 
> diff --git a/monitor.c b/monitor.c
> index fe0d1bdbb461..ce577e46e568 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -200,7 +200,7 @@ struct Monitor {
>  
>  ReadLineState *rs;
>  MonitorQMP qmp;
> -CPUState *mon_cpu;
> +gchar *mon_cpu_path;
>  BlockCompletionFunc *password_completion_cb;
>  void *password_opaque;
>  mon_cmd_t *cmd_table;
> @@ -579,6 +579,7 @@ static void monitor_data_init(Monitor *mon)
>  
>  static void monitor_data_destroy(Monitor *mon)
>  {
> +g_free(mon->mon_cpu_path);
>  qemu_chr_fe_deinit(&mon->chr, false);
>  if (monitor_is_qmp(mon)) {
>  json_message_parser_destroy(&mon->qmp.parser);
> @@ -1047,20 +1048,32 @@ int monitor_set_cpu(int cpu_index)
>  if (cpu == NULL) {
>  return -1;
>  }
> -cur_mon->mon_cpu = cpu;
> +g_free(cur_mon->mon_cpu_path);
> +cur_mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu));
>  return 0;
>  }
>  
>  CPUState *mon_get_cpu(void)
>  {
> -if (!cur_mon->mon_cpu) {
> +CPUState *cpu;
> +
> +if (cur_mon->mon_cpu_path) {
> +cpu = (CPUState *) object_resolve_path_type(cur_mon->mon_cpu_path,
> +TYPE_CPU, NULL);
> +if (!cpu) {
> +g_free(cur_mon->mon_cpu_path);
> +cur_mon->mon_cpu_path = NULL;
> +}
> +}
> +if (!cur_mon->mon_cpu_path) {
>  if (!first_cpu) {
>  return NULL;
>  }
>  monitor_set_cpu(first_cpu->cpu_index);
> +cpu = first_cpu;
>  }
> -cpu_synchronize_state(cur_mon->mon_cpu);
> -return cur_mon->mon_cpu;
> +cpu_synchronize_state(cpu);
> +return cpu;
>  }
>  
>  CPUArchState *mon_get_cpu_env(void)
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-devel] [PATCHv4 09/13] lance: move TYPE_LANCE and SysBusPCNetState from lance.c to lance.h

2017-10-30 Thread Philippe Mathieu-Daudé
Hi Mark,

>> +/*
>> + * QEMU AMD PC-Net II (Am79C970A) emulation
>
> Filename says this is Lance ethernet, but the comment says it's PC-Net ?

 According to the datasheet for Am79C970A, the original Lance is an
 Am7990 device and the Am79C970A aka PCNet-PCI II as emulated by QEMU is
 register-compatible with it.

 I guess the comment above is more technically correct, but I'm happy to
 adjust it in my local tree if you still feel it needs to change?
>>
>> I think we should have a comment that says what the file is
>> for. Since this is a different file from pcnet.h, we should
>> have a comment that isn't the same as the pcnet.h one.
>> I don't particularly mind what it says, as long as it briefly
>> explains what's in the file (and by implication what distinguishes
>> things in this file from things in the other).
> 
> Okay then how about something along the lines of:
> 
>  * QEMU Lance (Am7990) device emulation
>  *
>  * Copyright (c) 2004 Antony T Curtis
>  * Copyright (c) 2017 Mark Cave-Ayland
>  *
>  * This represents the Sparc32 lance (Am7990) ethernet device which is
>  * an earlier register-compatible member of the AMD PC-Net II
>  * (Am79C970A) family.
> 
> In reality pcnet.c/pcnet.h are just the inner workings of the pcnet-pci
> and lance devices. The comments in pcnet-pci.c and lance.c are identical
> except for the Sparc32 reference and it was the same header from
> pcnet-pci.c that I used as the basis for my last patch.

You are right the PCnet family is based on the Lance one, but it seems
nobody remembers the Lance origin, the "PC-Net" took over.

Maybe we can agree on the different families having a unique
"hw/net/pcnet_lance.h" header with all Lance/PC-Net related XXX_TYPEs,
what do you think?

Regards,

Phil.



Re: [Qemu-devel] [PATCH v8 00/14] Dirty bitmaps postcopy migration

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

30.10.2017 19:32, Vladimir Sementsov-Ogievskiy wrote:

Hi all!

There is a new version of dirty bitmap postcopy migration series.

v8

clone: tag postcopy-v8 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v8


compilation is broken for s390, so updated version is:

clone: tag postcopy-v8.1 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: 
https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v8.1



--
Best regards,
Vladimir




Re: [Qemu-devel] [PATCHv4 09/13] lance: move TYPE_LANCE and SysBusPCNetState from lance.c to lance.h

2017-10-30 Thread Mark Cave-Ayland
On 30/10/17 13:22, Peter Maydell wrote:

> On 30 October 2017 at 13:10, Mark Cave-Ayland
>  wrote:
>> On 26/10/17 11:12, Mark Cave-Ayland wrote:
>>> On 25/10/17 18:47, Peter Maydell wrote:
 On 25 October 2017 at 16:59, Mark Cave-Ayland wrote:
> --- /dev/null
> +++ b/include/hw/net/lance.h
> @@ -0,0 +1,41 @@
> +/*
> + * QEMU AMD PC-Net II (Am79C970A) emulation

 Filename says this is Lance ethernet, but the comment says it's PC-Net ?
>>>
>>> According to the datasheet for Am79C970A, the original Lance is an
>>> Am7990 device and the Am79C970A aka PCNet-PCI II as emulated by QEMU is
>>> register-compatible with it.
>>>
>>> I guess the comment above is more technically correct, but I'm happy to
>>> adjust it in my local tree if you still feel it needs to change?
> 
> I think we should have a comment that says what the file is
> for. Since this is a different file from pcnet.h, we should
> have a comment that isn't the same as the pcnet.h one.
> I don't particularly mind what it says, as long as it briefly
> explains what's in the file (and by implication what distinguishes
> things in this file from things in the other).

Okay then how about something along the lines of:

 * QEMU Lance (Am7990) device emulation
 *
 * Copyright (c) 2004 Antony T Curtis
 * Copyright (c) 2017 Mark Cave-Ayland
 *
 * This represents the Sparc32 lance (Am7990) ethernet device which is
 * an earlier register-compatible member of the AMD PC-Net II
 * (Am79C970A) family.

In reality pcnet.c/pcnet.h are just the inner workings of the pcnet-pci
and lance devices. The comments in pcnet-pci.c and lance.c are identical
except for the Sparc32 reference and it was the same header from
pcnet-pci.c that I used as the basis for my last patch.


ATB,

Mark.



[Qemu-devel] [PATCH RESEND] vl: only display available accelerators

2017-10-30 Thread Philippe Mathieu-Daudé
examples configuring with '--enable-kvm --disable-tcg'

- before

  $ qemu-system-x86_64 -accel help
  Possible accelerators: kvm, xen, hax, tcg

  $ qemu-system-x86_64 -accel tcg
  qemu-system-x86_64: -machine accel=tcg: No accelerator found

  # qemu-system-x86_64 -accel hax
  qemu-system-x86_64: -machine accel=hax: No accelerator found

- after

  $ qemu-system-x86_64 -accel help
  Possible accelerators:
xen
kvm

Suggested-by: Eduardo Habkost 
Signed-off-by: Philippe Mathieu-Daudé 
---
resend without RFC in subject...

since RFC:
  - use much cleaner object_class_get_list(TYPE_ACCEL, false)

 vl.c | 39 ++-
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/vl.c b/vl.c
index ec299099ff..0f13641715 100644
--- a/vl.c
+++ b/vl.c
@@ -2764,6 +2764,39 @@ static gint machine_class_cmp(gconstpointer a, 
gconstpointer b)
 exit(!name || !is_help_option(name));
 }
 
+static void accel_list_entry(gpointer data, gpointer user_data)
+{
+ObjectClass *oc = data;
+const char *typename = object_class_get_name(oc);
+int len;
+
+if (!qtest_driver() && !g_strcmp0(typename, ACCEL_CLASS_NAME("qtest"))) {
+return; /* used by test cases */
+}
+
+len = strlen(typename) - strlen("-" TYPE_ACCEL);
+if (len > 0) {
+error_printf("  %.*s\n", len, typename);
+}
+}
+
+static void accel_parse(const char *name, QemuOpts *accel_opts)
+{
+const char *optarg = qemu_opt_get(accel_opts, "accel");
+GSList *list;
+
+if (!is_help_option(optarg)) {
+return;
+}
+
+list = object_class_get_list(TYPE_ACCEL, false);
+error_printf("Possible accelerators:\n");
+g_slist_foreach(list, accel_list_entry, NULL);
+g_slist_free(list);
+
+exit(0);
+}
+
 void qemu_add_exit_notifier(Notifier *notify)
 {
 notifier_list_add(&exit_notifiers, notify);
@@ -3881,11 +3914,7 @@ int main(int argc, char **argv, char **envp)
 case QEMU_OPTION_accel:
 accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"),
  optarg, true);
-optarg = qemu_opt_get(accel_opts, "accel");
-if (!optarg || is_help_option(optarg)) {
-error_printf("Possible accelerators: kvm, xen, hax, 
tcg\n");
-exit(0);
-}
+accel_parse(optarg, accel_opts);
 opts = qemu_opts_create(qemu_find_opts("machine"), NULL,
 false, &error_abort);
 qemu_opt_set(opts, "accel", optarg, &error_abort);
-- 
2.15.0.rc2




[Qemu-devel] [PATCH v8.1 05/14] migration: introduce postcopy-only pending

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
There would be savevm states (dirty-bitmap) which can migrate only in
postcopy stage. The corresponding pending is introduced here.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
---

8.1: add new version for cmma_save_pending too.


 include/migration/register.h | 17 +++--
 migration/savevm.h   |  5 +++--
 hw/s390x/s390-stattrib.c |  7 ---
 migration/block.c|  7 ---
 migration/migration.c| 15 ---
 migration/ram.c  |  9 +
 migration/savevm.c   | 13 -
 migration/trace-events   |  2 +-
 8 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index f4f7bdc177..9436a87678 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -37,8 +37,21 @@ typedef struct SaveVMHandlers {
 int (*save_setup)(QEMUFile *f, void *opaque);
 void (*save_live_pending)(QEMUFile *f, void *opaque,
   uint64_t threshold_size,
-  uint64_t *non_postcopiable_pending,
-  uint64_t *postcopiable_pending);
+  uint64_t *res_precopy_only,
+  uint64_t *res_compatible,
+  uint64_t *res_postcopy_only);
+/* Note for save_live_pending:
+ * - res_precopy_only is for data which must be migrated in precopy phase
+ * or in stopped state, in other words - before target vm start
+ * - res_compatible is for data which may be migrated in any phase
+ * - res_postcopy_only is for data which must be migrated in postcopy phase
+ * or in stopped state, in other words - after source vm stop
+ *
+ * Sum of res_precopy_only, res_compatible and res_postcopy_only is the
+ * whole amount of pending data.
+ */
+
+
 LoadStateHandler *load_state;
 int (*load_setup)(QEMUFile *f, void *opaque);
 int (*load_cleanup)(void *opaque);
diff --git a/migration/savevm.h b/migration/savevm.h
index 295c4a1f2c..cf4f0d37ca 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -38,8 +38,9 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f);
 int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
bool inactivate_disks);
 void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
-   uint64_t *res_non_postcopiable,
-   uint64_t *res_postcopiable);
+   uint64_t *res_precopy_only,
+   uint64_t *res_compatible,
+   uint64_t *res_postcopy_only);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
 int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
index 2902f54f11..dd3fbfd1eb 100644
--- a/hw/s390x/s390-stattrib.c
+++ b/hw/s390x/s390-stattrib.c
@@ -183,15 +183,16 @@ static int cmma_save_setup(QEMUFile *f, void *opaque)
 }
 
 static void cmma_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
- uint64_t *non_postcopiable_pending,
- uint64_t *postcopiable_pending)
+  uint64_t *res_precopy_only,
+  uint64_t *res_compatible,
+  uint64_t *res_postcopy_only)
 {
 S390StAttribState *sas = S390_STATTRIB(opaque);
 S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas);
 long long res = sac->get_dirtycount(sas);
 
 if (res >= 0) {
-*non_postcopiable_pending += res;
+*res_precopy_only += res;
 }
 }
 
diff --git a/migration/block.c b/migration/block.c
index 3282809583..39dfa567e8 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -865,8 +865,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
 }
 
 static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
-   uint64_t *non_postcopiable_pending,
-   uint64_t *postcopiable_pending)
+   uint64_t *res_precopy_only,
+   uint64_t *res_compatible,
+   uint64_t *res_postcopy_only)
 {
 /* Estimate pending number of bytes to send */
 uint64_t pending;
@@ -887,7 +888,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, 
uint64_t max_size,
 
 DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
 /* We don't do postcopy */
-*non_postcopiable_pending += pending;
+*res_precopy_only += pending;
 }
 
 static int block_load(QEMUFile *f, void *opaque, int version_id)
diff --git a/migration/migration.c 

Re: [Qemu-devel] [RFC PATCH] vl: only display available accelerators

2017-10-30 Thread Philippe Mathieu-Daudé
Oops this isn't RFC anymore, this one is v1 ... sorry!

On Mon, Oct 30, 2017 at 3:14 PM, Philippe Mathieu-Daudé  wrote:
[...]
> ---
> since RFC:
>   - use much cleaner object_class_get_list(TYPE_ACCEL, false)
>
>  vl.c | 39 ++-
>  1 file changed, 34 insertions(+), 5 deletions(-)



Re: [Qemu-devel] [PATCH v8 05/14] migration: introduce postcopy-only pending

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

30.10.2017 20:31, Dr. David Alan Gilbert wrote:

* Vladimir Sementsov-Ogievskiy (vsement...@virtuozzo.com) wrote:

There would be savevm states (dirty-bitmap) which can migrate only in
postcopy stage. The corresponding pending is introduced here.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 

Note the error patchew picked up on this for s390 which has
cmma_save_pending.

Dave


Thanks for pointing this out, I'll update the patch.



--
Best regards,
Vladimir




[Qemu-devel] [RFC PATCH] vl: only display available accelerators

2017-10-30 Thread Philippe Mathieu-Daudé
examples configuring with '--enable-kvm --disable-tcg'

- before

  $ qemu-system-x86_64 -accel help
  Possible accelerators: kvm, xen, hax, tcg

  $ qemu-system-x86_64 -accel tcg
  qemu-system-x86_64: -machine accel=tcg: No accelerator found

  # qemu-system-x86_64 -accel hax
  qemu-system-x86_64: -machine accel=hax: No accelerator found

- after

  $ qemu-system-x86_64 -accel help
  Possible accelerators:
xen
kvm

Suggested-by: Eduardo Habkost 
Signed-off-by: Philippe Mathieu-Daudé 
---
since RFC:
  - use much cleaner object_class_get_list(TYPE_ACCEL, false)

 vl.c | 39 ++-
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/vl.c b/vl.c
index ec299099ff..0f13641715 100644
--- a/vl.c
+++ b/vl.c
@@ -2764,6 +2764,39 @@ static gint machine_class_cmp(gconstpointer a, 
gconstpointer b)
 exit(!name || !is_help_option(name));
 }
 
+static void accel_list_entry(gpointer data, gpointer user_data)
+{
+ObjectClass *oc = data;
+const char *typename = object_class_get_name(oc);
+int len;
+
+if (!qtest_driver() && !g_strcmp0(typename, ACCEL_CLASS_NAME("qtest"))) {
+return; /* used by test cases */
+}
+
+len = strlen(typename) - strlen("-" TYPE_ACCEL);
+if (len > 0) {
+error_printf("  %.*s\n", len, typename);
+}
+}
+
+static void accel_parse(const char *name, QemuOpts *accel_opts)
+{
+const char *optarg = qemu_opt_get(accel_opts, "accel");
+GSList *list;
+
+if (!is_help_option(optarg)) {
+return;
+}
+
+list = object_class_get_list(TYPE_ACCEL, false);
+error_printf("Possible accelerators:\n");
+g_slist_foreach(list, accel_list_entry, NULL);
+g_slist_free(list);
+
+exit(0);
+}
+
 void qemu_add_exit_notifier(Notifier *notify)
 {
 notifier_list_add(&exit_notifiers, notify);
@@ -3881,11 +3914,7 @@ int main(int argc, char **argv, char **envp)
 case QEMU_OPTION_accel:
 accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"),
  optarg, true);
-optarg = qemu_opt_get(accel_opts, "accel");
-if (!optarg || is_help_option(optarg)) {
-error_printf("Possible accelerators: kvm, xen, hax, 
tcg\n");
-exit(0);
-}
+accel_parse(optarg, accel_opts);
 opts = qemu_opts_create(qemu_find_opts("machine"), NULL,
 false, &error_abort);
 qemu_opt_set(opts, "accel", optarg, &error_abort);
-- 
2.15.0.rc2




Re: [Qemu-devel] [RFC PATCH v6 13/12] tweak test 83 verbosity

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

27.10.2017 13:45, Eric Blake wrote:

Commenting these two lines is enough to avoid the change to 083.out
in 12/12.  That is evidence that we may want these two lines to be
trace points rather than error messages; or maybe we really do like
the extra verbosity in the case of an unexpected communication break.

This patch does not meet coding guidelines, and I'm not proud enough
of it to give S-o-b, but I'm posting it for conversation.


I think more verbosity on failure is not bad; it's a rare case. In the previous
patch the corresponding change looks big, but in a real case it would be just one
more line in the log.

However, if you are unsure about it, people who want more verbosity can always
enable this particular trace, so tracing is OK too, and more flexible.



---
  block/nbd-client.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index b44d4d4a01..e063b3fbc0 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -654,7 +654,8 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest 
*request,

  ret = nbd_co_receive_return_code(client, request->handle, &local_err);
  if (local_err) {
-error_report_err(local_err);
+assert(ret < 0);
+//error_report_err(local_err);
  }
  return ret;
  }
@@ -682,7 +683,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t 
offset,
  ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
 &local_err);
  if (ret < 0) {
-error_report_err(local_err);
+//error_report_err(local_err);
  }
  return ret;
  }



--
Best regards,
Vladimir




Re: [Qemu-devel] [PATCH v2] don't hardcode EL1 in extended_addresses_enabled

2017-10-30 Thread Peter Maydell
On 26 October 2017 at 00:28, Stefano Stabellini  wrote:
> extended_addresses_enabled calls arm_el_is_aa64, hardcoding exception
> level 1. Instead, add an additional "el" argument to
> extended_addresses_enabled.
>
> The caller will pass the right value. In most cases, it will be
> arm_current_el(env). However, arm_debug_excp_handler will
> use arm_debug_target_el(env), as the target el for a debug trap can be
> different from the current el.
>
> Signed-off-by: Stefano Stabellini 

I have some longer comments below about what a mess this whole
area is. Fixing some of that requires some heavy refactoring,
which I don't want to do just now since we're about to go into
softfreeze for the next release.

What's the specific situation/bug that you're trying to fix with
this patch? You don't say in the commit message.
We should be able to put in a point fix to deal with whatever it is,
but it's hard to suggest what that would be without the detail
of what exactly we're getting wrong. (It's the PAR format stuff,
right? But which ATS instruction are you using, from which
exception level, with which register width, for which stage
1 page table format and stage 1 guest register width?)

> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index 96113fe..2298428 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -500,7 +500,7 @@ static void contextidr_write(CPUARMState *env, const 
> ARMCPRegInfo *ri,
>  ARMCPU *cpu = arm_env_get_cpu(env);
>
>  if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_PMSA)
> -&& !extended_addresses_enabled(env)) {
> +&& !extended_addresses_enabled(env, arm_current_el(env))) {
>  /* For VMSA (when not using the LPAE long descriptor page table
>   * format) this register includes the ASID, so do a TLB flush.
>   * For PMSA it is purely a process ID and no action is needed.

This isn't really right for figuring out what to do on CONTEXTIDR writes
in the general case. What we want is something along the lines of:

need_flush = true;
if (EL3 is AArch64) {
/* There is only one CONTEXTIDR, and it applies to EL1. We only need
 * to flush if EL1 is or will be AArch32 and has extended addresses
 * disabled.
 */
if (tcr_el[1].TTBCR_EAE) {
need_flush = false;
}
} else {
/* If extended addressing is enabled for the translation regime that
 * this CONTEXTIDR register applies to, then there is no ASID field
 * and we don't need to TLB flush. (If we later change the EAE bit
 * we'll flush then.)
 */
bool sec = ri->secure & ARM_CP_SECSTATE_S;
if (FEATURE_LPAE && tcr_el[sec ? 3 : 1].TTBCR_EAE) {
need_flush = false;
}
}
if (need_flush) {
/* We should be cleverer about which MMU indexes need flushing here */
tlb_flush(CPU(cpu));
}

because we need to handle the case of "EL1 is using short descriptors
and EL2 writes to CONTEXTIDR for EL1".

...but then we also need to tlb_flush when the tcr bits change, which
I don't think we do correctly. (We never notice this sort of bug because
we handle correctly the common cases of "all aarch64, or aarch64 and
LPAE aarch32" and "short descriptors in an aarch32-only EL1/EL0-only
config".)

> @@ -2162,7 +2162,7 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t 
> value,
>
>  ret = get_phys_addr(env, value, access_type, mmu_idx,
>  &phys_addr, &attrs, &prot, &page_size, &fsr, &fi);
> -if (extended_addresses_enabled(env)) {
> +if (extended_addresses_enabled(env, arm_current_el(env))) {

I think what you want to be checking here is
   if (arm_s1_regime_using_lpae_format(env, mmu_idx)) {

(because you are trying to determine what get_phys_addr() has
just handed you, and that function looks at the state of the
translation regime specified by mmu_idx, not at the current state
of the CPU)...

...but even this isn't really correct, because get_phys_addr() has
some broken cases where for a stage1+2 lookup where stage 1 is
using short descriptors we will return a short-format FSR value
for a stage1 failure but a long-format value for a stage2 failure.

The right long term thing here is to refactor get_phys_addr() so
that instead of returning literal fsr values it should return some
kind of internal QEMU type describing the failure, which we then
convert into the FSR we want at the point when we need to (ie
when taking an exception to a given EL, or writing a PAR value
for a given ATS* instruction).

>  /* fsr is a DFSR/IFSR value for the long descriptor
>   * translation table format, but with WnR always clear.
>   * Convert it to a 64-bit PAR.
> diff --git a/target/arm/internals.h b/target/arm/internals.h
> index 43106a2..6792df2 100644
> --- a/target/arm/internals.h
> +++ b/target/arm/internals.h
> @@ -217,10 +217,10 @@ static inline unsigned int arm_pamax(ARMCPU *cpu)
>   * This is always the case if our translation regime is 64 

Re: [Qemu-devel] [PATCH v6 12/12] nbd: Minimal structured read for client

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

27.10.2017 13:40, Eric Blake wrote:

From: Vladimir Sementsov-Ogievskiy 

Minimal implementation: for structured error only error_report error
message.

Note that test 83 is now more verbose, because the implementation
prints more warnings about unexpected communication errors; perhaps
future patches should tone things down by using trace messages
instead of warnings, but the common case of successful communication
is no noisier than before.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 


ok for me, thank you!



---
v6: tweak overflow check [Vladimir], fix reads to use absolute offset
from server by tracking original offset, fix talking to old-style server,
tweak iotest 83 output to account for new verbosity
v5: fix payload_advance[32,64], return correct negative error on
structured error, rearrange size checks to not be vulnerable to
overflow, simplify payload to use g_new instead of qemu_memalign,
don't set errp when returning 0, validate that error message
length is sane



--
Best regards,
Vladimir




Re: [Qemu-devel] [PATCH v1 0/5][RFC] Refactoring of AIS support

2017-10-30 Thread Pierre Morel

On 30/10/2017 18:08, Christian Borntraeger wrote:


On 10/30/2017 05:59 PM, Cornelia Huck wrote:

On Mon, 30 Oct 2017 14:48:23 +0100
Christian Borntraeger  wrote:



FWIW, I am testing a guest patch that enables zPCI without AIS. It's as simple as


diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 7b30af5..9b24836 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -953,7 +953,7 @@ static int __init pci_base_init(void)
 if (!s390_pci_probe)
 return 0;
  
-   if (!test_facility(69) || !test_facility(71) || !test_facility(72))

+   if (!test_facility(69) || !test_facility(71))
 return 0;
  
 rc = zpci_debug_init();

diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index ea34086..61f8c82 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -7,6 +7,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -93,6 +94,8 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
  /* Set Interruption Controls */
  void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
  {
+   if (!test_facility(72))
+   return;
 asm volatile (
 "   .insn   rsy,0xebd1,%[ctl],%[isc],%[u]\n"
 : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused));



Sounds good. Presumably this makes the adapter interrupt handling work
as for virtio (and qdio)? Is there any ais-less pci hardware out in the
wild?



ais is z specific, not PCI specific. So PCI cards should not care as far as I 
can tell.




I bet Conny meant PCI hardware globally, including zPCI adapters.
I bet there is none, but I had better ask Sebastian or Gerald if they know.


--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany




Re: [Qemu-devel] [PATCH v6 09/12] nbd/client: refactor nbd_receive_starttls

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

27.10.2017 13:40, Eric Blake wrote:

From: Vladimir Sementsov-Ogievskiy 

Split out nbd_request_simple_option to be reused for structured reply
option.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 



ok for me.



---
v6: error message tweak [Vladimir]
v5: only check length for ACK responses
v4: reduce redundant traces, typo fix in commit message
---



--
Best regards,
Vladimir




Re: [Qemu-devel] [PATCH v8 05/14] migration: introduce postcopy-only pending

2017-10-30 Thread Dr. David Alan Gilbert
* Vladimir Sementsov-Ogievskiy (vsement...@virtuozzo.com) wrote:
> There would be savevm states (dirty-bitmap) which can migrate only in
> postcopy stage. The corresponding pending is introduced here.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> Reviewed-by: Juan Quintela 

Note the error patchew picked up on this for s390 which has
cmma_save_pending.

Dave

> ---
>  include/migration/register.h | 17 +++--
>  migration/savevm.h   |  5 +++--
>  migration/block.c|  7 ---
>  migration/migration.c| 15 ---
>  migration/ram.c  |  9 +
>  migration/savevm.c   | 13 -
>  migration/trace-events   |  2 +-
>  7 files changed, 44 insertions(+), 24 deletions(-)
> 
> diff --git a/include/migration/register.h b/include/migration/register.h
> index f4f7bdc177..9436a87678 100644
> --- a/include/migration/register.h
> +++ b/include/migration/register.h
> @@ -37,8 +37,21 @@ typedef struct SaveVMHandlers {
>  int (*save_setup)(QEMUFile *f, void *opaque);
>  void (*save_live_pending)(QEMUFile *f, void *opaque,
>uint64_t threshold_size,
> -  uint64_t *non_postcopiable_pending,
> -  uint64_t *postcopiable_pending);
> +  uint64_t *res_precopy_only,
> +  uint64_t *res_compatible,
> +  uint64_t *res_postcopy_only);
> +/* Note for save_live_pending:
> + * - res_precopy_only is for data which must be migrated in precopy phase
> + * or in stopped state, in other words - before target vm start
> + * - res_compatible is for data which may be migrated in any phase
> + * - res_postcopy_only is for data which must be migrated in postcopy 
> phase
> + * or in stopped state, in other words - after source vm stop
> + *
> + * Sum of res_precopy_only, res_compatible and res_postcopy_only is the
> + * whole amount of pending data.
> + */
> +
> +
>  LoadStateHandler *load_state;
>  int (*load_setup)(QEMUFile *f, void *opaque);
>  int (*load_cleanup)(void *opaque);
> diff --git a/migration/savevm.h b/migration/savevm.h
> index 295c4a1f2c..cf4f0d37ca 100644
> --- a/migration/savevm.h
> +++ b/migration/savevm.h
> @@ -38,8 +38,9 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f);
>  int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
> bool inactivate_disks);
>  void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
> -   uint64_t *res_non_postcopiable,
> -   uint64_t *res_postcopiable);
> +   uint64_t *res_precopy_only,
> +   uint64_t *res_compatible,
> +   uint64_t *res_postcopy_only);
>  void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
>  void qemu_savevm_send_open_return_path(QEMUFile *f);
>  int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
> diff --git a/migration/block.c b/migration/block.c
> index 3282809583..39dfa567e8 100644
> --- a/migration/block.c
> +++ b/migration/block.c
> @@ -865,8 +865,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
>  }
>  
>  static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
> -   uint64_t *non_postcopiable_pending,
> -   uint64_t *postcopiable_pending)
> +   uint64_t *res_precopy_only,
> +   uint64_t *res_compatible,
> +   uint64_t *res_postcopy_only)
>  {
>  /* Estimate pending number of bytes to send */
>  uint64_t pending;
> @@ -887,7 +888,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, 
> uint64_t max_size,
>  
>  DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
>  /* We don't do postcopy */
> -*non_postcopiable_pending += pending;
> +*res_precopy_only += pending;
>  }
>  
>  static int block_load(QEMUFile *f, void *opaque, int version_id)
> diff --git a/migration/migration.c b/migration/migration.c
> index 4de3b551fe..e6c9be3cca 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -2216,20 +2216,21 @@ static void *migration_thread(void *opaque)
>  uint64_t pending_size;
>  
>  if (!qemu_file_rate_limit(s->to_dst_file)) {
> -uint64_t pend_post, pend_nonpost;
> +uint64_t pend_pre, pend_compat, pend_post;
>  
> -qemu_savevm_state_pending(s->to_dst_file, threshold_size,
> -  &pend_nonpost, &pend_post);
> -pending_size = pend_nonpost + pend_post;
> +qemu_savevm_state_pending(s->to_dst_file, threshold_size, 
> &pend_pre,
> + 

Re: [Qemu-devel] [PATCH v6 07/12] nbd: Minimal structured read for server

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

27.10.2017 13:40, Eric Blake wrote:

From: Vladimir Sementsov-Ogievskiy 

Minimal implementation of structured read: one structured reply chunk,
no segmentation.
Minimal structured error implementation: no text message.
Support DF flag, but just ignore it, as there is no segmentation any
way.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 


ok for me.



--
Best regards,
Vladimir




Re: [Qemu-devel] [PATCH v6 06/12] nbd/server: Refactor zero-length option check

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

27.10.2017 13:40, Eric Blake wrote:

Consolidate the response for a non-zero-length option payload
into a new function, nbd_reject_length().  This check will
also be used when introducing support for structured replies.

Note that STARTTLS response differs based on time: if the connection
is still unencrypted, we set fatal to true (a client that can't
request TLS correctly may still think that we are ready to start
the TLS handshake, so we must disconnect); while if the connection
is already encrypted, the client is sending a bogus request but
is no longer at risk of being confused by continuing the connection.

Signed-off-by: Eric Blake 

---
v6: split, rework logic to avoid subtle regression on starttls [Vladimir]
v5: new patch
---
  nbd/server.c | 74 +---
  1 file changed, 46 insertions(+), 28 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 6af708662d..a98f5622c9 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -253,21 +253,10 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, 
NBDExport *exp,

  /* Process the NBD_OPT_LIST command, with a potential series of replies.
   * Return -errno on error, 0 on success. */
-static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length,
- Error **errp)
+static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
  {
  NBDExport *exp;

-if (length) {
-if (nbd_drop(client->ioc, length, errp) < 0) {
-return -EIO;
-}
-return nbd_negotiate_send_rep_err(client->ioc,
-  NBD_REP_ERR_INVALID, NBD_OPT_LIST,
-  errp,
-  "OPT_LIST should not have length");
-}
-
  /* For each export, send a NBD_REP_SERVER reply. */
  QTAILQ_FOREACH(exp, &exports, next) {
  if (nbd_negotiate_send_rep_list(client->ioc, exp, errp)) {
@@ -531,7 +520,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, 
uint32_t length,
  /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
   * new channel for all further (now-encrypted) communication. */
  static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
- uint32_t length,
   Error **errp)
  {
  QIOChannel *ioc;
@@ -540,15 +528,6 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient 
*client,

  trace_nbd_negotiate_handle_starttls();
  ioc = client->ioc;
-if (length) {
-if (nbd_drop(ioc, length, errp) < 0) {
-return NULL;
-}
-nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
-   errp,
-   "OPT_STARTTLS should not have length");
-return NULL;
-}

  if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
 NBD_OPT_STARTTLS, errp) < 0) {
@@ -584,6 +563,34 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient 
*client,
  return QIO_CHANNEL(tioc);
  }

+/* nbd_reject_length: Handle any unexpected payload.
+ * @fatal requests that we quit talking to the client, even if we are able
+ * to successfully send an error to the guest.
+ * Return:
+ * -errno  transmission error occurred or @fatal was requested, errp is set
+ * 0   error message successfully sent to client, errp is not set
+ */
+static int nbd_reject_length(NBDClient *client, uint32_t length,
+ uint32_t option, bool fatal, Error **errp)
+{
+int ret;
+
+assert(length);
+if (nbd_drop(client->ioc, length, errp) < 0) {
+return -EIO;
+}
+ret = nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID,
+ option, errp,
+ "option '%s' should have zero length",
+ nbd_opt_lookup(option));
+if (fatal && !ret) {
+error_setg(errp, "option '%s' should have zero length",
+   nbd_opt_lookup(option));
+return -EINVAL;
+}
+return ret;
+}
+
  /* nbd_negotiate_options
   * Process all NBD_OPT_* client option commands, during fixed newstyle
   * negotiation.
@@ -674,7 +681,13 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
  }
  switch (option) {
  case NBD_OPT_STARTTLS:
-tioc = nbd_negotiate_handle_starttls(client, length, errp);
+if (length) {
+/* Unconditionally drop the connection if the client
+ * can't start a TLS negotiation correctly */
+nbd_reject_length(client, length, option, true, errp);
+return -EINVAL;


why not to return nbd_reject_length's result? this EINVAL may not 

[Qemu-devel] [Bug 1728661] [NEW] qemu-io segfaults at block/qcow2.h:533

2017-10-30 Thread R.Nageswara Sastry
Public bug reported:

git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
This is on ppc64le architecture.

Re-production steps:

1. Copy the attached file named test.img to a directory
2. And customize the following command to point to the above directory and run 
the same.
# mv test.img copy.img
# qemu-io /copy.img -c "truncate 66560"

from gdb:
Program terminated with signal 11, Segmentation fault.
#0  0x10054cec in get_refblock_offset (s=0x32ca3210, 
offset=9223372036854775296) at ./block/qcow2.h:533
533 return s->refcount_table[index] & REFT_OFFSET_MASK;
Missing separate debuginfos, use: debuginfo-install 
cyrus-sasl-lib-2.1.26-21.el7.ppc64le glib2-2.50.3-3.el7.ppc64le 
glibc-2.17-196.el7.ppc64le gmp-6.0.0-15.el7.ppc64le gnutls-3.3.26-9.el7.ppc64le 
keyutils-libs-1.5.8-3.el7.ppc64le krb5-libs-1.15.1-8.el7.ppc64le 
libaio-0.3.109-13.el7.ppc64le libcom_err-1.42.9-10.el7.ppc64le 
libcurl-7.29.0-42.el7.ppc64le libffi-3.0.13-18.el7.ppc64le 
libgcc-4.8.5-16.el7_4.1.ppc64le libidn-1.28-4.el7.ppc64le 
libselinux-2.5-11.el7.ppc64le libssh2-1.4.3-10.el7_2.1.ppc64le 
libstdc++-4.8.5-16.el7_4.1.ppc64le libtasn1-4.10-1.el7.ppc64le 
nettle-2.7.1-8.el7.ppc64le nspr-4.13.1-1.0.el7_3.ppc64le 
nss-3.28.4-15.el7_4.ppc64le nss-softokn-freebl-3.28.3-8.el7_4.ppc64le 
nss-util-3.28.4-3.el7.ppc64le openldap-2.4.44-5.el7.ppc64le 
openssl-libs-1.0.2k-8.el7.ppc64le p11-kit-0.23.5-3.el7.ppc64le 
pcre-8.32-17.el7.ppc64le zlib-1.2.7-17.el7.ppc64le
(gdb) bt
#0  0x10054cec in get_refblock_offset (s=0x32ca3210, 
offset=9223372036854775296) at ./block/qcow2.h:533
#1  0x1005df4c in qcow2_discard_refcount_block (bs=0x32c96f60, 
discard_block_offs=9223372036854775296) at block/qcow2-refcount.c:3070
#2  0x1005e5c4 in qcow2_shrink_reftable (bs=0x32c96f60) at 
block/qcow2-refcount.c:3169
#3  0x10051184 in qcow2_truncate (bs=0x32c96f60, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block/qcow2.c:3155
#4  0x10016480 in bdrv_truncate (child=0x32ca6270, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block.c:3585
#5  0x10090800 in blk_truncate (blk=0x32c89410, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block/block-backend.c:1845
#6  0x10023028 in truncate_f (blk=0x32c89410, argc=2, argv=0x32c685a0) 
at qemu-io-cmds.c:1580
#7  0x1001e648 in command (blk=0x32c89410, ct=0x32c96e30, argc=2, 
argv=0x32c685a0) at qemu-io-cmds.c:117
#8  0x10024d64 in qemuio_command (blk=0x32c89410, cmd=0x3fffc052f66e 
"truncate 66560") at qemu-io-cmds.c:2291
#9  0x1000b540 in command_loop () at qemu-io.c:374
#10 0x1000c05c in main (argc=4, argv=0x3fffc051f618) at qemu-io.c:630
(gdb) bt full
#0  0x10054cec in get_refblock_offset (s=0x32ca3210, 
offset=9223372036854775296) at ./block/qcow2.h:533
index = 4294967295
#1  0x1005df4c in qcow2_discard_refcount_block (bs=0x32c96f60, 
discard_block_offs=9223372036854775296) at block/qcow2-refcount.c:3070
s = 0x32ca3210
refblock_offs = 852111520
cluster_index = 16384
block_index = 3226593616
refblock = 0x32cb9570
ret = 16384
__PRETTY_FUNCTION__ = "qcow2_discard_refcount_block"
#2  0x1005e5c4 in qcow2_shrink_reftable (bs=0x32c96f60) at 
block/qcow2-refcount.c:3169
s = 0x32ca3210
reftable_tmp = 0x32cb9570
i = 0
ret = 0
#3  0x10051184 in qcow2_truncate (bs=0x32c96f60, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block/qcow2.c:3155
last_cluster = 70367675804416
old_file_size = 70367675804416
s = 0x32ca3210
old_length = 1048576
new_l1_size = 1
ret = 0
__func__ = "qcow2_truncate"
__PRETTY_FUNCTION__ = "qcow2_truncate"
__FUNCTION__ = "qcow2_truncate"
#4  0x10016480 in bdrv_truncate (child=0x32ca6270, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block.c:3585
bs = 0x32c96f60
drv = 0x102036f0 
ret = 16383
__PRETTY_FUNCTION__ = "bdrv_truncate"
__func__ = "bdrv_truncate"
#5  0x10090800 in blk_truncate (blk=0x32c89410, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block/block-backend.c:1845
__func__ = "blk_truncate"
#6  0x10023028 in truncate_f (blk=0x32c89410, argc=2, argv=0x32c685a0) 
at qemu-io-cmds.c:1580
local_err = 0x0
offset = 66560
ret = 0
#7  0x1001e648 in command (blk=0x32c89410, ct=0x32c96e30, argc=2, 
argv=0x32c685a0) at qemu-io-cmds.c:117
cmd = 0x32c684c0 "truncate"
#8  0x10024d64 in qemuio_command (blk=0x32c89410, cmd=0x3fffc052f66e 
"truncate 66560") at qemu-io-cmds.c:2291
ctx = 0x32c924d0
input = 0x32c684c0 "truncate"
ct = 0x32c96e30
v = 0x32c685a0
c = 2
done = false
#9  0x1000b540 in command_loop () at qemu-io.c:374
   

[Qemu-devel] [Bug 1728661] Re: qemu-io segfaults at block/qcow2.h:533

2017-10-30 Thread R.Nageswara Sastry
** Attachment added: "test.img tarred"
   
https://bugs.launchpad.net/qemu/+bug/1728661/+attachment/5000235/+files/test.img_1728661.tar.gz

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1728661

Title:
  qemu-io segfaults at block/qcow2.h:533

Status in QEMU:
  New

Bug description:
  git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
  This is on ppc64le architecture.

  Re-production steps:

  1. Copy the attached file named test.img to a directory
  2. And customize the following command to point to the above directory and 
run the same.
  # mv test.img copy.img
  # qemu-io /copy.img -c "truncate 66560"

  from gdb:
  Program terminated with signal 11, Segmentation fault.
  #0  0x10054cec in get_refblock_offset (s=0x32ca3210, 
offset=9223372036854775296) at ./block/qcow2.h:533
  533   return s->refcount_table[index] & REFT_OFFSET_MASK;
  Missing separate debuginfos, use: debuginfo-install 
cyrus-sasl-lib-2.1.26-21.el7.ppc64le glib2-2.50.3-3.el7.ppc64le 
glibc-2.17-196.el7.ppc64le gmp-6.0.0-15.el7.ppc64le gnutls-3.3.26-9.el7.ppc64le 
keyutils-libs-1.5.8-3.el7.ppc64le krb5-libs-1.15.1-8.el7.ppc64le 
libaio-0.3.109-13.el7.ppc64le libcom_err-1.42.9-10.el7.ppc64le 
libcurl-7.29.0-42.el7.ppc64le libffi-3.0.13-18.el7.ppc64le 
libgcc-4.8.5-16.el7_4.1.ppc64le libidn-1.28-4.el7.ppc64le 
libselinux-2.5-11.el7.ppc64le libssh2-1.4.3-10.el7_2.1.ppc64le 
libstdc++-4.8.5-16.el7_4.1.ppc64le libtasn1-4.10-1.el7.ppc64le 
nettle-2.7.1-8.el7.ppc64le nspr-4.13.1-1.0.el7_3.ppc64le 
nss-3.28.4-15.el7_4.ppc64le nss-softokn-freebl-3.28.3-8.el7_4.ppc64le 
nss-util-3.28.4-3.el7.ppc64le openldap-2.4.44-5.el7.ppc64le 
openssl-libs-1.0.2k-8.el7.ppc64le p11-kit-0.23.5-3.el7.ppc64le 
pcre-8.32-17.el7.ppc64le zlib-1.2.7-17.el7.ppc64le
  (gdb) bt
  #0  0x10054cec in get_refblock_offset (s=0x32ca3210, 
offset=9223372036854775296) at ./block/qcow2.h:533
  #1  0x1005df4c in qcow2_discard_refcount_block (bs=0x32c96f60, 
discard_block_offs=9223372036854775296) at block/qcow2-refcount.c:3070
  #2  0x1005e5c4 in qcow2_shrink_reftable (bs=0x32c96f60) at 
block/qcow2-refcount.c:3169
  #3  0x10051184 in qcow2_truncate (bs=0x32c96f60, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block/qcow2.c:3155
  #4  0x10016480 in bdrv_truncate (child=0x32ca6270, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block.c:3585
  #5  0x10090800 in blk_truncate (blk=0x32c89410, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block/block-backend.c:1845
  #6  0x10023028 in truncate_f (blk=0x32c89410, argc=2, 
argv=0x32c685a0) at qemu-io-cmds.c:1580
  #7  0x1001e648 in command (blk=0x32c89410, ct=0x32c96e30, argc=2, 
argv=0x32c685a0) at qemu-io-cmds.c:117
  #8  0x10024d64 in qemuio_command (blk=0x32c89410, cmd=0x3fffc052f66e 
"truncate 66560") at qemu-io-cmds.c:2291
  #9  0x1000b540 in command_loop () at qemu-io.c:374
  #10 0x1000c05c in main (argc=4, argv=0x3fffc051f618) at qemu-io.c:630
  (gdb) bt full
  #0  0x10054cec in get_refblock_offset (s=0x32ca3210, 
offset=9223372036854775296) at ./block/qcow2.h:533
  index = 4294967295
  #1  0x1005df4c in qcow2_discard_refcount_block (bs=0x32c96f60, 
discard_block_offs=9223372036854775296) at block/qcow2-refcount.c:3070
  s = 0x32ca3210
  refblock_offs = 852111520
  cluster_index = 16384
  block_index = 3226593616
  refblock = 0x32cb9570
  ret = 16384
  __PRETTY_FUNCTION__ = "qcow2_discard_refcount_block"
  #2  0x1005e5c4 in qcow2_shrink_reftable (bs=0x32c96f60) at 
block/qcow2-refcount.c:3169
  s = 0x32ca3210
  reftable_tmp = 0x32cb9570
  i = 0
  ret = 0
  #3  0x10051184 in qcow2_truncate (bs=0x32c96f60, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block/qcow2.c:3155
  last_cluster = 70367675804416
  old_file_size = 70367675804416
  s = 0x32ca3210
  old_length = 1048576
  new_l1_size = 1
  ret = 0
  __func__ = "qcow2_truncate"
  __PRETTY_FUNCTION__ = "qcow2_truncate"
  __FUNCTION__ = "qcow2_truncate"
  #4  0x10016480 in bdrv_truncate (child=0x32ca6270, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block.c:3585
  bs = 0x32c96f60
  drv = 0x102036f0 
  ret = 16383
  __PRETTY_FUNCTION__ = "bdrv_truncate"
  __func__ = "bdrv_truncate"
  #5  0x10090800 in blk_truncate (blk=0x32c89410, offset=66560, 
prealloc=PREALLOC_MODE_OFF, errp=0x3fffc051ecd8) at block/block-backend.c:1845
  __func__ = "blk_truncate"
  #6  0x10023028 in truncate_f (blk=0x32c89410, argc=2, 
argv=0x32c685a0) at qemu-io-cmds.c:1580
  local_err = 0x0
  offset = 66560
  ret = 

Re: [Qemu-devel] QMP event missed during startup

2017-10-30 Thread Dr. David Alan Gilbert
* Ross Lagerwall (ross.lagerw...@citrix.com) wrote:
> Hi,
> 
> I have found an issue where QEMU emits the RESUME event during startup when
> it starts VM execution, but it is not possible to receive this event.
> 
> To repro this, run:
> qemu-system-i386 -m 256 -trace
> enable=monitor_protocol_event_emit,file=/tmp/out -qmp
> unix:/tmp/qmp,server,wait
> 
> QEMU will not start execution of the VM until something connects to the QMP
> socket (e.g. qmp-shell). Once connected, no event is received on the QMP
> connection but the tracepoint is hit indicating that an event has been
> emitted. I suspect that the event is emitted while the QMP client is doing
> the initial negotiation.
> 
> The reason I want to receive this event is that QEMU currently uses xenstore
> to communicate this information to the Xen toolstack (see
> xen-common.c:xen_change_state_handler) but we want to move to using QMP
> rather than xenstore for this kind of thing.
> 
> Is this a known issue or just a bug that should be fixed?

I'll leave it to Markus to say if it's a bug or not, but can't
you work around this by starting qemu with -S which leaves the guest
paused, and then continuing the guest when you have your QMP ?

Dave

> Thanks,
> -- 
> Ross Lagerwall
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



[Qemu-devel] [Bug 1728660] [NEW] qemu-io segfaults at block/io.c:2545

2017-10-30 Thread R.Nageswara Sastry
Public bug reported:

git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
This is on ppc64le architecture.

Re-production steps:

1. Copy the attached file named test.img to a directory
2. And customize the following command to point to the above directory and run 
the same.
# mv test.img copy.img
# qemu-io /copy.img -c "discard 108544 97792"

from gdb:
Program terminated with signal 11, Segmentation fault.
#0  0x100af254 in bdrv_co_pdiscard (bs=0x3ee89ad0, offset=196608, 
bytes=9728) at block/io.c:2545
2545if (bs->drv->bdrv_co_pdiscard) {
Missing separate debuginfos, use: debuginfo-install 
cyrus-sasl-lib-2.1.26-21.el7.ppc64le glib2-2.50.3-3.el7.ppc64le 
glibc-2.17-196.el7.ppc64le gmp-6.0.0-15.el7.ppc64le gnutls-3.3.26-9.el7.ppc64le 
keyutils-libs-1.5.8-3.el7.ppc64le krb5-libs-1.15.1-8.el7.ppc64le 
libaio-0.3.109-13.el7.ppc64le libcom_err-1.42.9-10.el7.ppc64le 
libcurl-7.29.0-42.el7.ppc64le libffi-3.0.13-18.el7.ppc64le 
libgcc-4.8.5-16.el7_4.1.ppc64le libidn-1.28-4.el7.ppc64le 
libselinux-2.5-11.el7.ppc64le libssh2-1.4.3-10.el7_2.1.ppc64le 
libstdc++-4.8.5-16.el7_4.1.ppc64le libtasn1-4.10-1.el7.ppc64le 
nettle-2.7.1-8.el7.ppc64le nspr-4.13.1-1.0.el7_3.ppc64le 
nss-3.28.4-15.el7_4.ppc64le nss-softokn-freebl-3.28.3-8.el7_4.ppc64le 
nss-util-3.28.4-3.el7.ppc64le openldap-2.4.44-5.el7.ppc64le 
openssl-libs-1.0.2k-8.el7.ppc64le p11-kit-0.23.5-3.el7.ppc64le 
pcre-8.32-17.el7.ppc64le zlib-1.2.7-17.el7.ppc64le
(gdb) bt
#0  0x100af254 in bdrv_co_pdiscard (bs=0x3ee89ad0, offset=196608, 
bytes=9728) at block/io.c:2545
#1  0x1008f260 in blk_co_pdiscard (blk=0x3ee79410, offset=108544, 
bytes=97792) at block/block-backend.c:1447
#2  0x10090884 in blk_pdiscard_entry (opaque=0x3fffd7402c58) at 
block/block-backend.c:1851
#3  0x101aa444 in coroutine_trampoline (i0=1055521728, i1=0) at 
util/coroutine-ucontext.c:79
#4  0x3fff7a3d2b9c in makecontext () from /lib64/libc.so.6
#5  0x in ?? ()
(gdb) bt full
#0  0x100af254 in bdrv_co_pdiscard (bs=0x3ee89ad0, offset=196608, 
bytes=9728) at block/io.c:2545
num = 9728
req = {bs = 0x3ee89ad0, offset = 108544, bytes = 97792, type = 
BDRV_TRACKED_DISCARD, serialising = false, overlap_offset = 108544,
  overlap_bytes = 97792, list = {le_next = 0x0, le_prev = 0x3ee8cd48}, 
co = 0x3ee9fbc0, wait_queue = {entries = {sqh_first = 0x0,
  sqh_last = 0x3fff7823fe10}}, waiting_for = 0x0}
max_pdiscard = 2147467264
ret = 0
head = 0
tail = 9728
align = 16384
__PRETTY_FUNCTION__ = "bdrv_co_pdiscard"
#1  0x1008f260 in blk_co_pdiscard (blk=0x3ee79410, offset=108544, 
bytes=97792) at block/block-backend.c:1447
ret = 0
#2  0x10090884 in blk_pdiscard_entry (opaque=0x3fffd7402c58) at 
block/block-backend.c:1851
rwco = 0x3fffd7402c58
#3  0x101aa444 in coroutine_trampoline (i0=1055521728, i1=0) at 
util/coroutine-ucontext.c:79
arg = {p = 0x3ee9fbc0, i = {1055521728, 0}}
self = 0x3ee9fbc0
co = 0x3ee9fbc0
#4  0x3fff7a3d2b9c in makecontext () from /lib64/libc.so.6
No symbol table info available.
#5  0x in ?? ()
No symbol table info available.

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1728660

Title:
  qemu-io segfaults at block/io.c:2545

Status in QEMU:
  New

Bug description:
  git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
  This is on ppc64le architecture.

  Re-production steps:

  1. Copy the attached file named test.img to a directory
  2. And customize the following command to point to the above directory and 
run the same.
  # mv test.img copy.img
  # qemu-io /copy.img -c "discard 108544 97792"

  from gdb:
  Program terminated with signal 11, Segmentation fault.
  #0  0x100af254 in bdrv_co_pdiscard (bs=0x3ee89ad0, offset=196608, 
bytes=9728) at block/io.c:2545
  2545  if (bs->drv->bdrv_co_pdiscard) {
  Missing separate debuginfos, use: debuginfo-install 
cyrus-sasl-lib-2.1.26-21.el7.ppc64le glib2-2.50.3-3.el7.ppc64le 
glibc-2.17-196.el7.ppc64le gmp-6.0.0-15.el7.ppc64le gnutls-3.3.26-9.el7.ppc64le 
keyutils-libs-1.5.8-3.el7.ppc64le krb5-libs-1.15.1-8.el7.ppc64le 
libaio-0.3.109-13.el7.ppc64le libcom_err-1.42.9-10.el7.ppc64le 
libcurl-7.29.0-42.el7.ppc64le libffi-3.0.13-18.el7.ppc64le 
libgcc-4.8.5-16.el7_4.1.ppc64le libidn-1.28-4.el7.ppc64le 
libselinux-2.5-11.el7.ppc64le libssh2-1.4.3-10.el7_2.1.ppc64le 
libstdc++-4.8.5-16.el7_4.1.ppc64le libtasn1-4.10-1.el7.ppc64le 
nettle-2.7.1-8.el7.ppc64le nspr-4.13.1-1.0.el7_3.ppc64le 
nss-3.28.4-15.el7_4.ppc64le nss-softokn-freebl-3.28.3-8.el7_4.ppc64le 
nss-util-3.28.4-3.el7.ppc64le openldap-2.4.44-5.el7.ppc64le 
openssl-libs-1.0.2k-8.el7.ppc64le p11-kit-0.23.5-3.el7.ppc64le 
pcre-8.32-17.el7.ppc64le 

[Qemu-devel] [Bug 1728657] Re: qemu-io: block/qcow2-cluster.c:1109: handle_copied: Assertion failed

2017-10-30 Thread R.Nageswara Sastry
** Attachment added: "test.img tarred"
   
https://bugs.launchpad.net/qemu/+bug/1728657/+attachment/5000204/+files/test.img_1728657.tar.gz

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1728657

Title:
  qemu-io: block/qcow2-cluster.c:1109: handle_copied: Assertion failed

Status in QEMU:
  New

Bug description:
  git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
  This is on ppc64le architecture.

  Re-production steps:

  1. Copy the attached file test.img to a directory
  2. And customize the following command to point to the above directory and 
run the same.
  # mv test.img copy.img
  # qemu-io /copy.img -c "write 4105728 2791936"

  from gdb:
  (gdb) bt
  #0  0x3fffb17eeff0 in raise () from /lib64/libc.so.6
  #1  0x3fffb17f136c in abort () from /lib64/libc.so.6
  #2  0x3fffb17e4c44 in __assert_fail_base () from /lib64/libc.so.6
  #3  0x3fffb17e4d34 in __assert_fail () from /lib64/libc.so.6
  #4  0x100631fc in handle_copied (bs=0x42ba9ad0, guest_offset=4210688, 
host_offset=0x3fffaf4bfab0, bytes=0x3fffaf4bfab8, m=0x3fffaf4bfb60)
  at block/qcow2-cluster.c:1108
  #5  0x10064118 in qcow2_alloc_cluster_offset (bs=0x42ba9ad0, 
offset=4194304, bytes=0x3fffaf4bfb4c, host_offset=0x3fffaf4bfb58, 
m=0x3fffaf4bfb60)
  at block/qcow2-cluster.c:1498
  #6  0x1004d3f4 in qcow2_co_pwritev (bs=0x42ba9ad0, offset=4194304, 
bytes=2703360, qiov=0x3fffc7cc9ee0, flags=0) at block/qcow2.c:1919
  #7  0x100a9648 in bdrv_driver_pwritev (bs=0x42ba9ad0, offset=4105728, 
bytes=2791936, qiov=0x3fffc7cc9ee0, flags=16) at block/io.c:898
  #8  0x100ab630 in bdrv_aligned_pwritev (child=0x42bb8250, 
req=0x3fffaf4bfdd8, offset=4105728, bytes=2791936, align=1, 
qiov=0x3fffc7cc9ee0, flags=16)
  at block/io.c:1440
  #9  0x100ac4ac in bdrv_co_pwritev (child=0x42bb8250, offset=4105728, 
bytes=2791936, qiov=0x3fffc7cc9ee0, flags=BDRV_REQ_FUA) at block/io.c:1691
  #10 0x1008da0c in blk_co_pwritev (blk=0x42b99410, offset=4105728, 
bytes=2791936, qiov=0x3fffc7cc9ee0, flags=BDRV_REQ_FUA) at 
block/block-backend.c:1085
  #11 0x1008db68 in blk_write_entry (opaque=0x3fffc7cc9ef8) at 
block/block-backend.c:1110
  #12 0x101aa444 in coroutine_trampoline (i0=1119572144, i1=0) at 
util/coroutine-ucontext.c:79
  #13 0x3fffb1802b9c in makecontext () from /lib64/libc.so.6
  #14 0x in ?? ()
  (gdb) bt full
  #0  0x3fffb17eeff0 in raise () from /lib64/libc.so.6
  No symbol table info available.
  #1  0x3fffb17f136c in abort () from /lib64/libc.so.6
  No symbol table info available.
  #2  0x3fffb17e4c44 in __assert_fail_base () from /lib64/libc.so.6
  No symbol table info available.
  #3  0x3fffb17e4d34 in __assert_fail () from /lib64/libc.so.6
  No symbol table info available.
  #4  0x100631fc in handle_copied (bs=0x42ba9ad0, guest_offset=4210688, 
host_offset=0x3fffaf4bfab0, bytes=0x3fffaf4bfab8, m=0x3fffaf4bfb60)
  at block/qcow2-cluster.c:1108
  s = 0x42bb5d80
  l2_index = 0
  cluster_offset = 4210688
  l2_table = 0x0
  nb_clusters = 1119575424
  keep_clusters = 0
  ret = 0
  __PRETTY_FUNCTION__ = "handle_copied"
  #5  0x10064118 in qcow2_alloc_cluster_offset (bs=0x42ba9ad0, 
offset=4194304, bytes=0x3fffaf4bfb4c, host_offset=0x3fffaf4bfb58, 
m=0x3fffaf4bfb60)
  at block/qcow2-cluster.c:1498
  s = 0x42bb5d80
  start = 4210688
  remaining = 2686976
  cluster_offset = 4294983168
  cur_bytes = 2686976
  ret = 0
  __PRETTY_FUNCTION__ = "qcow2_alloc_cluster_offset"
  #6  0x1004d3f4 in qcow2_co_pwritev (bs=0x42ba9ad0, offset=4194304, 
bytes=2703360, qiov=0x3fffc7cc9ee0, flags=0) at block/qcow2.c:1919
  s = 0x42bb5d80
  offset_in_cluster = 0
  ret = 0
  cur_bytes = 2703360
  cluster_offset = 4294950912
  hd_qiov = {iov = 0x42b74fb0, niov = 1, nalloc = 1, size = 16384}
  bytes_done = 88576
  cluster_data = 0x0
  l2meta = 0x42bb5d20
  __PRETTY_FUNCTION__ = "qcow2_co_pwritev"
  #7  0x100a9648 in bdrv_driver_pwritev (bs=0x42ba9ad0, offset=4105728, 
bytes=2791936, qiov=0x3fffc7cc9ee0, flags=16) at block/io.c:898
  drv = 0x102036f0 
  sector_num = 1119538320
  nb_sectors = 2841469356
  ret = 2116577536
  __PRETTY_FUNCTION__ = "bdrv_driver_pwritev"
  #8  0x100ab630 in bdrv_aligned_pwritev (child=0x42bb8250, 
req=0x3fffaf4bfdd8, offset=4105728, bytes=2791936, align=1, 
qiov=0x3fffc7cc9ee0, flags=16)
  at block/io.c:1440
  bs = 0x42ba9ad0
  drv = 0x102036f0 
  waited = false
  ret = 0
  ---Type <return> to continue, or q <return> to quit---
  end_sector = 13472
  bytes_remaining = 2791936
  max_transfer = 

[Qemu-devel] [Bug 1728657] [NEW] qemu-io: block/qcow2-cluster.c:1109: handle_copied: Assertion failed

2017-10-30 Thread R.Nageswara Sastry
Public bug reported:

git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
This is on ppc64le architecture.

Re-production steps:

1. Copy the attached file test.img to a directory
2. And customize the following command to point to the above directory and run 
the same.
# mv test.img copy.img
# qemu-io /copy.img -c "write 4105728 2791936"

from gdb:
(gdb) bt
#0  0x3fffb17eeff0 in raise () from /lib64/libc.so.6
#1  0x3fffb17f136c in abort () from /lib64/libc.so.6
#2  0x3fffb17e4c44 in __assert_fail_base () from /lib64/libc.so.6
#3  0x3fffb17e4d34 in __assert_fail () from /lib64/libc.so.6
#4  0x100631fc in handle_copied (bs=0x42ba9ad0, guest_offset=4210688, 
host_offset=0x3fffaf4bfab0, bytes=0x3fffaf4bfab8, m=0x3fffaf4bfb60)
at block/qcow2-cluster.c:1108
#5  0x10064118 in qcow2_alloc_cluster_offset (bs=0x42ba9ad0, 
offset=4194304, bytes=0x3fffaf4bfb4c, host_offset=0x3fffaf4bfb58, 
m=0x3fffaf4bfb60)
at block/qcow2-cluster.c:1498
#6  0x1004d3f4 in qcow2_co_pwritev (bs=0x42ba9ad0, offset=4194304, 
bytes=2703360, qiov=0x3fffc7cc9ee0, flags=0) at block/qcow2.c:1919
#7  0x100a9648 in bdrv_driver_pwritev (bs=0x42ba9ad0, offset=4105728, 
bytes=2791936, qiov=0x3fffc7cc9ee0, flags=16) at block/io.c:898
#8  0x100ab630 in bdrv_aligned_pwritev (child=0x42bb8250, 
req=0x3fffaf4bfdd8, offset=4105728, bytes=2791936, align=1, 
qiov=0x3fffc7cc9ee0, flags=16)
at block/io.c:1440
#9  0x100ac4ac in bdrv_co_pwritev (child=0x42bb8250, offset=4105728, 
bytes=2791936, qiov=0x3fffc7cc9ee0, flags=BDRV_REQ_FUA) at block/io.c:1691
#10 0x1008da0c in blk_co_pwritev (blk=0x42b99410, offset=4105728, 
bytes=2791936, qiov=0x3fffc7cc9ee0, flags=BDRV_REQ_FUA) at 
block/block-backend.c:1085
#11 0x1008db68 in blk_write_entry (opaque=0x3fffc7cc9ef8) at 
block/block-backend.c:1110
#12 0x101aa444 in coroutine_trampoline (i0=1119572144, i1=0) at 
util/coroutine-ucontext.c:79
#13 0x3fffb1802b9c in makecontext () from /lib64/libc.so.6
#14 0x in ?? ()
(gdb) bt full
#0  0x3fffb17eeff0 in raise () from /lib64/libc.so.6
No symbol table info available.
#1  0x3fffb17f136c in abort () from /lib64/libc.so.6
No symbol table info available.
#2  0x3fffb17e4c44 in __assert_fail_base () from /lib64/libc.so.6
No symbol table info available.
#3  0x3fffb17e4d34 in __assert_fail () from /lib64/libc.so.6
No symbol table info available.
#4  0x100631fc in handle_copied (bs=0x42ba9ad0, guest_offset=4210688, 
host_offset=0x3fffaf4bfab0, bytes=0x3fffaf4bfab8, m=0x3fffaf4bfb60)
at block/qcow2-cluster.c:1108
s = 0x42bb5d80
l2_index = 0
cluster_offset = 4210688
l2_table = 0x0
nb_clusters = 1119575424
keep_clusters = 0
ret = 0
__PRETTY_FUNCTION__ = "handle_copied"
#5  0x10064118 in qcow2_alloc_cluster_offset (bs=0x42ba9ad0, 
offset=4194304, bytes=0x3fffaf4bfb4c, host_offset=0x3fffaf4bfb58, 
m=0x3fffaf4bfb60)
at block/qcow2-cluster.c:1498
s = 0x42bb5d80
start = 4210688
remaining = 2686976
cluster_offset = 4294983168
cur_bytes = 2686976
ret = 0
__PRETTY_FUNCTION__ = "qcow2_alloc_cluster_offset"
#6  0x1004d3f4 in qcow2_co_pwritev (bs=0x42ba9ad0, offset=4194304, 
bytes=2703360, qiov=0x3fffc7cc9ee0, flags=0) at block/qcow2.c:1919
s = 0x42bb5d80
offset_in_cluster = 0
ret = 0
cur_bytes = 2703360
cluster_offset = 4294950912
hd_qiov = {iov = 0x42b74fb0, niov = 1, nalloc = 1, size = 16384}
bytes_done = 88576
cluster_data = 0x0
l2meta = 0x42bb5d20
__PRETTY_FUNCTION__ = "qcow2_co_pwritev"
#7  0x100a9648 in bdrv_driver_pwritev (bs=0x42ba9ad0, offset=4105728, 
bytes=2791936, qiov=0x3fffc7cc9ee0, flags=16) at block/io.c:898
drv = 0x102036f0 
sector_num = 1119538320
nb_sectors = 2841469356
ret = 2116577536
__PRETTY_FUNCTION__ = "bdrv_driver_pwritev"
#8  0x100ab630 in bdrv_aligned_pwritev (child=0x42bb8250, 
req=0x3fffaf4bfdd8, offset=4105728, bytes=2791936, align=1, 
qiov=0x3fffc7cc9ee0, flags=16)
at block/io.c:1440
bs = 0x42ba9ad0
drv = 0x102036f0 
waited = false
ret = 0
---Type <return> to continue, or q <return> to quit---
end_sector = 13472
bytes_remaining = 2791936
max_transfer = 2147483647
__PRETTY_FUNCTION__ = "bdrv_aligned_pwritev"
#9  0x100ac4ac in bdrv_co_pwritev (child=0x42bb8250, offset=4105728, 
bytes=2791936, qiov=0x3fffc7cc9ee0, flags=BDRV_REQ_FUA) at block/io.c:1691
bs = 0x42ba9ad0
req = {bs = 0x42ba9ad0, offset = 4105728, bytes = 2791936, type = 
BDRV_TRACKED_WRITE, serialising = false, overlap_offset = 4105728,
  overlap_bytes = 2791936, list = {le_next = 0x0, le_prev = 
0x42bacd48}, co = 0x42bb50b0, wait_queue = {entries = {sqh_first = 0x0,
  sqh_last = 

[Qemu-devel] [Bug 1728660] Re: qemu-io segfaults at block/io.c:2545

2017-10-30 Thread R.Nageswara Sastry
** Attachment added: "test.img tarred"
   
https://bugs.launchpad.net/qemu/+bug/1728660/+attachment/5000223/+files/test.img_1728660.tar.gz

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1728660

Title:
  qemu-io segfaults at block/io.c:2545

Status in QEMU:
  New

Bug description:
  git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
  This is on ppc64le architecture.

  Re-production steps:

  1. Copy the attached file named test.img to a directory
  2. And customize the following command to point to the above directory and 
run the same.
  # mv test.img copy.img
  # qemu-io /copy.img -c "discard 108544 97792"

  from gdb:
  Program terminated with signal 11, Segmentation fault.
  #0  0x100af254 in bdrv_co_pdiscard (bs=0x3ee89ad0, offset=196608, 
bytes=9728) at block/io.c:2545
  2545  if (bs->drv->bdrv_co_pdiscard) {
  Missing separate debuginfos, use: debuginfo-install 
cyrus-sasl-lib-2.1.26-21.el7.ppc64le glib2-2.50.3-3.el7.ppc64le 
glibc-2.17-196.el7.ppc64le gmp-6.0.0-15.el7.ppc64le gnutls-3.3.26-9.el7.ppc64le 
keyutils-libs-1.5.8-3.el7.ppc64le krb5-libs-1.15.1-8.el7.ppc64le 
libaio-0.3.109-13.el7.ppc64le libcom_err-1.42.9-10.el7.ppc64le 
libcurl-7.29.0-42.el7.ppc64le libffi-3.0.13-18.el7.ppc64le 
libgcc-4.8.5-16.el7_4.1.ppc64le libidn-1.28-4.el7.ppc64le 
libselinux-2.5-11.el7.ppc64le libssh2-1.4.3-10.el7_2.1.ppc64le 
libstdc++-4.8.5-16.el7_4.1.ppc64le libtasn1-4.10-1.el7.ppc64le 
nettle-2.7.1-8.el7.ppc64le nspr-4.13.1-1.0.el7_3.ppc64le 
nss-3.28.4-15.el7_4.ppc64le nss-softokn-freebl-3.28.3-8.el7_4.ppc64le 
nss-util-3.28.4-3.el7.ppc64le openldap-2.4.44-5.el7.ppc64le 
openssl-libs-1.0.2k-8.el7.ppc64le p11-kit-0.23.5-3.el7.ppc64le 
pcre-8.32-17.el7.ppc64le zlib-1.2.7-17.el7.ppc64le
  (gdb) bt
  #0  0x100af254 in bdrv_co_pdiscard (bs=0x3ee89ad0, offset=196608, 
bytes=9728) at block/io.c:2545
  #1  0x1008f260 in blk_co_pdiscard (blk=0x3ee79410, offset=108544, 
bytes=97792) at block/block-backend.c:1447
  #2  0x10090884 in blk_pdiscard_entry (opaque=0x3fffd7402c58) at 
block/block-backend.c:1851
  #3  0x101aa444 in coroutine_trampoline (i0=1055521728, i1=0) at 
util/coroutine-ucontext.c:79
  #4  0x3fff7a3d2b9c in makecontext () from /lib64/libc.so.6
  #5  0x in ?? ()
  (gdb) bt full
  #0  0x100af254 in bdrv_co_pdiscard (bs=0x3ee89ad0, offset=196608, 
bytes=9728) at block/io.c:2545
  num = 9728
  req = {bs = 0x3ee89ad0, offset = 108544, bytes = 97792, type = 
BDRV_TRACKED_DISCARD, serialising = false, overlap_offset = 108544,
overlap_bytes = 97792, list = {le_next = 0x0, le_prev = 
0x3ee8cd48}, co = 0x3ee9fbc0, wait_queue = {entries = {sqh_first = 0x0,
sqh_last = 0x3fff7823fe10}}, waiting_for = 0x0}
  max_pdiscard = 2147467264
  ret = 0
  head = 0
  tail = 9728
  align = 16384
  __PRETTY_FUNCTION__ = "bdrv_co_pdiscard"
  #1  0x1008f260 in blk_co_pdiscard (blk=0x3ee79410, offset=108544, 
bytes=97792) at block/block-backend.c:1447
  ret = 0
  #2  0x10090884 in blk_pdiscard_entry (opaque=0x3fffd7402c58) at 
block/block-backend.c:1851
  rwco = 0x3fffd7402c58
  #3  0x101aa444 in coroutine_trampoline (i0=1055521728, i1=0) at 
util/coroutine-ucontext.c:79
  arg = {p = 0x3ee9fbc0, i = {1055521728, 0}}
  self = 0x3ee9fbc0
  co = 0x3ee9fbc0
  #4  0x3fff7a3d2b9c in makecontext () from /lib64/libc.so.6
  No symbol table info available.
  #5  0x in ?? ()
  No symbol table info available.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1728660/+subscriptions



Re: [Qemu-devel] [PATCH v1 0/5][RFC] Refactoring of AIS support

2017-10-30 Thread Christian Borntraeger

On 10/30/2017 05:59 PM, Cornelia Huck wrote:
> On Mon, 30 Oct 2017 14:48:23 +0100
> Christian Borntraeger  wrote:
> 
> 
>> FWIW, I am testing a guest patch that enables zPCI without AIS. Its as 
>> simple as
>>
>>
>> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
>> index 7b30af5..9b24836 100644
>> --- a/arch/s390/pci/pci.c
>> +++ b/arch/s390/pci/pci.c
>> @@ -953,7 +953,7 @@ static int __init pci_base_init(void)
>> if (!s390_pci_probe)
>> return 0;
>>  
>> -   if (!test_facility(69) || !test_facility(71) || !test_facility(72))
>> +   if (!test_facility(69) || !test_facility(71))
>> return 0;
>>  
>> rc = zpci_debug_init();
>> diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
>> index ea34086..61f8c82 100644
>> --- a/arch/s390/pci/pci_insn.c
>> +++ b/arch/s390/pci/pci_insn.c
>> @@ -7,6 +7,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -93,6 +94,8 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
>>  /* Set Interruption Controls */
>>  void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
>>  {
>> +   if (!test_facility(72))
>> +   return;
>> asm volatile (
>> "   .insn   rsy,0xebd1,%[ctl],%[isc],%[u]\n"
>> : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" 
>> (*unused));
>>
> 
> Sounds good. Presumably this makes the adapter interrupt handling work
> as for virtio (and qdio)? Is there any ais-less pci hardware out in the
> wild?
> 

ais is z specific, not PCI specific. So PCI cards should not care as far as I 
can tell.




Re: [Qemu-devel] drive_add: file names with spaces

2017-10-30 Thread Dr. David Alan Gilbert
* Programmingkid (programmingk...@gmail.com) wrote:
> 
> > On Oct 30, 2017, at 10:32 AM, Dr. David Alan Gilbert  
> > wrote:
> > 
> > * Programmingkid (programmingk...@gmail.com) wrote:
> >> 
> >>> On Oct 29, 2017, at 6:21 AM, Dr. David Alan Gilbert  
> >>> wrote:
> >>> 
> >>> * Eric Blake (ebl...@redhat.com) wrote:
>  On 10/28/2017 06:44 AM, Programmingkid wrote:
> > I'm trying to use an image file that has spaces in its file name (and 
> > sometimes in the path) to be used as a USB flash drive. When I try 
> > adding the image file using the drive_add command I see this error: 
> > drive_add: extraneous characters at the end of the line
> > 
> > I have tried using single and double quotes around the file name but 
> > this does not help. Is there a way to use files that have spaces in 
> > it's name with drive_add?
>  
>  Not in HMP, which lacks quoting.  (Unless you want to patch HMP to learn
>  quoting)
> >>> 
> >>> I think it's already got that code at least in some places.
> >>> monitor_parse_arguments calls get_str, and get_str understands " wrapped
> >>> arguments.
> >>> 
> >>> drive_add 1 "file=foo bar"
> >>> 
> >>> gives me:
> >>> Could not open 'foo bar': No such file or directory
> >>> 
> >>> so I think that's doing the right thing.
> >>> 
> >>> Dave
> >>> 
>  But using spaces in filenames works fine in QMP.
>  
>  -- 
>  Eric Blake, Principal Software Engineer
>  Red Hat, Inc.   +1-919-301-3266
>  Virtualization:  qemu.org | libvirt.org
>  
> >>> 
> >>> 
> >>> --
> >>> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
> >> 
> >> Thank you so much for this info. It solved my problem :)
> > 
> > Now, the real challenge is how to deal with a filename with a comma in;
> >   drive_add 1 "file=foo,bar"
> >   Could not open 'foo': No such file or directory
> 
> I think the logical thing to do is not to depend on characters like spaces 
> and commas to indicate a change in field. This would mean a change to how the 
> user could specify arguments to a command. My first thought is to use a 
> json-like syntax like this:
> 
> CMD: drive_add
> File: 
> ID: mydrive
> 
> but this would still have the problem with newline characters in the file 
> name. Spaces, commas, single and double quotes would be usable. Newline 
> characters in file names are rare enough that we might consider ignoring this 
> case altogether. 

Well then you may as well almost use JSON.

> Another option is using a tag based system that would be able to handle every 
> character. Something like this:
> 
> drive_add 0  /users/user/some path   mydrive 
> 
> The only way this system would fail if the text "" was in the file 
> name. 
> 
> Any solutions you wish to share?

There aren't many simple solutions; almost everything requires
a way to escape whatever character/tag you choose because that
could validly be in a filename.

If there's a way to escape the , so you could do something like
drive_add 1 "file=foo\,bar"

(I'm not sure if that's \, or \\, or whatever) then that would
work; but I've not dug to find the code that splits the file= opts
to find if it already has an escape mechanism)

Dave

--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-devel] [PATCH v1 0/5][RFC] Refactoring of AIS support

2017-10-30 Thread Cornelia Huck
On Mon, 30 Oct 2017 14:48:23 +0100
Christian Borntraeger  wrote:


> FWIW, I am testing a guest patch that enables zPCI without AIS. Its as simple 
> as
> 
> 
> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
> index 7b30af5..9b24836 100644
> --- a/arch/s390/pci/pci.c
> +++ b/arch/s390/pci/pci.c
> @@ -953,7 +953,7 @@ static int __init pci_base_init(void)
> if (!s390_pci_probe)
> return 0;
>  
> -   if (!test_facility(69) || !test_facility(71) || !test_facility(72))
> +   if (!test_facility(69) || !test_facility(71))
> return 0;
>  
> rc = zpci_debug_init();
> diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
> index ea34086..61f8c82 100644
> --- a/arch/s390/pci/pci_insn.c
> +++ b/arch/s390/pci/pci_insn.c
> @@ -7,6 +7,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -93,6 +94,8 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
>  /* Set Interruption Controls */
>  void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
>  {
> +   if (!test_facility(72))
> +   return;
> asm volatile (
> "   .insn   rsy,0xebd1,%[ctl],%[isc],%[u]\n"
> : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" 
> (*unused));
> 

Sounds good. Presumably this makes the adapter interrupt handling work
as for virtio (and qdio)? Is there any ais-less pci hardware out in the
wild?



Re: [Qemu-devel] [PATCH v4] nvme: Add tracing

2017-10-30 Thread Philippe Mathieu-Daudé
Cc'ing Stefan and Trivial

On 10/30/2017 01:07 PM, Doug Gale wrote:
> From 0e27b5dca8f4f32a1b194e1b3544be77dd4f45d9 Mon Sep 17 00:00:00 2001
> From: Doug Gale 
> Date: Mon, 30 Oct 2017 09:28:43 -0400
> Subject: [PATCH] nvme: Add tracing
> 

^ to remove from commit description, maybe maintainer taking this can
cleanup.
> Add trace output for commands, errors, and undefined behavior.
> Add guest error log output for undefined behavior.
> Report invalid undefined accesses to MMIO.
> Annotate unlikely error checks with unlikely.
> 
> Signed-off-by: Doug Gale 

Reviewed-by: Philippe Mathieu-Daudé 

> ---
>  hw/block/nvme.c   | 349 
> ++
>  hw/block/trace-events |  93 ++
>  2 files changed, 390 insertions(+), 52 deletions(-)



Re: [Qemu-devel] [PATCH v6 05/12] nbd/server: Simplify nbd_negotiate_options loop

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

27.10.2017 13:40, Eric Blake wrote:

Instead of making each caller check whether a transmission error
occurred, we can sink a common error check to the end of the loop.

Signed-off-by: Eric Blake 


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




[Qemu-devel] [PATCH v8 05/14] migration: introduce postcopy-only pending

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
There would be savevm states (dirty-bitmap) which can migrate only in
postcopy stage. The corresponding pending is introduced here.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
---
 include/migration/register.h | 17 +++--
 migration/savevm.h   |  5 +++--
 migration/block.c|  7 ---
 migration/migration.c| 15 ---
 migration/ram.c  |  9 +
 migration/savevm.c   | 13 -
 migration/trace-events   |  2 +-
 7 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index f4f7bdc177..9436a87678 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -37,8 +37,21 @@ typedef struct SaveVMHandlers {
 int (*save_setup)(QEMUFile *f, void *opaque);
 void (*save_live_pending)(QEMUFile *f, void *opaque,
   uint64_t threshold_size,
-  uint64_t *non_postcopiable_pending,
-  uint64_t *postcopiable_pending);
+  uint64_t *res_precopy_only,
+  uint64_t *res_compatible,
+  uint64_t *res_postcopy_only);
+/* Note for save_live_pending:
+ * - res_precopy_only is for data which must be migrated in precopy phase
+ * or in stopped state, in other words - before target vm start
+ * - res_compatible is for data which may be migrated in any phase
+ * - res_postcopy_only is for data which must be migrated in postcopy phase
+ * or in stopped state, in other words - after source vm stop
+ *
+ * Sum of res_precopy_only, res_compatible and res_postcopy_only is the
+ * whole amount of pending data.
+ */
+
+
 LoadStateHandler *load_state;
 int (*load_setup)(QEMUFile *f, void *opaque);
 int (*load_cleanup)(void *opaque);
diff --git a/migration/savevm.h b/migration/savevm.h
index 295c4a1f2c..cf4f0d37ca 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -38,8 +38,9 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f);
 int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
bool inactivate_disks);
 void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
-   uint64_t *res_non_postcopiable,
-   uint64_t *res_postcopiable);
+   uint64_t *res_precopy_only,
+   uint64_t *res_compatible,
+   uint64_t *res_postcopy_only);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
 int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
diff --git a/migration/block.c b/migration/block.c
index 3282809583..39dfa567e8 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -865,8 +865,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
 }
 
 static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
-   uint64_t *non_postcopiable_pending,
-   uint64_t *postcopiable_pending)
+   uint64_t *res_precopy_only,
+   uint64_t *res_compatible,
+   uint64_t *res_postcopy_only)
 {
 /* Estimate pending number of bytes to send */
 uint64_t pending;
@@ -887,7 +888,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, 
uint64_t max_size,
 
 DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
 /* We don't do postcopy */
-*non_postcopiable_pending += pending;
+*res_precopy_only += pending;
 }
 
 static int block_load(QEMUFile *f, void *opaque, int version_id)
diff --git a/migration/migration.c b/migration/migration.c
index 4de3b551fe..e6c9be3cca 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2216,20 +2216,21 @@ static void *migration_thread(void *opaque)
 uint64_t pending_size;
 
 if (!qemu_file_rate_limit(s->to_dst_file)) {
-uint64_t pend_post, pend_nonpost;
+uint64_t pend_pre, pend_compat, pend_post;
 
-qemu_savevm_state_pending(s->to_dst_file, threshold_size,
-  &pend_nonpost, &pend_post);
-pending_size = pend_nonpost + pend_post;
+qemu_savevm_state_pending(s->to_dst_file, threshold_size, &pend_pre,
+  &pend_compat, &pend_post);
+pending_size = pend_pre + pend_compat + pend_post;
 trace_migrate_pending(pending_size, threshold_size,
-  pend_post, pend_nonpost);
+  pend_pre, pend_compat, pend_post);
 if (pending_size && pending_size >= threshold_size) 

[Qemu-devel] [PATCH v8 03/14] block/dirty-bitmap: add bdrv_dirty_bitmap_release_successor

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
Add bdrv_dirty_bitmap_release_successor(), which just releases the
successor and unfreezes the bitmap without any additional work.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Denis V. Lunev 
---
 include/block/dirty-bitmap.h |  2 ++
 block/dirty-bitmap.c | 14 ++
 2 files changed, 16 insertions(+)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 93d4336505..a9e2a92e4f 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -20,6 +20,8 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState 
*bs,
 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
Error **errp);
+void bdrv_dirty_bitmap_release_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap);
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
 const char *name);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 981f99d362..7578863aa1 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -431,6 +431,20 @@ void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 bitmap->disabled = false;
 }
 
+/* Called with BQL taken. */
+void bdrv_dirty_bitmap_release_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *parent)
+{
+qemu_mutex_lock(parent->mutex);
+
+if (parent->successor) {
+bdrv_release_dirty_bitmap_locked(bs, parent->successor);
+parent->successor = NULL;
+}
+
+qemu_mutex_unlock(parent->mutex);
+}
+
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
 {
 BdrvDirtyBitmap *bm;
-- 
2.11.1




[Qemu-devel] [PATCH v8 02/14] block/dirty-bitmap: add locked version of bdrv_release_dirty_bitmap

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
It is needed to realize bdrv_dirty_bitmap_release_successor in
the following patch.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/dirty-bitmap.c | 25 -
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 81adbeb6d4..981f99d362 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -326,13 +326,13 @@ static bool bdrv_dirty_bitmap_has_name(BdrvDirtyBitmap 
*bitmap)
 return !!bdrv_dirty_bitmap_name(bitmap);
 }
 
-/* Called with BQL taken.  */
-static void bdrv_do_release_matching_dirty_bitmap(
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+static void bdrv_do_release_matching_dirty_bitmap_locked(
 BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
 bool (*cond)(BdrvDirtyBitmap *bitmap))
 {
 BdrvDirtyBitmap *bm, *next;
-bdrv_dirty_bitmaps_lock(bs);
+
 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
 if ((!bitmap || bm == bitmap) && (!cond || cond(bm))) {
 assert(!bm->active_iterators);
@@ -344,18 +344,33 @@ static void bdrv_do_release_matching_dirty_bitmap(
 g_free(bm);
 
 if (bitmap) {
-goto out;
+return;
 }
 }
 }
+
 if (bitmap) {
 abort();
 }
+}
 
-out:
+/* Called with BQL taken.  */
+static void bdrv_do_release_matching_dirty_bitmap(
+BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+bool (*cond)(BdrvDirtyBitmap *bitmap))
+{
+bdrv_dirty_bitmaps_lock(bs);
+bdrv_do_release_matching_dirty_bitmap_locked(bs, bitmap, cond);
 bdrv_dirty_bitmaps_unlock(bs);
 }
 
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+static void bdrv_release_dirty_bitmap_locked(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap)
+{
+bdrv_do_release_matching_dirty_bitmap_locked(bs, bitmap, NULL);
+}
+
 /* Called with BQL taken.  */
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
-- 
2.11.1




[Qemu-devel] [PATCH v8 00/14] Dirty bitmaps postcopy migration

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
Hi all!

There is a new version of dirty bitmap postcopy migration series.

v8

clone: tag postcopy-v8 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v8

- rebased on master
- patches 01-03 from v7 are already merged to master
- patch order is changed to make it possible to merge block/dirty-bitmap patches
  separately if needed
01: new patch
03: fixed to use _locked version of bdrv_release_dirty_bitmap
06: qapi-schema.json -> qapi/migration.json
2.9 -> 2.11
10: protocol changed a bit:
  instead of 1 byte "bitmap enabled flag" this byte becomes just "flags"
  and have "enabled", "persistent" and "autoloading" flags inside.
  also, make all migrated bitmaps to be not persistent (to prevent their
  storing on source vm)
14: new patch


patches status:
01-04 - are only about block/dirty-bitmap and have no r-b. Fam, John, Paolo 
(about bitmap lock),
please take a look. These patches are ok to be merged separately (but before 
05-14)
other patches are about migration
05-09 has Juan's r-b (and some of them has John's and Eric's r-bs)
10 - the main patch (dirty bitmaps migration), has no r-b.
11 - preparation for tests, not related to migration directly, has Max's r-b, 
ok to be merged
separately (but before 12-14)
12-14 - tests, 12 and 13 have Max's r-b, 14 is new


v7

clone: tag postcopy-v7 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v7

- rebased on dirty-bitmap byte-based interfaces
(based on git://repo.or.cz/qemu/ericb.git branch nbd-byte-dirty-v4)
- migration of persistent bitmaps should fail for shared storage migration for 
now,
  as persistent dirty bitmaps are stored/load on inactivate/invalidate-cache.
  also, even for non-shared storage migration there would be useless saving of 
dirty
  bitmaps on source. This all will be optimized later.

01: stuff from include/migration/vmstate.h moved to 
include/migration/register.h (rebase)
03: some structural changes due to rebase - drop r-b
04: stuff from include/migration/vmstate.h moved to 
include/migration/register.h (rebase)
stuff from include/sysemu/sysemu.h moved to migration/savevm.h (rebase)
05: fix patch header: block -> block/dirty-bitmap
add locking, drop r-b
06: stuff from include/migration/migration.h moved to migration/migration.h 
(rebase)
07: add locking, drop r-b
09: stuff from include/migration/qemu-file.h moved to migration/qemu-file.h 
(rebase)
10: stuff from include/migration/vmstate.h moved to 
include/migration/register.h (rebase)
11: new patch
12: a lot of changes/fixes (mostly by Fam's comments) + rebase
header-definition movement
remove include 
add some includes
fix/refactor bitmap flags send
byte-based interface for dirty bitmaps (rebase)
froze bitmaps on source
init_dirty_bitmap_migration can return error, if some of bitmaps are already
  frozen
bdrv_ref drives with bitmaps
fprintf -> error_report
check version_id in _load function

v6:

clone: tag postcopy-v6 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v6

rebase on master.

03 - tiny contextual change

12 - little change, but it should be reviewed. Call of 
init_dirty_bitmap_incoming_migration()
(which only initialize mutex) moved from start of 
process_incoming_migration_co (it was
immediately after "mis = migration_incoming_state_new(f)") to 
migration_incoming_get_current()
to stay with initialization code.
I remove r-b's, but hope that this will not be a problem. The only change 
in this patch - is moved
call of init_dirty_bitmap_incoming_migration.
I do so because of recent

commit b4b076daf324894dd288cbdb67ff1e3c7434df7b
Author: Juan Quintela 
Date:   Mon Jan 23 22:32:06 2017 +0100

migration: create Migration Incoming State at init time

15 - add Max's r-b

v5:

clone: tag postcopy-v5 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v5

- move 'signed-off' over 'reviewed-by' in patches.

03,04 - add comments. Hope they will be ok for you, so add Juan's r-b.
If not ok - let me know and I'll resend.

06,08,12 - add Max's r-b
07,09,10,11,12 - add Juan's r-b

14 - used last version of this patch from qcow2-bitmap series with 
 Max's r-b. It has contextual changes due to different base.

15 - fix 041 iotest, add default node-name only if path is specified and
 node-name is not specified
16 - handle whitespaces
   s/"exec: cat " + fifo/"exec: cat '" + fifo + "'"/
 fix indentation
 add Max's r-b
17 - fix typos, wrong size in comment, s/md5/sha256/
 add Max's r-b

v4:

clone: tag postcopy-v4 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: 

[Qemu-devel] [PATCH v8 10/14] migration: add postcopy migration of dirty bitmaps

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
associated with root nodes and non-root named nodes are migrated.

If destination qemu is already containing a dirty bitmap with the same name
as a migrated bitmap (for the same node), then, if their granularities are
the same the migration will be done, otherwise the error will be generated.

If destination qemu doesn't contain such bitmap it will be created.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/migration/misc.h   |   3 +
 migration/migration.h  |   3 +
 migration/block-dirty-bitmap.c | 734 +
 migration/migration.c  |   3 +
 migration/savevm.c |   2 +
 vl.c   |   1 +
 migration/Makefile.objs|   1 +
 migration/trace-events |  14 +
 8 files changed, 761 insertions(+)
 create mode 100644 migration/block-dirty-bitmap.c

diff --git a/include/migration/misc.h b/include/migration/misc.h
index c079b7771b..9cc539e232 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -55,4 +55,7 @@ bool migration_has_failed(MigrationState *);
 bool migration_in_postcopy_after_devices(MigrationState *);
 void migration_global_dump(Monitor *mon);
 
+/* migration/block-dirty-bitmap.c */
+void dirty_bitmap_mig_init(void);
+
 #endif
diff --git a/migration/migration.h b/migration/migration.h
index 50d1f01346..4e3ad04664 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -211,4 +211,7 @@ void migrate_send_rp_pong(MigrationIncomingState *mis,
 void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname,
   ram_addr_t start, size_t len);
 
+void dirty_bitmap_mig_before_vm_start(void);
+void init_dirty_bitmap_incoming_migration(void);
+
 #endif
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
new file mode 100644
index 00..53cb20045d
--- /dev/null
+++ b/migration/block-dirty-bitmap.c
@@ -0,0 +1,734 @@
+/*
+ * Block dirty bitmap postcopy migration
+ *
+ * Copyright IBM, Corp. 2009
+ * Copyright (c) 2016-2017 Parallels International GmbH
+ *
+ * Authors:
+ *  Liran Schour   
+ *  Vladimir Sementsov-Ogievskiy 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ * This file is derived from migration/block.c, so its author and IBM 
copyright
+ * are here, although content is quite different.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ *
+ ****
+ *
+ * Here postcopy migration of dirty bitmaps is realized. Only named dirty
+ * bitmaps, associated with root nodes and non-root named nodes are migrated.
+ *
+ * If destination qemu is already containing a dirty bitmap with the same name
+ * as a migrated bitmap (for the same node), then, if their granularities are
+ * the same the migration will be done, otherwise the error will be generated.
+ *
+ * If destination qemu doesn't contain such bitmap it will be created.
+ *
+ * format of migration:
+ *
+ * # Header (shared for different chunk types)
+ * 1, 2 or 4 bytes: flags (see qemu_{get,put}_flags)
+ * [ 1 byte: node name size ] \  flags & DEVICE_NAME
+ * [ n bytes: node name ] /
+ * [ 1 byte: bitmap name size ] \  flags & BITMAP_NAME
+ * [ n bytes: bitmap name ] /
+ *
+ * # Start of bitmap migration (flags & START)
+ * header
+ * be64: granularity
+ * 1 byte: bitmap flags (corresponds to BdrvDirtyBitmap)
+ *   bit 0-  bitmap is enabled
+ *   bit 1-  bitmap is persistent
+ *   bit 2-  bitmap is autoloading
+ *   bits 3-7 - reserved, must be zero
+ *
+ * # Complete of bitmap migration (flags & COMPLETE)
+ * header
+ *
+ * # Data chunk of bitmap migration
+ * header
+ * be64: start sector
+ * be32: number of sectors
+ * [ be64: buffer size  ] \ ! (flags & ZEROES)
+ * [ n bytes: buffer] /
+ *
+ * The last chunk in stream should contain flags & EOS. The chunk may skip
+ * device and/or bitmap names, assuming them to be the same with the previous
+ * chunk.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
+#include "migration/qemu-file.h"
+#include "migration/vmstate.h"
+#include "migration/register.h"
+#include "qemu/hbitmap.h"
+#include "sysemu/sysemu.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "trace.h"
+
+#define CHUNK_SIZE (1 << 10)
+
+/* Flags occupy one, two or four bytes (Big Endian). The size is determined as
+ * follows:
+ * in first (most significant) byte bit 8 is clear  -->  one byte
+ * in first byte bit 8 is set-->  two or four bytes, depending on second
+ *   

[Qemu-devel] [PATCH v8 09/14] migration: add is_active_iterate handler

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
Only-postcopy savevm states (dirty-bitmap) don't need live iteration, so
to disable them and stop transporting empty sections there is a new
savevm handler.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
---
 include/migration/register.h | 1 +
 migration/savevm.c   | 5 +
 2 files changed, 6 insertions(+)

diff --git a/include/migration/register.h b/include/migration/register.h
index 9436a87678..cafbeb64b5 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -25,6 +25,7 @@ typedef struct SaveVMHandlers {
 /* This runs both outside and inside the iothread lock.  */
 bool (*is_active)(void *opaque);
 bool (*has_postcopy)(void *opaque);
+bool (*is_active_iterate)(void *opaque);
 
 /* This runs outside the iothread lock in the migration case, and
  * within the lock in the savevm case.  The callback had better only
diff --git a/migration/savevm.c b/migration/savevm.c
index f6b62cb202..9bbfb3fa1b 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1026,6 +1026,11 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
 continue;
 }
 }
+if (se->ops && se->ops->is_active_iterate) {
+if (!se->ops->is_active_iterate(se->opaque)) {
+continue;
+}
+}
 /*
  * In the postcopy phase, any device that doesn't know how to
  * do postcopy should have saved it's state in the _complete
-- 
2.11.1




[Qemu-devel] [PATCH v8 11/14] iotests: add default node-name

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
When testing migration, the node-names auto-generated by qemu differ
between the source and destination qemu, so migration fails. After this
patch, the node-names auto-generated by iotests will be the same.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/iotests.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 6f057904a9..95454c1893 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -216,6 +216,8 @@ class VM(qtest.QEMUQtestMachine):
 options.append('file=%s' % path)
 options.append('format=%s' % format)
 options.append('cache=%s' % cachemode)
+if 'node-name' not in opts:
+options.append('node-name=drivenode%d' % self._num_drives)
 
 if opts:
 options.append(opts)
-- 
2.11.1




[Qemu-devel] [PATCH v8 12/14] iotests: add dirty bitmap migration test

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
The test starts two VMs (vm_a, vm_b), creates a dirty bitmap in
the first one, does several writes to the corresponding device and
then migrates vm_a to vm_b with dirty bitmaps.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/169 | 86 ++
 tests/qemu-iotests/169.out |  5 +++
 tests/qemu-iotests/group   |  1 +
 3 files changed, 92 insertions(+)
 create mode 100755 tests/qemu-iotests/169
 create mode 100644 tests/qemu-iotests/169.out

diff --git a/tests/qemu-iotests/169 b/tests/qemu-iotests/169
new file mode 100755
index 00..7630ecbe51
--- /dev/null
+++ b/tests/qemu-iotests/169
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+#
+# Tests for dirty bitmaps migration.
+#
+# Copyright (C) Vladimir Sementsov-Ogievskiy 2015-2016
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+import iotests
+import time
+from iotests import qemu_img
+
+disk_a = os.path.join(iotests.test_dir, 'disk_a')
+disk_b = os.path.join(iotests.test_dir, 'disk_b')
+fifo = os.path.join(iotests.test_dir, 'mig_fifo')
+
+class TestDirtyBitmapMigration(iotests.QMPTestCase):
+
+def setUp(self):
+size = 0x4 # 1G
+os.mkfifo(fifo)
+qemu_img('create', '-f', iotests.imgfmt, disk_a, str(size))
+qemu_img('create', '-f', iotests.imgfmt, disk_b, str(size))
+self.vm_a = iotests.VM(path_suffix='a').add_drive(disk_a)
+self.vm_b = iotests.VM(path_suffix='b').add_drive(disk_b)
+self.vm_b.add_incoming("exec: cat '" + fifo + "'")
+self.vm_a.launch()
+self.vm_b.launch()
+
+def tearDown(self):
+self.vm_a.shutdown()
+self.vm_b.shutdown()
+os.remove(disk_a)
+os.remove(disk_b)
+os.remove(fifo)
+
+def test_migration(self):
+granularity = 512
+regions = [
+{ 'start': 0,   'count': 0x10 },
+{ 'start': 0x1, 'count': 0x20  },
+{ 'start': 0x39990, 'count': 0x10  }
+]
+
+result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
+   name='bitmap', granularity=granularity)
+self.assert_qmp(result, 'return', {});
+
+for r in regions:
+self.vm_a.hmp_qemu_io('drive0',
+  'write %d %d' % (r['start'], r['count']))
+
+result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256',
+   node='drive0', name='bitmap')
+sha256 = result['return']['sha256']
+
+result = self.vm_a.qmp('migrate-set-capabilities',
+   capabilities=[{'capability': 'dirty-bitmaps',
+  'state': True}])
+self.assert_qmp(result, 'return', {})
+
+result = self.vm_a.qmp('migrate', uri='exec:cat>' + fifo)
+self.assertNotEqual(self.vm_a.event_wait("STOP"), None)
+self.assertNotEqual(self.vm_b.event_wait("RESUME"), None)
+time.sleep(2)
+
+result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256',
+   node='drive0', name='bitmap')
+self.assert_qmp(result, 'return/sha256', sha256);
+
+
+if __name__ == '__main__':
+iotests.main()
diff --git a/tests/qemu-iotests/169.out b/tests/qemu-iotests/169.out
new file mode 100644
index 00..ae1213e6f8
--- /dev/null
+++ b/tests/qemu-iotests/169.out
@@ -0,0 +1,5 @@
+.
+--
+Ran 1 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 24e5ad1b79..96e173abb3 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -169,6 +169,7 @@
 162 auto quick
 163 rw auto quick
 165 rw auto quick
+169 rw auto quick
 170 rw auto quick
 171 rw auto quick
 172 auto
-- 
2.11.1




[Qemu-devel] [PATCH v8 06/14] qapi: add dirty-bitmaps migration capability

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: John Snow 
Reviewed-by: Eric Blake 
Reviewed-by: Juan Quintela 
---
 qapi/migration.json   | 6 +-
 migration/migration.h | 1 +
 migration/migration.c | 9 +
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index bbc4671ded..ee0ad0b3ad 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -352,12 +352,16 @@
 #
 # @x-multifd: Use more than one fd for migration (since 2.11)
 #
+# @dirty-bitmaps: If enabled, QEMU will migrate named dirty bitmaps.
+# (since 2.11)
+#
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
-   'block', 'return-path', 'pause-before-switchover', 'x-multifd' ] }
+   'block', 'return-path', 'pause-before-switchover', 'x-multifd',
+   'dirty-bitmaps' ] }
 
 ##
 # @MigrationCapabilityStatus:
diff --git a/migration/migration.h b/migration/migration.h
index 663415fe48..50d1f01346 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -181,6 +181,7 @@ bool migrate_postcopy(void);
 bool migrate_release_ram(void);
 bool migrate_postcopy_ram(void);
 bool migrate_zero_blocks(void);
+bool migrate_dirty_bitmaps(void);
 
 bool migrate_auto_converge(void);
 bool migrate_use_multifd(void);
diff --git a/migration/migration.c b/migration/migration.c
index e6c9be3cca..1526cd4bff 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1544,6 +1544,15 @@ int migrate_decompress_threads(void)
 return s->parameters.decompress_threads;
 }
 
+bool migrate_dirty_bitmaps(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
+}
+
 bool migrate_use_events(void)
 {
 MigrationState *s;
-- 
2.11.1




[Qemu-devel] [PATCH v8 04/14] block/dirty-bitmap: add bdrv_dirty_bitmap_set_frozen

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
Make it possible to set bitmap 'frozen' without a successor.
This is needed to protect the bitmap during outgoing bitmap postcopy
migration.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/dirty-bitmap.h |  1 +
 block/dirty-bitmap.c | 22 --
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index a9e2a92e4f..ae6d697850 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -39,6 +39,7 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState 
*bs);
 uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_set_frozen(BdrvDirtyBitmap *bitmap, bool frozen);
 const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap);
 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap);
 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 7578863aa1..67fc6bd6e0 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -40,6 +40,8 @@ struct BdrvDirtyBitmap {
 QemuMutex *mutex;
 HBitmap *bitmap;/* Dirty bitmap implementation */
 HBitmap *meta;  /* Meta dirty bitmap */
+bool frozen;/* Bitmap is frozen, it can't be modified
+   through QMP */
 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
 char *name; /* Optional non-empty unique ID */
 int64_t size;   /* Size of the bitmap, in bytes */
@@ -183,13 +185,22 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap 
*bitmap)
 /* Called with BQL taken.  */
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
 {
-return bitmap->successor;
+return bitmap->frozen;
+}
+
+/* Called with BQL taken.  */
+void bdrv_dirty_bitmap_set_frozen(BdrvDirtyBitmap *bitmap, bool frozen)
+{
+qemu_mutex_lock(bitmap->mutex);
+assert(bitmap->successor == NULL);
+bitmap->frozen = frozen;
+qemu_mutex_unlock(bitmap->mutex);
 }
 
 /* Called with BQL taken.  */
 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
 {
-return !(bitmap->disabled || bitmap->successor);
+return !(bitmap->disabled || (bitmap->successor != NULL));
 }
 
 /* Called with BQL taken.  */
@@ -234,6 +245,7 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
 
 /* Install the successor and freeze the parent */
 bitmap->successor = child;
+bitmap->frozen = true;
 return 0;
 }
 
@@ -266,6 +278,8 @@ BdrvDirtyBitmap 
*bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
 name = bitmap->name;
 bitmap->name = NULL;
 successor->name = name;
+assert(bitmap->frozen);
+bitmap->frozen = false;
 bitmap->successor = NULL;
 successor->persistent = bitmap->persistent;
 bitmap->persistent = false;
@@ -298,6 +312,8 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState 
*bs,
 return NULL;
 }
 bdrv_release_dirty_bitmap(bs, successor);
+assert(parent->frozen);
+parent->frozen = false;
 parent->successor = NULL;
 
 return parent;
@@ -439,6 +455,8 @@ void bdrv_dirty_bitmap_release_successor(BlockDriverState 
*bs,
 
 if (parent->successor) {
 bdrv_release_dirty_bitmap_locked(bs, parent->successor);
+assert(parent->frozen);
+parent->frozen = false;
 parent->successor = NULL;
 }
 
-- 
2.11.1




[Qemu-devel] [PATCH v8 01/14] block/dirty-bitmap: add bdrv_dirty_bitmap_enable_successor()

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
Enabling a bitmap's successor is necessary in order to enable the successors
of bitmaps being migrated before the target VM starts.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/dirty-bitmap.h | 1 +
 block/dirty-bitmap.c | 8 
 2 files changed, 9 insertions(+)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 3579a7597c..93d4336505 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -20,6 +20,7 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState 
*bs,
 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
Error **errp);
+void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap);
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
 const char *name);
 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index bd04e991b1..81adbeb6d4 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -237,6 +237,14 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState 
*bs,
 return 0;
 }
 
+/* Called with BQL taken. */
+void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap)
+{
+qemu_mutex_lock(bitmap->mutex);
+bdrv_enable_dirty_bitmap(bitmap->successor);
+qemu_mutex_unlock(bitmap->mutex);
+}
+
 /**
  * For a bitmap with a successor, yield our name to the successor,
  * delete the old bitmap, and return a handle to the new bitmap.
-- 
2.11.1




[Qemu-devel] [PATCH v8 14/14] iotests: add persistent bitmap migration test

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
Test that a persistent bitmap migrates and that its persistence property
migrates too.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/169 | 21 ++---
 tests/qemu-iotests/169.out |  4 ++--
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/169 b/tests/qemu-iotests/169
index 4ecef2f23f..b0270f40f7 100755
--- a/tests/qemu-iotests/169
+++ b/tests/qemu-iotests/169
@@ -46,7 +46,7 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
 self.vm_a.launch()
 self.vm_b.launch()
 
-def test_migration(self):
+def do_test_migration(self, persistent=False):
 self.init(0x4) # 1G
 granularity = 512
 regions = [
@@ -55,8 +55,13 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
 { 'start': 0x39990, 'count': 0x10  }
 ]
 
-result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
-   name='bitmap', granularity=granularity)
+if persistent:
+result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
+   name='bitmap', granularity=granularity,
+   persistent=True, autoload=True)
+else:
+result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
+   name='bitmap', granularity=granularity)
 self.assert_qmp(result, 'return', {});
 
 for r in regions:
@@ -77,10 +82,20 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
 self.assertNotEqual(self.vm_b.event_wait("RESUME"), None)
 time.sleep(2)
 
+if persistent:
+self.vm_b.shutdown()
+self.vm_b.launch()
+
 result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256',
node='drive0', name='bitmap')
 self.assert_qmp(result, 'return/sha256', sha256);
 
+def test_migration(self):
+self.do_test_migration()
+
+def test_migration_persistent(self):
+self.do_test_migration(persistent=True)
+
 def test_postcopy(self):
 self.init(0x40) # 256G
 write_size = 0x4000
diff --git a/tests/qemu-iotests/169.out b/tests/qemu-iotests/169.out
index fbc63e62f8..8d7e996700 100644
--- a/tests/qemu-iotests/169.out
+++ b/tests/qemu-iotests/169.out
@@ -1,5 +1,5 @@
-..
+...
 --
-Ran 2 tests
+Ran 3 tests
 
 OK
-- 
2.11.1




[Qemu-devel] [PATCH v8 13/14] iotests: add dirty bitmap postcopy test

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
Test
- start two vms (vm_a, vm_b)

- in a
- do writes from set A
- do writes from set B
- fix bitmap sha256
- clear bitmap
- do writes from set A
- start migration
- then, in b
- wait vm start (postcopy should start)
- do writes from set B
- check bitmap sha256

The test should verify postcopy migration and then merging with delta
(changes in target, during postcopy process).

Reduce supported cache modes to only 'none', because with caching enabled
the time from source.STOP to target.RESUME is unpredictable and we can fail
with a timeout while waiting for target.RESUME.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/169| 74 +--
 tests/qemu-iotests/169.out|  4 +--
 tests/qemu-iotests/iotests.py |  7 +++-
 3 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/tests/qemu-iotests/169 b/tests/qemu-iotests/169
index 7630ecbe51..4ecef2f23f 100755
--- a/tests/qemu-iotests/169
+++ b/tests/qemu-iotests/169
@@ -29,8 +29,14 @@ fifo = os.path.join(iotests.test_dir, 'mig_fifo')
 
 class TestDirtyBitmapMigration(iotests.QMPTestCase):
 
-def setUp(self):
-size = 0x4 # 1G
+def tearDown(self):
+self.vm_a.shutdown()
+self.vm_b.shutdown()
+os.remove(disk_a)
+os.remove(disk_b)
+os.remove(fifo)
+
+def init(self, size):
 os.mkfifo(fifo)
 qemu_img('create', '-f', iotests.imgfmt, disk_a, str(size))
 qemu_img('create', '-f', iotests.imgfmt, disk_b, str(size))
@@ -40,14 +46,8 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
 self.vm_a.launch()
 self.vm_b.launch()
 
-def tearDown(self):
-self.vm_a.shutdown()
-self.vm_b.shutdown()
-os.remove(disk_a)
-os.remove(disk_b)
-os.remove(fifo)
-
 def test_migration(self):
+self.init(0x4) # 1G
 granularity = 512
 regions = [
 { 'start': 0,   'count': 0x10 },
@@ -81,6 +81,60 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
node='drive0', name='bitmap')
 self.assert_qmp(result, 'return/sha256', sha256);
 
+def test_postcopy(self):
+self.init(0x40) # 256G
+write_size = 0x4000
+granularity = 512
+chunk = 4096
+
+result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
+   name='bitmap', granularity=granularity)
+self.assert_qmp(result, 'return', {});
+
+s = 0
+while s < write_size:
+self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+s += 0x1
+s = 0x8000
+while s < write_size:
+self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+s += 0x1
+
+result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256',
+   node='drive0', name='bitmap')
+sha256 = result['return']['sha256']
+
+result = self.vm_a.qmp('block-dirty-bitmap-clear', node='drive0',
+   name='bitmap')
+self.assert_qmp(result, 'return', {});
+s = 0
+while s < write_size:
+self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+s += 0x1
+
+result = self.vm_a.qmp('migrate-set-capabilities',
+   capabilities=[{'capability': 'dirty-bitmaps',
+  'state': True}])
+self.assert_qmp(result, 'return', {})
+
+result = self.vm_a.qmp('migrate', uri='exec:cat>' + fifo)
+self.assertNotEqual(self.vm_a.event_wait("STOP"), None)
+self.assertNotEqual(self.vm_b.event_wait("RESUME"), None)
+
+s = 0x8000
+while s < write_size:
+self.vm_b.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+s += 0x1
+
+result = self.vm_b.qmp('query-block');
+while len(result['return'][0]['dirty-bitmaps']) > 1:
+time.sleep(2)
+result = self.vm_b.qmp('query-block');
+
+result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256',
+   node='drive0', name='bitmap')
+
+self.assert_qmp(result, 'return/sha256', sha256);
 
 if __name__ == '__main__':
-iotests.main()
+iotests.main(supported_fmts=['qcow2'], supported_cache_modes=['none'])
diff --git a/tests/qemu-iotests/169.out b/tests/qemu-iotests/169.out
index ae1213e6f8..fbc63e62f8 100644
--- a/tests/qemu-iotests/169.out
+++ b/tests/qemu-iotests/169.out
@@ -1,5 +1,5 @@
-.
+..
 --
-Ran 1 tests
+Ran 2 tests
 
 OK
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 95454c1893..88f73d6441 100644
--- a/tests/qemu-iotests/iotests.py
+++ 

[Qemu-devel] [PATCH v8 07/14] migration: include migrate_dirty_bitmaps in migrate_postcopy

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
Enable postcopy if dirty bitmap migration is enabled.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
---
 migration/migration.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index 1526cd4bff..e973837bfd 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1487,7 +1487,7 @@ bool migrate_postcopy_ram(void)
 
 bool migrate_postcopy(void)
 {
-return migrate_postcopy_ram();
+return migrate_postcopy_ram() || migrate_dirty_bitmaps();
 }
 
 bool migrate_auto_converge(void)
-- 
2.11.1




[Qemu-devel] [PATCH v8 08/14] migration/qemu-file: add qemu_put_counted_string()

2017-10-30 Thread Vladimir Sementsov-Ogievskiy
Add a function that is the opposite of qemu_get_counted_string().
qemu_put_counted_string() puts the one-byte length of the string (the
string should not be longer than 255 characters), and then it puts the
string itself, without the terminating zero byte.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: John Snow 
Reviewed-by: Juan Quintela 
---
 migration/qemu-file.h |  2 ++
 migration/qemu-file.c | 13 +
 2 files changed, 15 insertions(+)

diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index aae4e5ed36..f4f356ab12 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -174,4 +174,6 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t 
block_offset,
  ram_addr_t offset, size_t size,
  uint64_t *bytes_sent);
 
+void qemu_put_counted_string(QEMUFile *f, const char *name);
+
 #endif
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 2ab2bf362d..e85f501f86 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -734,6 +734,19 @@ size_t qemu_get_counted_string(QEMUFile *f, char buf[256])
 }
 
 /*
+ * Put a string with one preceding byte containing its length. The length of
+ * the string should be less than 256.
+ */
+void qemu_put_counted_string(QEMUFile *f, const char *str)
+{
+size_t len = strlen(str);
+
+assert(len < 256);
+qemu_put_byte(f, len);
+qemu_put_buffer(f, (const uint8_t *)str, len);
+}
+
+/*
  * Set the blocking state of the QEMUFile.
  * Note: On some transports the OS only keeps a single blocking state for
  *   both directions, and thus changing the blocking on the main
-- 
2.11.1




[Qemu-devel] [Bug 1728643] Re: qemu-io fails with Assertion `*host_offset != 0' failed

2017-10-30 Thread R.Nageswara Sastry
** Attachment added: "test.img tarred"
   
https://bugs.launchpad.net/qemu/+bug/1728643/+attachment/5000185/+files/test.img_1728643.tar.gz

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1728643

Title:
  qemu-io fails with Assertion `*host_offset != 0' failed

Status in QEMU:
  New

Bug description:
  git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
  This is on ppc64le architecture.

  Re-production steps:

  1. Copy the attached files named test.img to a directory
  2. And customize the following command to point to the above directory and 
run the same.
  # cp test.img copy.img
  # qemu-io /copy.img -c "write 884736 34816"

  from gdb:
  (gdb) bt
  #0  0x3fffad63eff0 in raise () from /lib64/libc.so.6
  #1  0x3fffad64136c in abort () from /lib64/libc.so.6
  #2  0x3fffad634c44 in __assert_fail_base () from /lib64/libc.so.6
  #3  0x3fffad634d34 in __assert_fail () from /lib64/libc.so.6
  #4  0x1006426c in qcow2_alloc_cluster_offset (bs=0x391e9ad0, 
offset=884736, bytes=0x3fffaa89fb4c, host_offset=0x3fffaa89fb58, 
m=0x3fffaa89fb60)
  at block/qcow2-cluster.c:1524
  #5  0x1004d3f4 in qcow2_co_pwritev (bs=0x391e9ad0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=0) at block/qcow2.c:1919
  #6  0x100a9648 in bdrv_driver_pwritev (bs=0x391e9ad0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=16) at block/io.c:898
  #7  0x100ab630 in bdrv_aligned_pwritev (child=0x391f51a0, 
req=0x3fffaa89fdd8, offset=884736, bytes=34816, align=1, qiov=0x3fffce0e2940, 
flags=16)
  at block/io.c:1440
  #8  0x100ac4ac in bdrv_co_pwritev (child=0x391f51a0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=BDRV_REQ_FUA) at block/io.c:1691
  #9  0x1008da0c in blk_co_pwritev (blk=0x391d9410, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=BDRV_REQ_FUA) at 
block/block-backend.c:1085
  #10 0x1008db68 in blk_write_entry (opaque=0x3fffce0e2958) at 
block/block-backend.c:1110
  #11 0x101aa444 in coroutine_trampoline (i0=958427472, i1=0) at 
util/coroutine-ucontext.c:79
  #12 0x3fffad652b9c in makecontext () from /lib64/libc.so.6
  #13 0x in ?? ()
  (gdb) bt full
  #0  0x3fffad63eff0 in raise () from /lib64/libc.so.6
  No symbol table info available.
  #1  0x3fffad64136c in abort () from /lib64/libc.so.6
  No symbol table info available.
  #2  0x3fffad634c44 in __assert_fail_base () from /lib64/libc.so.6
  No symbol table info available.
  #3  0x3fffad634d34 in __assert_fail () from /lib64/libc.so.6
  No symbol table info available.
  #4  0x1006426c in qcow2_alloc_cluster_offset (bs=0x391e9ad0, 
offset=884736, bytes=0x3fffaa89fb4c, host_offset=0x3fffaa89fb58, 
m=0x3fffaa89fb60)
  at block/qcow2-cluster.c:1524
  s = 0x391f5d80
  start = 919552
  remaining = 0
  cluster_offset = 399360
  cur_bytes = 34816
  ret = 1
  __PRETTY_FUNCTION__ = "qcow2_alloc_cluster_offset"
  #5  0x1004d3f4 in qcow2_co_pwritev (bs=0x391e9ad0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=0) at block/qcow2.c:1919
  s = 0x391f5d80
  offset_in_cluster = 360448
  ret = 0
  cur_bytes = 34816
  cluster_offset = 0
  hd_qiov = {iov = 0x391b85a0, niov = 0, nalloc = 1, size = 0}
  bytes_done = 0
  cluster_data = 0x0
  l2meta = 0x392074c0
  __PRETTY_FUNCTION__ = "qcow2_co_pwritev"
  #6  0x100a9648 in bdrv_driver_pwritev (bs=0x391e9ad0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=16) at block/io.c:898
  drv = 0x102036f0 
  sector_num = 958319760
  nb_sectors = 2340082071
  ret = 743104256
  __PRETTY_FUNCTION__ = "bdrv_driver_pwritev"
  #7  0x100ab630 in bdrv_aligned_pwritev (child=0x391f51a0, 
req=0x3fffaa89fdd8, offset=884736, bytes=34816, align=1, qiov=0x3fffce0e2940, 
flags=16)
  at block/io.c:1440
  bs = 0x391e9ad0
  drv = 0x102036f0 
  waited = false
  ret = 0
  end_sector = 1796
  bytes_remaining = 34816
  max_transfer = 2147483647
  __PRETTY_FUNCTION__ = "bdrv_aligned_pwritev"
  #8  0x100ac4ac in bdrv_co_pwritev (child=0x391f51a0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=BDRV_REQ_FUA) at block/io.c:1691
  bs = 0x391e9ad0
  req = {bs = 0x391e9ad0, offset = 884736, bytes = 34816, type = 
BDRV_TRACKED_WRITE, serialising = false, overlap_offset = 884736,
overlap_bytes = 34816, list = {le_next = 0x0, le_prev = 
0x391ecd48}, co = 0x39207150, wait_queue = {entries = {sqh_first = 0x0,
sqh_last = 0x3fffaa89fe20}}, waiting_for = 0x0}
  align = 1
  ---Type  to continue, or q  to quit---
  head_buf = 0x0
  tail_buf = 0x0
  local_qiov 

[Qemu-devel] [Bug 1728643] [NEW] qemu-io fails with Assertion `*host_offset != 0' failed

2017-10-30 Thread R.Nageswara Sastry
Public bug reported:

git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
This is on ppc64le architecture.

Re-production steps:

1. Copy the attached files named test.img to a directory
2. And customize the following command to point to the above directory and run 
the same.
# cp test.img copy.img
# qemu-io /copy.img -c "write 884736 34816"

from gdb:
(gdb) bt
#0  0x3fffad63eff0 in raise () from /lib64/libc.so.6
#1  0x3fffad64136c in abort () from /lib64/libc.so.6
#2  0x3fffad634c44 in __assert_fail_base () from /lib64/libc.so.6
#3  0x3fffad634d34 in __assert_fail () from /lib64/libc.so.6
#4  0x1006426c in qcow2_alloc_cluster_offset (bs=0x391e9ad0, 
offset=884736, bytes=0x3fffaa89fb4c, host_offset=0x3fffaa89fb58, 
m=0x3fffaa89fb60)
at block/qcow2-cluster.c:1524
#5  0x1004d3f4 in qcow2_co_pwritev (bs=0x391e9ad0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=0) at block/qcow2.c:1919
#6  0x100a9648 in bdrv_driver_pwritev (bs=0x391e9ad0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=16) at block/io.c:898
#7  0x100ab630 in bdrv_aligned_pwritev (child=0x391f51a0, 
req=0x3fffaa89fdd8, offset=884736, bytes=34816, align=1, qiov=0x3fffce0e2940, 
flags=16)
at block/io.c:1440
#8  0x100ac4ac in bdrv_co_pwritev (child=0x391f51a0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=BDRV_REQ_FUA) at block/io.c:1691
#9  0x1008da0c in blk_co_pwritev (blk=0x391d9410, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=BDRV_REQ_FUA) at 
block/block-backend.c:1085
#10 0x1008db68 in blk_write_entry (opaque=0x3fffce0e2958) at 
block/block-backend.c:1110
#11 0x101aa444 in coroutine_trampoline (i0=958427472, i1=0) at 
util/coroutine-ucontext.c:79
#12 0x3fffad652b9c in makecontext () from /lib64/libc.so.6
#13 0x in ?? ()
(gdb) bt full
#0  0x3fffad63eff0 in raise () from /lib64/libc.so.6
No symbol table info available.
#1  0x3fffad64136c in abort () from /lib64/libc.so.6
No symbol table info available.
#2  0x3fffad634c44 in __assert_fail_base () from /lib64/libc.so.6
No symbol table info available.
#3  0x3fffad634d34 in __assert_fail () from /lib64/libc.so.6
No symbol table info available.
#4  0x1006426c in qcow2_alloc_cluster_offset (bs=0x391e9ad0, 
offset=884736, bytes=0x3fffaa89fb4c, host_offset=0x3fffaa89fb58, 
m=0x3fffaa89fb60)
at block/qcow2-cluster.c:1524
s = 0x391f5d80
start = 919552
remaining = 0
cluster_offset = 399360
cur_bytes = 34816
ret = 1
__PRETTY_FUNCTION__ = "qcow2_alloc_cluster_offset"
#5  0x1004d3f4 in qcow2_co_pwritev (bs=0x391e9ad0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=0) at block/qcow2.c:1919
s = 0x391f5d80
offset_in_cluster = 360448
ret = 0
cur_bytes = 34816
cluster_offset = 0
hd_qiov = {iov = 0x391b85a0, niov = 0, nalloc = 1, size = 0}
bytes_done = 0
cluster_data = 0x0
l2meta = 0x392074c0
__PRETTY_FUNCTION__ = "qcow2_co_pwritev"
#6  0x100a9648 in bdrv_driver_pwritev (bs=0x391e9ad0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=16) at block/io.c:898
drv = 0x102036f0 
sector_num = 958319760
nb_sectors = 2340082071
ret = 743104256
__PRETTY_FUNCTION__ = "bdrv_driver_pwritev"
#7  0x100ab630 in bdrv_aligned_pwritev (child=0x391f51a0, 
req=0x3fffaa89fdd8, offset=884736, bytes=34816, align=1, qiov=0x3fffce0e2940, 
flags=16)
at block/io.c:1440
bs = 0x391e9ad0
drv = 0x102036f0 
waited = false
ret = 0
end_sector = 1796
bytes_remaining = 34816
max_transfer = 2147483647
__PRETTY_FUNCTION__ = "bdrv_aligned_pwritev"
#8  0x100ac4ac in bdrv_co_pwritev (child=0x391f51a0, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=BDRV_REQ_FUA) at block/io.c:1691
bs = 0x391e9ad0
req = {bs = 0x391e9ad0, offset = 884736, bytes = 34816, type = 
BDRV_TRACKED_WRITE, serialising = false, overlap_offset = 884736,
  overlap_bytes = 34816, list = {le_next = 0x0, le_prev = 0x391ecd48}, 
co = 0x39207150, wait_queue = {entries = {sqh_first = 0x0,
  sqh_last = 0x3fffaa89fe20}}, waiting_for = 0x0}
align = 1
---Type  to continue, or q  to quit---
head_buf = 0x0
tail_buf = 0x0
local_qiov = {iov = 0x3fffaa89fdb0, niov = -1433797136, nalloc = 16383, 
size = 884736}
use_local_qiov = false
ret = 0
__PRETTY_FUNCTION__ = "bdrv_co_pwritev"
#9  0x1008da0c in blk_co_pwritev (blk=0x391d9410, offset=884736, 
bytes=34816, qiov=0x3fffce0e2940, flags=BDRV_REQ_FUA) at 
block/block-backend.c:1085
ret = 0
bs = 0x391e9ad0
#10 0x1008db68 in blk_write_entry (opaque=0x3fffce0e2958) at 
block/block-backend.c:1110
rwco = 0x3fffce0e2958
#11 0x101aa444 in 

Re: [Qemu-devel] [PATCH v3 5/5] RFC: fw_cfg: add DMA write operation in sysfs

2017-10-30 Thread Michael S. Tsirkin
On Mon, Oct 30, 2017 at 11:07:20AM +0000, Hatayama, Daisuke wrote:
> > From: Marc-André Lureau 
> >
> > Since qemu 2.9, DMA write operations are allowed. However, usage of this
> > interface from kernel or user-space is strongly discouraged by the
> > maintainers. This patch is meant for experimentations for now.
> >
> 
> Could you (or maintainers?) tell me how experimental the DMA write
> operations from kernel are?
> 
> From some technical reason?
> Or simply there has not been enough test yet so far?

The concern is security, talking from userspace to hypervisor
might become a trivial DOS vector.

If there's need for a specific entry to be accessible from userspace,
I'd rather white-list it.

> > Signed-off-by: Marc-André Lureau 
> > ---
> >  drivers/firmware/qemu_fw_cfg.c | 21 -
> >  1 file changed, 20 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
> > index 54b569da3257..e2f2ad1c9c0c 100644
> > --- a/drivers/firmware/qemu_fw_cfg.c
> > +++ b/drivers/firmware/qemu_fw_cfg.c
> > @@ -524,9 +524,28 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file 
> > *filp, struct kobject *kobj,
> >   return fw_cfg_read_blob(entry->f.select, buf, pos, count, true);
> >  }
> >
> > +static ssize_t fw_cfg_sysfs_write_raw(struct file *filp, struct kobject 
> > *kobj,
> > +   struct bin_attribute *bin_attr,
> > +   char *buf, loff_t pos, size_t count)
> > +{
> > + struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
> > +
> > + if (!fw_cfg_dma_enabled())
> > + return -ENOTSUPP;
> > +
> > + if (pos > entry->f.size)
> > + return -EINVAL;
> > +
> > + if (count > entry->f.size - pos)
> > + count = entry->f.size - pos;
> > +
> > + return fw_cfg_write_blob(entry->f.select, buf, pos, count);
> > +}
> > +
> >  static struct bin_attribute fw_cfg_sysfs_attr_raw = {
> > - .attr = { .name = "raw", .mode = S_IRUSR },
> > + .attr = { .name = "raw", .mode = S_IRUSR | S_IWUSR },
> >   .read = fw_cfg_sysfs_read_raw,
> > + .write = fw_cfg_sysfs_write_raw,
> >  };
> >
> >  /*
> > --
> > 2.14.1.146.gd35faa819
> 
> Thanks.
> HATAYAMA, Daisuke
> 



[Qemu-devel] [PATCH v4] nvme: Add tracing

2017-10-30 Thread Doug Gale
From 0e27b5dca8f4f32a1b194e1b3544be77dd4f45d9 Mon Sep 17 00:00:00 2001
From: Doug Gale 
Date: Mon, 30 Oct 2017 09:28:43 -0400
Subject: [PATCH] nvme: Add tracing

Add trace output for commands, errors, and undefined behavior.
Add guest error log output for undefined behavior.
Report invalid undefined accesses to MMIO.
Annotate unlikely error checks with unlikely.

Signed-off-by: Doug Gale 
---
 hw/block/nvme.c   | 349 ++
 hw/block/trace-events |  93 ++
 2 files changed, 390 insertions(+), 52 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 441e21ed1f..4d98ed9fba 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -34,8 +34,17 @@
 #include "qapi/visitor.h"
 #include "sysemu/block-backend.h"

+#include "qemu/log.h"
+#include "trace.h"
 #include "nvme.h"

+#define NVME_GUEST_ERR(trace, fmt, ...) \
+do { \
+(trace_##trace)(__VA_ARGS__); \
+qemu_log_mask(LOG_GUEST_ERROR, #trace \
+" in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+
 static void nvme_process_sq(void *opaque);

 static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
@@ -86,10 +95,14 @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
 {
 if (cq->irq_enabled) {
 if (msix_enabled(&(n->parent_obj))) {
+trace_nvme_irq_msix(cq->vector);
 msix_notify(&(n->parent_obj), cq->vector);
 } else {
+trace_nvme_irq_pin();
 pci_irq_pulse(&n->parent_obj);
 }
+} else {
+trace_nvme_irq_masked();
 }
 }

@@ -100,7 +113,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,
 trans_len = MIN(len, trans_len);
 int num_prps = (len >> n->page_bits) + 1;

-if (!prp1) {
+if (unlikely(!prp1)) {
+trace_nvme_err_invalid_prp();
 return NVME_INVALID_FIELD | NVME_DNR;
 } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
@@ -113,7 +127,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,
 }
 len -= trans_len;
 if (len) {
-if (!prp2) {
+if (unlikely(!prp2)) {
+trace_nvme_err_invalid_prp2_missing();
 goto unmap;
 }
 if (len > n->page_size) {
@@ -128,7 +143,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,
 uint64_t prp_ent = le64_to_cpu(prp_list[i]);

 if (i == n->max_prp_ents - 1 && len > n->page_size) {
-if (!prp_ent || prp_ent & (n->page_size - 1)) {
+if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
+trace_nvme_err_invalid_prplist_ent(prp_ent);
 goto unmap;
 }

@@ -140,7 +156,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,
 prp_ent = le64_to_cpu(prp_list[i]);
 }

-if (!prp_ent || prp_ent & (n->page_size - 1)) {
+if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
+trace_nvme_err_invalid_prplist_ent(prp_ent);
 goto unmap;
 }

@@ -154,7 +171,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,
 i++;
 }
 } else {
-if (prp2 & (n->page_size - 1)) {
+if (unlikely(prp2 & (n->page_size - 1))) {
+trace_nvme_err_invalid_prp2_align(prp2);
 goto unmap;
 }
 if (qsg->nsg) {
@@ -178,16 +196,20 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n,
uint8_t *ptr, uint32_t len,
 QEMUIOVector iov;
 uint16_t status = NVME_SUCCESS;

+trace_nvme_dma_read(prp1, prp2);
+
 if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 if (qsg.nsg > 0) {
-if (dma_buf_read(ptr, len, &qsg)) {
+if (unlikely(dma_buf_read(ptr, len, &qsg))) {
+trace_nvme_err_invalid_dma();
 status = NVME_INVALID_FIELD | NVME_DNR;
 }
 qemu_sglist_destroy(&qsg);
 } else {
-if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
+if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) {
+trace_nvme_err_invalid_dma();
 status = NVME_INVALID_FIELD | NVME_DNR;
 }
 qemu_iovec_destroy(&iov);
@@ -273,7 +295,8 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n,
NvmeNamespace *ns, NvmeCmd *cmd,
 uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS);
 uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);

-if (slba + nlb > ns->id_ns.nsze) {
+if (unlikely(slba + nlb > ns->id_ns.nsze)) {
+trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
 return 

[Qemu-devel] [Bug 1728639] [NEW] qemu-io crashes with SIGSEGV when did -c truncate 320000 on a image_fuzzer image

2017-10-30 Thread R.Nageswara Sastry
Public bug reported:

git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
This is on ppc64le architecture.

Re-production steps:

1. Copy the attached files named test.img to a directory
2. And customize the following command to point to the above directory and run 
the same.
# mv test.img copy.img
# qemu-io /copy.img -c "truncate 320000"

from gdb:
Program terminated with signal 11, Segmentation fault.
#0  0x1000e444 in refresh_total_sectors (bs=0x1fe86f60, hint=11648) at 
block.c:723
723 if (drv->bdrv_getlength) {
Missing separate debuginfos, use: debuginfo-install 
cyrus-sasl-lib-2.1.26-21.el7.ppc64le glib2-2.50.3-3.el7.ppc64le 
glibc-2.17-196.el7.ppc64le gmp-6.0.0-15.el7.ppc64le gnutls-3.3.26-9.el7.ppc64le 
keyutils-libs-1.5.8-3.el7.ppc64le krb5-libs-1.15.1-8.el7.ppc64le 
libaio-0.3.109-13.el7.ppc64le libcom_err-1.42.9-10.el7.ppc64le 
libcurl-7.29.0-42.el7.ppc64le libffi-3.0.13-18.el7.ppc64le 
libgcc-4.8.5-16.el7_4.1.ppc64le libidn-1.28-4.el7.ppc64le 
libselinux-2.5-11.el7.ppc64le libssh2-1.4.3-10.el7_2.1.ppc64le 
libstdc++-4.8.5-16.el7_4.1.ppc64le libtasn1-4.10-1.el7.ppc64le 
nettle-2.7.1-8.el7.ppc64le nspr-4.13.1-1.0.el7_3.ppc64le 
nss-3.28.4-15.el7_4.ppc64le nss-softokn-freebl-3.28.3-8.el7_4.ppc64le 
nss-util-3.28.4-3.el7.ppc64le openldap-2.4.44-5.el7.ppc64le 
openssl-libs-1.0.2k-8.el7.ppc64le p11-kit-0.23.5-3.el7.ppc64le 
pcre-8.32-17.el7.ppc64le zlib-1.2.7-17.el7.ppc64le
(gdb) bt
#0  0x1000e444 in refresh_total_sectors (bs=0x1fe86f60, hint=11648) at 
block.c:723
#1  0x1000fa10 in bdrv_open_driver (bs=0x1fe86f60, drv=0x102036f0 
<bdrv_qcow2>, node_name=0x0, options=0x1fe8c240, open_flags=24578,
errp=0x3fffea0fc920) at block.c:1153
#2  0x10010480 in bdrv_open_common (bs=0x1fe86f60, file=0x1fe92540, 
options=0x1fe8c240, errp=0x3fffea0fc920) at block.c:1395
#3  0x10013ac8 in bdrv_open_inherit (filename=0x3fffea0ff661 
"copy.img", reference=0x0, options=0x1fe8c240, flags=24578, parent=0x0, 
child_role=0x0,
errp=0x3fffea0fcae0) at block.c:2616
#4  0x10013e8c in bdrv_open (filename=0x3fffea0ff661 "copy.img", 
reference=0x0, options=0x0, flags=16386, errp=0x3fffea0fcae0) at block.c:2698
#5  0x1008b6d4 in blk_new_open (filename=0x3fffea0ff661 "copy.img", 
reference=0x0, options=0x0, flags=16386, errp=0x3fffea0fcae0)
at block/block-backend.c:321
#6  0x1000a6ec in openfile (name=0x3fffea0ff661 "copy.img", 
flags=16386, writethrough=true, force_share=false, opts=0x0) at qemu-io.c:81
#7  0x1000c040 in main (argc=4, argv=0x3fffea0fd208) at qemu-io.c:624
(gdb) bt full
#0  0x1000e444 in refresh_total_sectors (bs=0x1fe86f60, hint=11648) at 
block.c:723
drv = 0x0
#1  0x1000fa10 in bdrv_open_driver (bs=0x1fe86f60, drv=0x102036f0 
<bdrv_qcow2>, node_name=0x0, options=0x1fe8c240, open_flags=24578,
errp=0x3fffea0fc920) at block.c:1153
local_err = 0x0
ret = 0
__PRETTY_FUNCTION__ = "bdrv_open_driver"
__func__ = "bdrv_open_driver"
#2  0x10010480 in bdrv_open_common (bs=0x1fe86f60, file=0x1fe92540, 
options=0x1fe8c240, errp=0x3fffea0fc920) at block.c:1395
ret = 16383
open_flags = 24578
filename = 0x1fe8e2b1 "copy.img"
driver_name = 0x1fe54810 "qcow2"
node_name = 0x0
discard = 0x0
detect_zeroes = 0x0
opts = 0x1fe93100
drv = 0x102036f0 <bdrv_qcow2>
local_err = 0x0
__PRETTY_FUNCTION__ = "bdrv_open_common"
__func__ = "bdrv_open_common"
#3  0x10013ac8 in bdrv_open_inherit (filename=0x3fffea0ff661 
"copy.img", reference=0x0, options=0x1fe8c240, flags=24578, parent=0x0, 
child_role=0x0,
errp=0x3fffea0fcae0) at block.c:2616
ret = 512
file = 0x1fe92540
bs = 0x1fe86f60
drv = 0x102036f0 <bdrv_qcow2>
drvname = 0x0
backing = 0x0
local_err = 0x0
snapshot_options = 0x0
snapshot_flags = 0
__PRETTY_FUNCTION__ = "bdrv_open_inherit"
__func__ = "bdrv_open_inherit"
#4  0x10013e8c in bdrv_open (filename=0x3fffea0ff661 "copy.img", 
reference=0x0, options=0x0, flags=16386, errp=0x3fffea0fcae0) at block.c:2698
No locals.
#5  0x1008b6d4 in blk_new_open (filename=0x3fffea0ff661 "copy.img", 
reference=0x0, options=0x0, flags=16386, errp=0x3fffea0fcae0)
at block/block-backend.c:321
blk = 0x1fe79410
bs = 0x0
perm = 3
#6  0x1000a6ec in openfile (name=0x3fffea0ff661 "copy.img", 
flags=16386, writethrough=true, force_share=false, opts=0x0) at qemu-io.c:81
local_err = 0x0
#7  0x1000c040 in main (argc=4, argv=0x3fffea0fd208) at qemu-io.c:624
readonly = 0
sopt = 0x101b2608 "hVc:d:f:rsnCmkt:T:U"
lopt = {{name = 0x101b26d0 "driver", has_arg = 0, flag = 0x0, val = 
104}, {name = 0x101b26d8 "help", has_arg = 0, flag = 0x0, val = 86}, {
name = 0x101b26e0 "version", has_arg = 1, flag = 0x0, val = 99}, 
{name = 0x101b26e8 "cmd", has_arg = 1, flag = 0x0, 

[Qemu-devel] [Bug 1728639] Re: qemu-io crashes with SIGSEGV when did -c truncate 320000 on a image_fuzzer image

2017-10-30 Thread R.Nageswara Sastry
** Attachment added: "test.img tarred"
   
https://bugs.launchpad.net/qemu/+bug/1728639/+attachment/5000182/+files/test.img.tar.gz

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1728639

Title:
  qemu-io crashes with SIGSEGV when did  -c truncate 320000 on a
  image_fuzzer image

Status in QEMU:
  New

Bug description:
  git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
  This is on ppc64le architecture.

  Re-production steps:

  1. Copy the attached files named test.img to a directory
  2. And customize the following command to point to the above directory and 
run the same.
  # mv test.img copy.img
  # qemu-io /copy.img -c "truncate 320000"

  from gdb:
  Program terminated with signal 11, Segmentation fault.
  #0  0x1000e444 in refresh_total_sectors (bs=0x1fe86f60, hint=11648) 
at block.c:723
  723   if (drv->bdrv_getlength) {
  Missing separate debuginfos, use: debuginfo-install 
cyrus-sasl-lib-2.1.26-21.el7.ppc64le glib2-2.50.3-3.el7.ppc64le 
glibc-2.17-196.el7.ppc64le gmp-6.0.0-15.el7.ppc64le gnutls-3.3.26-9.el7.ppc64le 
keyutils-libs-1.5.8-3.el7.ppc64le krb5-libs-1.15.1-8.el7.ppc64le 
libaio-0.3.109-13.el7.ppc64le libcom_err-1.42.9-10.el7.ppc64le 
libcurl-7.29.0-42.el7.ppc64le libffi-3.0.13-18.el7.ppc64le 
libgcc-4.8.5-16.el7_4.1.ppc64le libidn-1.28-4.el7.ppc64le 
libselinux-2.5-11.el7.ppc64le libssh2-1.4.3-10.el7_2.1.ppc64le 
libstdc++-4.8.5-16.el7_4.1.ppc64le libtasn1-4.10-1.el7.ppc64le 
nettle-2.7.1-8.el7.ppc64le nspr-4.13.1-1.0.el7_3.ppc64le 
nss-3.28.4-15.el7_4.ppc64le nss-softokn-freebl-3.28.3-8.el7_4.ppc64le 
nss-util-3.28.4-3.el7.ppc64le openldap-2.4.44-5.el7.ppc64le 
openssl-libs-1.0.2k-8.el7.ppc64le p11-kit-0.23.5-3.el7.ppc64le 
pcre-8.32-17.el7.ppc64le zlib-1.2.7-17.el7.ppc64le
  (gdb) bt
  #0  0x1000e444 in refresh_total_sectors (bs=0x1fe86f60, hint=11648) 
at block.c:723
  #1  0x1000fa10 in bdrv_open_driver (bs=0x1fe86f60, drv=0x102036f0 
<bdrv_qcow2>, node_name=0x0, options=0x1fe8c240, open_flags=24578,
  errp=0x3fffea0fc920) at block.c:1153
  #2  0x10010480 in bdrv_open_common (bs=0x1fe86f60, file=0x1fe92540, 
options=0x1fe8c240, errp=0x3fffea0fc920) at block.c:1395
  #3  0x10013ac8 in bdrv_open_inherit (filename=0x3fffea0ff661 
"copy.img", reference=0x0, options=0x1fe8c240, flags=24578, parent=0x0, 
child_role=0x0,
  errp=0x3fffea0fcae0) at block.c:2616
  #4  0x10013e8c in bdrv_open (filename=0x3fffea0ff661 "copy.img", 
reference=0x0, options=0x0, flags=16386, errp=0x3fffea0fcae0) at block.c:2698
  #5  0x1008b6d4 in blk_new_open (filename=0x3fffea0ff661 "copy.img", 
reference=0x0, options=0x0, flags=16386, errp=0x3fffea0fcae0)
  at block/block-backend.c:321
  #6  0x1000a6ec in openfile (name=0x3fffea0ff661 "copy.img", 
flags=16386, writethrough=true, force_share=false, opts=0x0) at qemu-io.c:81
  #7  0x1000c040 in main (argc=4, argv=0x3fffea0fd208) at qemu-io.c:624
  (gdb) bt full
  #0  0x1000e444 in refresh_total_sectors (bs=0x1fe86f60, hint=11648) 
at block.c:723
  drv = 0x0
  #1  0x1000fa10 in bdrv_open_driver (bs=0x1fe86f60, drv=0x102036f0 
<bdrv_qcow2>, node_name=0x0, options=0x1fe8c240, open_flags=24578,
  errp=0x3fffea0fc920) at block.c:1153
  local_err = 0x0
  ret = 0
  __PRETTY_FUNCTION__ = "bdrv_open_driver"
  __func__ = "bdrv_open_driver"
  #2  0x10010480 in bdrv_open_common (bs=0x1fe86f60, file=0x1fe92540, 
options=0x1fe8c240, errp=0x3fffea0fc920) at block.c:1395
  ret = 16383
  open_flags = 24578
  filename = 0x1fe8e2b1 "copy.img"
  driver_name = 0x1fe54810 "qcow2"
  node_name = 0x0
  discard = 0x0
  detect_zeroes = 0x0
  opts = 0x1fe93100
  drv = 0x102036f0 <bdrv_qcow2>
  local_err = 0x0
  __PRETTY_FUNCTION__ = "bdrv_open_common"
  __func__ = "bdrv_open_common"
  #3  0x10013ac8 in bdrv_open_inherit (filename=0x3fffea0ff661 
"copy.img", reference=0x0, options=0x1fe8c240, flags=24578, parent=0x0, 
child_role=0x0,
  errp=0x3fffea0fcae0) at block.c:2616
  ret = 512
  file = 0x1fe92540
  bs = 0x1fe86f60
  drv = 0x102036f0 <bdrv_qcow2>
  drvname = 0x0
  backing = 0x0
  local_err = 0x0
  snapshot_options = 0x0
  snapshot_flags = 0
  __PRETTY_FUNCTION__ = "bdrv_open_inherit"
  __func__ = "bdrv_open_inherit"
  #4  0x10013e8c in bdrv_open (filename=0x3fffea0ff661 "copy.img", 
reference=0x0, options=0x0, flags=16386, errp=0x3fffea0fcae0) at block.c:2698
  No locals.
  #5  0x1008b6d4 in blk_new_open (filename=0x3fffea0ff661 "copy.img", 
reference=0x0, options=0x0, flags=16386, errp=0x3fffea0fcae0)
  at block/block-backend.c:321
  blk = 0x1fe79410
  bs = 0x0
  perm = 3
  #6  0x1000a6ec in openfile (name=0x3fffea0ff661 "copy.img", 

Re: [Qemu-devel] [PATCH v3] monitor: fix dangling CPU pointer

2017-10-30 Thread Igor Mammedov
On Tue, 17 Oct 2017 10:16:22 +0200
Greg Kurz  wrote:

> If a CPU selected with the "cpu" command is hot-unplugged then "info cpus"
> causes QEMU to exit:
> 
> (qemu) device_del cpu1
> (qemu) info cpus
> qemu:qemu_cpu_kick_thread: No such process
> 
> This happens because "cpu" stores the pointer to the selected CPU into
> the monitor structure. When the CPU is hot-unplugged, we end up with a
> dangling pointer. The "info cpus" command then does:
> 
> hmp_info_cpus()
>  monitor_get_cpu_index()
>   mon_get_cpu()
>cpu_synchronize_state() <--- called with dangling pointer
> 
> This could cause a QEMU crash as well.
> 
> This patch switches the monitor to store the QOM path instead of a
> pointer to the current CPU. The path is then resolved when needed.
> If the resolution fails, we assume that the CPU was removed and the
> path is reset to the default (ie, path of first_cpu).
> 
> Reported-by: Satheesh Rajendran 
> Suggested-by: Igor Mammedov 
> Signed-off-by: Greg Kurz 

Reviewed-by: Igor Mammedov 

> ---
> v3: - drop irrelevant paragraph about object_resolve_path() from the
>   changelog
> 
> v2: - use object_resolve_path_type()
> - add Reported-by tag
> ---
>  monitor.c |   23 ++-
>  1 file changed, 18 insertions(+), 5 deletions(-)
> 
> diff --git a/monitor.c b/monitor.c
> index fe0d1bdbb461..ce577e46e568 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -200,7 +200,7 @@ struct Monitor {
>  
>  ReadLineState *rs;
>  MonitorQMP qmp;
> -CPUState *mon_cpu;
> +gchar *mon_cpu_path;
>  BlockCompletionFunc *password_completion_cb;
>  void *password_opaque;
>  mon_cmd_t *cmd_table;
> @@ -579,6 +579,7 @@ static void monitor_data_init(Monitor *mon)
>  
>  static void monitor_data_destroy(Monitor *mon)
>  {
> +g_free(mon->mon_cpu_path);
>  qemu_chr_fe_deinit(&mon->chr, false);
>  if (monitor_is_qmp(mon)) {
>  json_message_parser_destroy(&mon->qmp.parser);
> @@ -1047,20 +1048,32 @@ int monitor_set_cpu(int cpu_index)
>  if (cpu == NULL) {
>  return -1;
>  }
> -cur_mon->mon_cpu = cpu;
> +g_free(cur_mon->mon_cpu_path);
> +cur_mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu));
>  return 0;
>  }
>  
>  CPUState *mon_get_cpu(void)
>  {
> -if (!cur_mon->mon_cpu) {
> +CPUState *cpu;
> +
> +if (cur_mon->mon_cpu_path) {
> +cpu = (CPUState *) object_resolve_path_type(cur_mon->mon_cpu_path,
> +TYPE_CPU, NULL);
> +if (!cpu) {
> +g_free(cur_mon->mon_cpu_path);
> +cur_mon->mon_cpu_path = NULL;
> +}
> +}
> +if (!cur_mon->mon_cpu_path) {
>  if (!first_cpu) {
>  return NULL;
>  }
>  monitor_set_cpu(first_cpu->cpu_index);
> +cpu = first_cpu;
>  }
> -cpu_synchronize_state(cur_mon->mon_cpu);
> -return cur_mon->mon_cpu;
> +cpu_synchronize_state(cpu);
> +return cpu;
>  }
>  
>  CPUArchState *mon_get_cpu_env(void)
> 
> 




[Qemu-devel] [Bug 1728635] Re: qemu-io crashes with SIGSEGV when did -c aio_write 9233408 28160 on a image_fuzzer image

2017-10-30 Thread R.Nageswara Sastry
** Attachment added: "test.img tarred"
   
https://bugs.launchpad.net/qemu/+bug/1728635/+attachment/5000176/+files/test.img.tar.gz

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1728635

Title:
  qemu-io crashes with SIGSEGV when did  -c aio_write 9233408 28160 on a
  image_fuzzer image

Status in QEMU:
  New

Bug description:
  git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
  This is on ppc64le architecture.

  Re-production steps:

  1. Copy the attached file named test.img to a directory
  2. And customize the following command to point to the above directory and 
run the same.
  # cp test.img copy.img
  # qemu/qemu-io /copy.img -c "aio_write 9233408 28160"

  from gdb:
  Program terminated with signal 11, Segmentation fault.
  #0  0x3fffa0077644 in __memcpy_power7 () from /lib64/libc.so.6
  Missing separate debuginfos, use: debuginfo-install 
cyrus-sasl-lib-2.1.26-21.el7.ppc64le glib2-2.50.3-3.el7.ppc64le 
glibc-2.17-196.el7.ppc64le gmp-6.0.0-15.el7.ppc64le gnutls-3.3.26-9.el7.ppc64le 
keyutils-libs-1.5.8-3.el7.ppc64le krb5-libs-1.15.1-8.el7.ppc64le 
libaio-0.3.109-13.el7.ppc64le libcom_err-1.42.9-10.el7.ppc64le 
libcurl-7.29.0-42.el7.ppc64le libffi-3.0.13-18.el7.ppc64le 
libgcc-4.8.5-16.el7_4.1.ppc64le libidn-1.28-4.el7.ppc64le 
libselinux-2.5-11.el7.ppc64le libssh2-1.4.3-10.el7_2.1.ppc64le 
libstdc++-4.8.5-16.el7_4.1.ppc64le libtasn1-4.10-1.el7.ppc64le 
nettle-2.7.1-8.el7.ppc64le nspr-4.13.1-1.0.el7_3.ppc64le 
nss-3.28.4-15.el7_4.ppc64le nss-softokn-freebl-3.28.3-8.el7_4.ppc64le 
nss-util-3.28.4-3.el7.ppc64le openldap-2.4.44-5.el7.ppc64le 
openssl-libs-1.0.2k-8.el7.ppc64le p11-kit-0.23.5-3.el7.ppc64le 
pcre-8.32-17.el7.ppc64le zlib-1.2.7-17.el7.ppc64le
  (gdb) bt
  #0  0x3fffa0077644 in __memcpy_power7 () from /lib64/libc.so.6
  #1  0x10056738 in qcow2_refcount_area (bs=0x25f56f60, 
start_offset=137438953472, additional_clusters=0, exact_size=false, 
new_refblock_index=0,
  new_refblock_offset=524288) at block/qcow2-refcount.c:573
  #2  0x10056374 in alloc_refcount_block (bs=0x25f56f60, 
cluster_index=0, refcount_block=0x3fff9dadf838) at block/qcow2-refcount.c:479
  #3  0x10057520 in update_refcount (bs=0x25f56f60, offset=0, 
length=524288, addend=1, decrease=false, type=QCOW2_DISCARD_NEVER)
  at block/qcow2-refcount.c:834
  #4  0x10057c24 in qcow2_alloc_clusters (bs=0x25f56f60, size=524288) 
at block/qcow2-refcount.c:996
  #5  0x10063684 in do_alloc_cluster_offset (bs=0x25f56f60, 
guest_offset=9233408, host_offset=0x3fff9dadf9e0, nb_clusters=0x3fff9dadf9d8)
  at block/qcow2-cluster.c:1213
  #6  0x10063afc in handle_alloc (bs=0x25f56f60, guest_offset=9233408, 
host_offset=0x3fff9dadfab0, bytes=0x3fff9dadfab8, m=0x3fff9dadfb60)
  at block/qcow2-cluster.c:1324
  #7  0x10064178 in qcow2_alloc_cluster_offset (bs=0x25f56f60, 
offset=9233408, bytes=0x3fff9dadfb4c, host_offset=0x3fff9dadfb58, 
m=0x3fff9dadfb60)
  at block/qcow2-cluster.c:1511
  #8  0x1004d3f4 in qcow2_co_pwritev (bs=0x25f56f60, offset=9233408, 
bytes=28160, qiov=0x25f6fa08, flags=0) at block/qcow2.c:1919
  #9  0x100a9648 in bdrv_driver_pwritev (bs=0x25f56f60, offset=9233408, 
bytes=28160, qiov=0x25f6fa08, flags=16) at block/io.c:898
  #10 0x100ab630 in bdrv_aligned_pwritev (child=0x25f627f0, 
req=0x3fff9dadfdd8, offset=9233408, bytes=28160, align=1, qiov=0x25f6fa08, 
flags=16)
  at block/io.c:1440
  #11 0x100ac4ac in bdrv_co_pwritev (child=0x25f627f0, offset=9233408, 
bytes=28160, qiov=0x25f6fa08, flags=BDRV_REQ_FUA) at block/io.c:1691
  #12 0x1008da0c in blk_co_pwritev (blk=0x25f49410, offset=9233408, 
bytes=28160, qiov=0x25f6fa08, flags=BDRV_REQ_FUA) at block/block-backend.c:1085
  #13 0x1008e718 in blk_aio_write_entry (opaque=0x25f6fa70) at 
block/block-backend.c:1276
  #14 0x101aa444 in coroutine_trampoline (i0=636902032, i1=0) at 
util/coroutine-ucontext.c:79
  #15 0x3fffa0022b9c in makecontext () from /lib64/libc.so.6
  #16 0x0000000000000000 in ?? ()
  (gdb) bt full
  #0  0x3fffa0077644 in __memcpy_power7 () from /lib64/libc.so.6
  No symbol table info available.
  #1  0x10056738 in qcow2_refcount_area (bs=0x25f56f60, 
start_offset=137438953472, additional_clusters=0, exact_size=false, 
new_refblock_index=0,
  new_refblock_offset=524288) at block/qcow2-refcount.c:573
  s = 0x25f63210
  total_refblock_count_u64 = 2
  additional_refblock_count = 0
  total_refblock_count = 2
  table_size = 65536
  area_reftable_index = 1
  table_clusters = 1
  i = 0
  table_offset = 268870620
  block_offset = 70367094634128
  end_offset = 636891296
  ret = 636786432
  new_table = 0x3fff9d940010
  __PRETTY_FUNCTION__ = "qcow2_refcount_area"
  data = {d64 = 636841824, 

[Qemu-devel] [Bug 1728635] [NEW] qemu-io crashes with SIGSEGV when did -c aio_write 9233408 28160 on a image_fuzzer image

2017-10-30 Thread R.Nageswara Sastry
Public bug reported:

git is at HEAD a93ece47fd9edbd4558db24300056c9a57d3bcd4
This is on ppc64le architecture.

Re-production steps:

1. Copy the attached file named test.img to a directory
2. And customize the following command to point to the above directory and run 
the same.
# cp test.img copy.img
# qemu/qemu-io /copy.img -c "aio_write 9233408 28160"

from gdb:
Program terminated with signal 11, Segmentation fault.
#0  0x3fffa0077644 in __memcpy_power7 () from /lib64/libc.so.6
Missing separate debuginfos, use: debuginfo-install 
cyrus-sasl-lib-2.1.26-21.el7.ppc64le glib2-2.50.3-3.el7.ppc64le 
glibc-2.17-196.el7.ppc64le gmp-6.0.0-15.el7.ppc64le gnutls-3.3.26-9.el7.ppc64le 
keyutils-libs-1.5.8-3.el7.ppc64le krb5-libs-1.15.1-8.el7.ppc64le 
libaio-0.3.109-13.el7.ppc64le libcom_err-1.42.9-10.el7.ppc64le 
libcurl-7.29.0-42.el7.ppc64le libffi-3.0.13-18.el7.ppc64le 
libgcc-4.8.5-16.el7_4.1.ppc64le libidn-1.28-4.el7.ppc64le 
libselinux-2.5-11.el7.ppc64le libssh2-1.4.3-10.el7_2.1.ppc64le 
libstdc++-4.8.5-16.el7_4.1.ppc64le libtasn1-4.10-1.el7.ppc64le 
nettle-2.7.1-8.el7.ppc64le nspr-4.13.1-1.0.el7_3.ppc64le 
nss-3.28.4-15.el7_4.ppc64le nss-softokn-freebl-3.28.3-8.el7_4.ppc64le 
nss-util-3.28.4-3.el7.ppc64le openldap-2.4.44-5.el7.ppc64le 
openssl-libs-1.0.2k-8.el7.ppc64le p11-kit-0.23.5-3.el7.ppc64le 
pcre-8.32-17.el7.ppc64le zlib-1.2.7-17.el7.ppc64le
(gdb) bt
#0  0x3fffa0077644 in __memcpy_power7 () from /lib64/libc.so.6
#1  0x10056738 in qcow2_refcount_area (bs=0x25f56f60, 
start_offset=137438953472, additional_clusters=0, exact_size=false, 
new_refblock_index=0,
new_refblock_offset=524288) at block/qcow2-refcount.c:573
#2  0x10056374 in alloc_refcount_block (bs=0x25f56f60, cluster_index=0, 
refcount_block=0x3fff9dadf838) at block/qcow2-refcount.c:479
#3  0x10057520 in update_refcount (bs=0x25f56f60, offset=0, 
length=524288, addend=1, decrease=false, type=QCOW2_DISCARD_NEVER)
at block/qcow2-refcount.c:834
#4  0x10057c24 in qcow2_alloc_clusters (bs=0x25f56f60, size=524288) at 
block/qcow2-refcount.c:996
#5  0x10063684 in do_alloc_cluster_offset (bs=0x25f56f60, 
guest_offset=9233408, host_offset=0x3fff9dadf9e0, nb_clusters=0x3fff9dadf9d8)
at block/qcow2-cluster.c:1213
#6  0x10063afc in handle_alloc (bs=0x25f56f60, guest_offset=9233408, 
host_offset=0x3fff9dadfab0, bytes=0x3fff9dadfab8, m=0x3fff9dadfb60)
at block/qcow2-cluster.c:1324
#7  0x10064178 in qcow2_alloc_cluster_offset (bs=0x25f56f60, 
offset=9233408, bytes=0x3fff9dadfb4c, host_offset=0x3fff9dadfb58, 
m=0x3fff9dadfb60)
at block/qcow2-cluster.c:1511
#8  0x1004d3f4 in qcow2_co_pwritev (bs=0x25f56f60, offset=9233408, 
bytes=28160, qiov=0x25f6fa08, flags=0) at block/qcow2.c:1919
#9  0x100a9648 in bdrv_driver_pwritev (bs=0x25f56f60, offset=9233408, 
bytes=28160, qiov=0x25f6fa08, flags=16) at block/io.c:898
#10 0x100ab630 in bdrv_aligned_pwritev (child=0x25f627f0, 
req=0x3fff9dadfdd8, offset=9233408, bytes=28160, align=1, qiov=0x25f6fa08, 
flags=16)
at block/io.c:1440
#11 0x100ac4ac in bdrv_co_pwritev (child=0x25f627f0, offset=9233408, 
bytes=28160, qiov=0x25f6fa08, flags=BDRV_REQ_FUA) at block/io.c:1691
#12 0x1008da0c in blk_co_pwritev (blk=0x25f49410, offset=9233408, 
bytes=28160, qiov=0x25f6fa08, flags=BDRV_REQ_FUA) at block/block-backend.c:1085
#13 0x1008e718 in blk_aio_write_entry (opaque=0x25f6fa70) at 
block/block-backend.c:1276
#14 0x101aa444 in coroutine_trampoline (i0=636902032, i1=0) at 
util/coroutine-ucontext.c:79
#15 0x3fffa0022b9c in makecontext () from /lib64/libc.so.6
#16 0x0000000000000000 in ?? ()
(gdb) bt full
#0  0x3fffa0077644 in __memcpy_power7 () from /lib64/libc.so.6
No symbol table info available.
#1  0x10056738 in qcow2_refcount_area (bs=0x25f56f60, 
start_offset=137438953472, additional_clusters=0, exact_size=false, 
new_refblock_index=0,
new_refblock_offset=524288) at block/qcow2-refcount.c:573
s = 0x25f63210
total_refblock_count_u64 = 2
additional_refblock_count = 0
total_refblock_count = 2
table_size = 65536
area_reftable_index = 1
table_clusters = 1
i = 0
table_offset = 268870620
block_offset = 70367094634128
end_offset = 636891296
ret = 636786432
new_table = 0x3fff9d940010
__PRETTY_FUNCTION__ = "qcow2_refcount_area"
data = {d64 = 636841824, d32 = 1}
old_table_offset = 70367094634552
old_table_size = 636786432
#2  0x10056374 in alloc_refcount_block (bs=0x25f56f60, cluster_index=0, 
refcount_block=0x3fff9dadf838) at block/qcow2-refcount.c:479
s = 0x25f63210
refcount_table_index = 0
ret = 0
new_block = 524288
blocks_used = 1
meta_offset = 137438953472
#3  0x10057520 in update_refcount (bs=0x25f56f60, offset=0, 
length=524288, addend=1, decrease=false, 

Re: [Qemu-devel] [RFC 00/19] KVM: s390/crypto/vfio: guest dedicated crypto adapters

2017-10-30 Thread Tony Krowiak

On 10/30/2017 04:57 AM, Christian Borntraeger wrote:

adding qemu devel and add Daniel and Erik from libvirt to keep them in the loop.

On 10/29/2017 12:11 PM, Cornelia Huck wrote:

On Fri, 13 Oct 2017 13:38:45 -0400
Tony Krowiak  wrote:


Tony Krowiak (19):
   KVM: s390: SIE considerations for AP Queue virtualization
   KVM: s390: refactor crypto initialization
   s390/zcrypt: new AP matrix bus
   s390/zcrypt: create an AP matrix device on the AP matrix bus
   s390/zcrypt: base implementation of AP matrix device driver
   s390/zcrypt: register matrix device with VFIO mediated device
 framework
   KVM: s390: introduce AP matrix configuration interface
   s390/zcrypt: support for assigning adapters to matrix mdev
   s390/zcrypt: validate adapter assignment
   s390/zcrypt: sysfs interfaces supporting AP domain assignment
   s390/zcrypt: validate domain assignment
   s390/zcrypt: sysfs support for control domain assignment
   s390/zcrypt: validate control domain assignment
   KVM: s390: Connect the AP mediated matrix device to KVM
   s390/zcrypt: introduce ioctl access to VFIO AP Matrix driver
   KVM: s390: interface to configure KVM guest's AP matrix
   KVM: s390: validate input to AP matrix config interface
   KVM: s390: New ioctl to configure KVM guest's AP matrix
   s390/facilities: enable AP facilities needed by guest

I'll try to summarize all of this in my own words, both to make sure I
understand the design correctly and to give others a different view on
this.

[I'm completely disregarding control domains here.]

On s390, we have cryptographic coprocessor cards, which are modeled on
Linux as devices on the AP bus. There's also a concept called domains,
which means an individual queue of a crypto device is basically a
(card,domain) tuple. We model this something like the following
(assuming we have access to cards 3 and 4 and domains 1 and 2):

AP -> card3 -> queue (3,1)
 -> queue (3,2)
-> card4 -> queue (4,1)
 -> queue (4,2)

(The AP bus is a bit different for backwards compat.)

If we want to virtualize this, we can use a feature provided by the
hardware. We basically attach a satellite control block to our main
hardware virtualization control block and the hardware takes care of
(mostly) everything.

For this control block, we don't specify explicit tuples, but a list of
cards and a list of domains. The guest will get access to the cross
product.

Because of this, we need to take care that the lists provided to
different guests don't overlap; i.e., we need to enforce sane
configurations. Otherwise, one guest may get access to things like
secret keys for another guest.

The idea of this patch set is to introduce a new device, the matrix
device. This matrix device hangs off a different root and acts as the
node where mdev devices hang off.

If you now want to give the tuples (4,1) and (4,2), you need to do the
following:

- Unbind the (4,1) and (4,2) tuples from their ap bus driver.
- Bind the (4,1) and (4,2) tuples to the ap matrix driver.
- Create the mediated device.
- Assign card 4 and domains 1 and 2.

QEMU will now simply consume the mediated device and things should work.


This is probably the shortest possible summary I can imagine.
Tony can you double check if it matches your understanding as well?


Yes, this is a concise summary of the patch set.



  1   2   >