date:20150924

Re: [Qemu-devel] [PATCH v11 02/12] init/cleanup of netfilter object

2015-09-24 Thread Jason Wang



On 09/24/2015 07:52 PM, Markus Armbruster wrote:
> Yang Hongyang  writes:
>
>> On 09/24/2015 04:41 PM, Markus Armbruster wrote:
>>> Yang Hongyang  writes:
>>>
 Add a netfilter object based on QOM.

 A netfilter is attached to a netdev, captures all network packets
 that pass through the netdev. When we delete the netdev, we also
 delete the netfilter object attached to it, because if the netdev is
 removed, the filter which attached to it is useless.

 QTAILQ_ENTRY next used by netdev, filter belongs to the specific netdev is
 in this queue.
>>> I don't get this paragraph.  Not sure it's needed.
>>>
 Also init delayed object after net_init_clients, because netfilters need
 to be initialized after net clients initialized.
>>> A paragraph starting with "Also" in a commit message is a pretty good
>>> sign the patch should be split :)
>>>
 Signed-off-by: Yang Hongyang 
 ---
 v11: no need to free nf->netdev_id, it will be freed automatically when the
 object is deleted
   remove global_list net_filters, will add back when needed
 v10: use QOM for netfilter
 v9: use flat union instead of simple union in QAPI schema
 v8: include vhost_net header
 v7: add check for vhost
  fix error propagate bug
 v6: add multiqueue support (net_filter_init1)
 v5: remove model from NetFilterState
  add a sent_cb param to receive_iov API
 ---
   include/net/filter.h|  60 +
   include/net/net.h   |   1 +
   include/qemu/typedefs.h |   1 +
   net/Makefile.objs   |   1 +
   net/filter.c| 138 
 
   net/net.c   |   7 +++
   qapi-schema.json|  18 +++
   vl.c|  13 ++---
   8 files changed, 233 insertions(+), 6 deletions(-)
   create mode 100644 include/net/filter.h
   create mode 100644 net/filter.c

 diff --git a/include/net/filter.h b/include/net/filter.h
 new file mode 100644
 index 000..226f2f7
 --- /dev/null
 +++ b/include/net/filter.h
 @@ -0,0 +1,60 @@
 +/*
 + * Copyright (c) 2015 FUJITSU LIMITED
 + * Author: Yang Hongyang 
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or
 + * later.  See the COPYING file in the top-level directory.
 + */
 +
 +#ifndef QEMU_NET_FILTER_H
 +#define QEMU_NET_FILTER_H
 +
 +#include "qom/object.h"
 +#include "qemu-common.h"
 +#include "qemu/typedefs.h"
 +#include "net/queue.h"
 +
 +#define TYPE_NETFILTER "netfilter"
 +#define NETFILTER(obj) \
 +OBJECT_CHECK(NetFilterState, (obj), TYPE_NETFILTER)
 +#define NETFILTER_GET_CLASS(obj) \
 +OBJECT_GET_CLASS(NetFilterClass, (obj), TYPE_NETFILTER)
 +#define NETFILTER_CLASS(klass) \
 +OBJECT_CLASS_CHECK(NetFilterClass, (klass), TYPE_NETFILTER)
 +
 +typedef void (FilterSetup) (NetFilterState *nf, Error **errp);
 +typedef void (FilterCleanup) (NetFilterState *nf);
 +/*
 + * Return:
 + *   0: finished handling the packet, we should continue
 + *   size: filter stolen this packet, we stop pass this packet further
 + */
 +typedef ssize_t (FilterReceiveIOV)(NetFilterState *nc,
 +   NetClientState *sender,
 +   unsigned flags,
 +   const struct iovec *iov,
 +   int iovcnt,
 +   NetPacketSent *sent_cb);
 +
 +struct NetFilterClass {
 +ObjectClass parent_class;
 +
 +FilterSetup *setup;
 +FilterCleanup *cleanup;
 +FilterReceiveIOV *receive_iov;
 +};
 +typedef struct NetFilterClass NetFilterClass;
>>> Not splitting the declaration is more concise:
>>>
>>>  typedef struct {
>>>  ObjectClass parent_class;
>>>  FilterSetup *setup;
>>>  FilterCleanup *cleanup;
>>>  FilterReceiveIOV *receive_iov;
>>>  } NetFilterClass;
>>>
>>> Are any of the methods optional?  If yes, please add suitable comments.
>>>
 +
 +
 +struct NetFilterState {
 +/* private */
 +Object parent;
 +
 +/* protected */
 +char *netdev_id;
 +NetClientState *netdev;
 +NetFilterChain chain;
 +QTAILQ_ENTRY(NetFilterState) next;
 +};
 +
 +#endif /* QEMU_NET_FILTER_H */
 diff --git a/include/net/net.h b/include/net/net.h
 index 6a6cbef..36e5fab 100644
 --- a/include/net/net.h
 +++ b/include/net/net.h
 @@ -92,6 +92,7 @@ struct NetClientState {
   NetClientDestructor *destructor;
   unsigned int queue_index;
   unsigned rxfilter_notify_enabled:1;
 +QTAILQ_HEAD(, NetFilterState) filters;
   };

   typedef struct NICState {
 diff --gi

Re: [Qemu-devel] [PULL 17/22] intel_iommu: Add support for translation for devices behind bridges

2015-09-24 Thread Michael S. Tsirkin

On Thu, Sep 24, 2015 at 04:20:53PM +0300, Michael S. Tsirkin wrote:
> From: Knut Omang 
> 
> - Use a hash table indexed on bus pointers to store information about buses
>   instead of using the bus numbers.
>   Bus pointers are stored in a new VTDBus struct together with the vector
>   of device address space pointers indexed by devfn.
> - The bus number is still used for lookup for selective SID based invalidate,
>   in which case the bus number is lazily resolved from the bus hash table and
>   cached in a separate index.
> 
> Signed-off-by: Knut Omang 
> Reviewed-by: Michael S. Tsirkin 
> Signed-off-by: Michael S. Tsirkin 

This fails to build with our minimal glib version:
Undefined symbols for architecture x86_64:
  "_g_hash_table_add", referenced from:
  _vtd_find_add_as in intel_iommu.o
  SETFILE lm32-softmmu/qemu-system-lm32

g_hash_table_add only appeared in glib 2.32; our minimum
is 2.22.

Dropped this patch for now, please fix and repost.

> ---
>  include/hw/i386/intel_iommu.h | 16 +++-
>  hw/i386/intel_iommu.c | 90 
> +++
>  hw/pci-host/q35.c | 25 ++--
>  3 files changed, 91 insertions(+), 40 deletions(-)
> 
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index e321ee4..5dbadb7 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -49,6 +49,7 @@ typedef struct VTDContextCacheEntry VTDContextCacheEntry;
>  typedef struct IntelIOMMUState IntelIOMMUState;
>  typedef struct VTDAddressSpace VTDAddressSpace;
>  typedef struct VTDIOTLBEntry VTDIOTLBEntry;
> +typedef struct VTDBus VTDBus;
>  
>  /* Context-Entry */
>  struct VTDContextEntry {
> @@ -65,7 +66,7 @@ struct VTDContextCacheEntry {
>  };
>  
>  struct VTDAddressSpace {
> -uint8_t bus_num;
> +PCIBus *bus;
>  uint8_t devfn;
>  AddressSpace as;
>  MemoryRegion iommu;
> @@ -73,6 +74,11 @@ struct VTDAddressSpace {
>  VTDContextCacheEntry context_cache_entry;
>  };
>  
> +struct VTDBus {
> +PCIBus* bus; /* A reference to the bus to provide 
> translation for */
> +VTDAddressSpace *dev_as[0];  /* A table of VTDAddressSpace objects 
> indexed by devfn */
> +};
> +
>  struct VTDIOTLBEntry {
>  uint64_t gfn;
>  uint16_t domain_id;
> @@ -114,7 +120,13 @@ struct IntelIOMMUState {
>  GHashTable *iotlb;  /* IOTLB */
>  
>  MemoryRegionIOMMUOps iommu_ops;
> -VTDAddressSpace **address_spaces[VTD_PCI_BUS_MAX];
> +GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* 
> reference */
> +VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by 
> bus number */
>  };
>  
> +/* Find the VTD Address space associated with the given bus pointer,
> + * create a new one if none exists
> + */
> +VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn);
> +
>  #endif
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 08055a8..da67c36 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -22,6 +22,7 @@
>  #include "hw/sysbus.h"
>  #include "exec/address-spaces.h"
>  #include "intel_iommu_internal.h"
> +#include "hw/pci/pci.h"
>  
>  /*#define DEBUG_INTEL_IOMMU*/
>  #ifdef DEBUG_INTEL_IOMMU
> @@ -166,19 +167,17 @@ static gboolean vtd_hash_remove_by_page(gpointer key, 
> gpointer value,
>   */
>  static void vtd_reset_context_cache(IntelIOMMUState *s)
>  {
> -VTDAddressSpace **pvtd_as;
>  VTDAddressSpace *vtd_as;
> -uint32_t bus_it;
> +VTDBus *vtd_bus;
> +GHashTableIter bus_it;
>  uint32_t devfn_it;
>  
> +g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
> +
>  VTD_DPRINTF(CACHE, "global context_cache_gen=1");
> -for (bus_it = 0; bus_it < VTD_PCI_BUS_MAX; ++bus_it) {
> -pvtd_as = s->address_spaces[bus_it];
> -if (!pvtd_as) {
> -continue;
> -}
> +while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
>  for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
> -vtd_as = pvtd_as[devfn_it];
> +vtd_as = vtd_bus->dev_as[devfn_it];
>  if (!vtd_as) {
>  continue;
>  }
> @@ -754,12 +753,13 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
>   * @is_write: The access is a write operation
>   * @entry: IOMMUTLBEntry that contain the addr to be translated and result
>   */
> -static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num,
> +static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> uint8_t devfn, hwaddr addr, bool is_write,
> IOMMUTLBEntry *entry)
>  {
>  IntelIOMMUState *s = vtd_as->iommu_state;
>  VTDContextEntry ce;
> +uint8_t bus_num = pci_bus_num(bus);
>  VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
>  uint64_t slpte;
>  uint32_t level

[Qemu-devel] [PULL v3] virtio,pc features, fixes

2015-09-24 Thread Michael S. Tsirkin

Dropped the offending iommu patch.

The following changes since commit fefa4b128de06cec6d513f00ee61e8208aed4a87:

  Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20150923.0' 
into staging (2015-09-23 21:39:46 +0100)

are available in the git repository at:

  git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to f178bc6b68e6c65cda7354ec4a671860b3123f7a:

  MAINTAINERS: add more devices to the PCI section (2015-09-25 09:40:04 +0300)


virtio,pc features, fixes

New features:
vhost-user multiqueue support
virtio-ccw virtio 1 support

Signed-off-by: Michael S. Tsirkin 


Changchun Ouyang (2):
  vhost-user: add multiple queue support
  vhost-user: add a new message to disable/enable a specific virt queue.

Cornelia Huck (4):
  virtio: ring sizes vs. reset
  virtio-ccw: support ring size changes
  virtio-ccw: feature bits > 31 handling
  virtio-ccw: enable virtio-1

Eduardo Habkost (3):
  q35: Move options common to all classes to pc_q35_machine_options()
  q35: Move options common to all classes to pc_i440fx_machine_options()
  pc: Introduce pc-*-2.5 machine classes

Jason Wang (1):
  virtio-net: unbreak self announcement and guest offloads after migration

Michael S. Tsirkin (1):
  vhost-user: add protocol feature negotiation

Paolo Bonzini (2):
  MAINTAINERS: add more devices to the PC section
  MAINTAINERS: add more devices to the PCI section

Pierre Morel (1):
  virtio: right size for virtio_queue_get_avail_size

Yuanhan Liu (4):
  vhost-user: use VHOST_USER_XXX macro for switch statement
  vhost: rename VHOST_RESET_OWNER to VHOST_RESET_DEVICE
  vhost-user: add VHOST_USER_GET_QUEUE_NUM message
  vhost: introduce vhost_backend_get_vq_index method

 qapi-schema.json  |   6 +-
 hw/s390x/virtio-ccw.h |   6 +-
 include/hw/compat.h   |   3 +
 include/hw/i386/pc.h  |   4 ++
 include/hw/virtio/vhost-backend.h |   4 ++
 include/hw/virtio/vhost.h |   2 +
 include/net/vhost_net.h   |   3 +
 linux-headers/linux/vhost.h   |   2 +-
 hw/i386/pc_piix.c |  22 --
 hw/i386/pc_q35.c  |  22 --
 hw/net/vhost_net.c|  44 ++--
 hw/net/virtio-net.c   |  48 -
 hw/s390x/s390-virtio-ccw.c|  20 ++
 hw/s390x/virtio-ccw.c |  64 -
 hw/virtio/vhost-backend.c |  10 ++-
 hw/virtio/vhost-user.c| 139 +++--
 hw/virtio/vhost.c |  20 +++---
 hw/virtio/virtio.c|  66 +-
 net/vhost-user.c  | 141 +-
 tests/vhost-user-test.c   |   2 +-
 MAINTAINERS   |  21 ++
 docs/specs/vhost-user.txt |  77 -
 qemu-options.hx   |   5 +-
 23 files changed, 590 insertions(+), 141 deletions(-)

Re: [Qemu-devel] [PATCH v11 01/12] qmp: delete qemu opts when delete an object

2015-09-24 Thread Jason Wang



On 09/24/2015 04:35 PM, Yang Hongyang wrote:
> On 09/24/2015 03:43 PM, Markus Armbruster wrote:
>> This has finally reached the front of my review queue.  I apologize for
>> the long delay.
>>
>> Copying Paolo for another pair of eyeballs (he wrote this code).
>>
> [...]
>>> +
>>> +opts = qemu_opts_find(qemu_find_opts_err("object", NULL), id);
>>> +qemu_opts_del(opts);
>>
>> qemu_find_opts_err("object", &error_abort) please, because when it
>> fails, we want to die right away, not when the null pointer it returns
>> gets dereferenced.
>
> Thanks for the review.
> Jason, do you want me to propose a fix on top of this series or simply
> drop
> this for now because this patch is an independent bug fix and won't
> affect the
> other filter patch series.

Will drop this patch from my tree.

Thanks

>
>>
>> Same sloppiness in netdev_del_completion() and qmp_netdev_del(), not
>> your patch's fault.
>>
>> Elsewhere, we store the QemuOpts in the object just so we can delete it:
>> DeviceState, DriveInfo.  Paolo, what do you think?
>
> I don't get it. Currently, only objects created at the beginning through
> the QEMU command line will be stored in the QemuOpts; objects that are created
> with object_add won't be stored in QemuOpts. Do you mean that DeviceState and
> DriveInfo store their QemuOpts explicitly so that they can delete it?
> Why don't we just delete it from objects directly instead?
>
>>
>>>   }
>>>
>>>   MemoryDeviceInfoList *qmp_query_memory_devices(Error **errp)
>> .
>>
>

[Qemu-devel] [PATCH v10 05/10] docs: block replication's description

2015-09-24 Thread Wen Congyang

Signed-off-by: Wen Congyang 
Signed-off-by: Yang Hongyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
---
 docs/block-replication.txt | 259 +
 1 file changed, 259 insertions(+)
 create mode 100644 docs/block-replication.txt

diff --git a/docs/block-replication.txt b/docs/block-replication.txt
new file mode 100644
index 000..eab62df
--- /dev/null
+++ b/docs/block-replication.txt
@@ -0,0 +1,259 @@
+Block replication
+
+Copyright Fujitsu, Corp. 2015
+Copyright (c) 2015 Intel Corporation
+Copyright (c) 2015 HUAWEI TECHNOLOGIES CO., LTD.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.
+See the COPYING file in the top-level directory.
+
+Block replication is used for continuous checkpoints. It is designed
+for COLO (COarse-grain LOck-stepping) where the Secondary VM is running.
+It can also be applied for FT/HA (Fault-tolerance/High Assurance) scenario,
+where the Secondary VM is not running.
+
+This document gives an overview of block replication's design.
+
+== Background ==
+High availability solutions such as micro checkpoint and COLO will do
+consecutive checkpoints. The VM state of Primary VM and Secondary VM is
+identical right after a VM checkpoint, but becomes different as the VM
+executes till the next checkpoint. To support disk contents checkpoint,
+the modified disk contents in the Secondary VM must be buffered, and are
+only dropped at next checkpoint time. To reduce the network transportation
+effort at the time of checkpoint, the disk modification operations of
+Primary disk are asynchronously forwarded to the Secondary node.
+
+== Workflow ==
+The following is the image of block replication workflow:
+
++--+++
+|Primary Write Requests||Secondary Write Requests|
++--+++
+  |   |
+  |  (4)
+  |   V
+  |  /-\
+  |  Copy and Forward| |
+  |-(1)--+   | Disk Buffer |
+  |  |   | |
+  | (3)  \-/
+  | speculative  ^
+  |write through(2)
+  |  |   |
+  V  V   |
+   +--+   ++
+   | Primary Disk |   | Secondary Disk |
+   +--+   ++
+
+1) Primary write requests will be copied and forwarded to Secondary
+   QEMU.
+2) Before Primary write requests are written to Secondary disk, the
+   original sector content will be read from Secondary disk and
+   buffered in the Disk buffer, but it will not overwrite the existing
+   sector content (it could be from either "Secondary Write Requests" or
+   previous COW of "Primary Write Requests") in the Disk buffer.
+3) Primary write requests will be written to Secondary disk.
+4) Secondary write requests will be buffered in the Disk buffer and it
+   will overwrite the existing sector content in the buffer.
+
+== Architecture ==
+We are going to implement block replication from many basic
+blocks that are already in QEMU.
+
+ virtio-blk   ||
+ ^||.--
+ |||| Secondary
+1 Quorum  ||'--
+ /  \ ||
+/\||
+   Primary2 filter
+ disk ^
 virtio-blk
+  |
  ^
+3 NBD  --->  3 NBD 
  |
+client|| server
  2 filter
+  ||^  
  ^
+. |||  
  |
+Primary | ||  Secondary disk <- hidden-disk 5 
<- active-disk 4
+' |||  backing^   backing
+  ||| |
+  ||| |
+  ||'-'
+  ||   drive-backup sync=none 6
+
+1) The disk on the

[Qemu-devel] [PATCH v10 06/10] Add new block driver interfaces to control block replication

2015-09-24 Thread Wen Congyang

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Cc: Luiz Capitulino 
Cc: Michael Roth 
Reviewed-by: Paolo Bonzini 
---
 block.c   | 43 +++
 include/block/block.h |  5 +
 include/block/block_int.h | 14 ++
 qapi/block-core.json  | 13 +
 4 files changed, 75 insertions(+)

diff --git a/block.c b/block.c
index f9a985c..5cb916b 100644
--- a/block.c
+++ b/block.c
@@ -4253,3 +4253,46 @@ void bdrv_del_child(BlockDriverState *parent_bs, 
BlockDriverState *child_bs,
 
 parent_bs->drv->bdrv_del_child(parent_bs, child_bs, errp);
 }
+
+void bdrv_start_replication(BlockDriverState *bs, ReplicationMode mode,
+Error **errp)
+{
+BlockDriver *drv = bs->drv;
+
+if (drv && drv->bdrv_start_replication) {
+drv->bdrv_start_replication(bs, mode, errp);
+} else if (bs->file) {
+bdrv_start_replication(bs->file, mode, errp);
+} else {
+error_setg(errp, "The BDS %s doesn't support starting block"
+   " replication", bs->filename);
+}
+}
+
+void bdrv_do_checkpoint(BlockDriverState *bs, Error **errp)
+{
+BlockDriver *drv = bs->drv;
+
+if (drv && drv->bdrv_do_checkpoint) {
+drv->bdrv_do_checkpoint(bs, errp);
+} else if (bs->file) {
+bdrv_do_checkpoint(bs->file, errp);
+} else {
+error_setg(errp, "The BDS %s doesn't support block checkpoint",
+   bs->filename);
+}
+}
+
+void bdrv_stop_replication(BlockDriverState *bs, bool failover, Error **errp)
+{
+BlockDriver *drv = bs->drv;
+
+if (drv && drv->bdrv_stop_replication) {
+drv->bdrv_stop_replication(bs, failover, errp);
+} else if (bs->file) {
+bdrv_stop_replication(bs->file, failover, errp);
+} else {
+error_setg(errp, "The BDS %s doesn't support stopping block"
+   " replication", bs->filename);
+}
+}
diff --git a/include/block/block.h b/include/block/block.h
index 5154388..40ef59f 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -611,4 +611,9 @@ void bdrv_add_child(BlockDriverState *parent, 
BlockDriverState *child,
 void bdrv_del_child(BlockDriverState *parent, BlockDriverState *child,
 Error **errp);
 
+void bdrv_start_replication(BlockDriverState *bs, ReplicationMode mode,
+Error **errp);
+void bdrv_do_checkpoint(BlockDriverState *bs, Error **errp);
+void bdrv_stop_replication(BlockDriverState *bs, bool failover, Error **errp);
+
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 636d0c9..ee4b8fa 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -293,6 +293,20 @@ struct BlockDriver {
 void (*bdrv_del_child)(BlockDriverState *parent, BlockDriverState *child,
Error **errp);
 
+void (*bdrv_start_replication)(BlockDriverState *bs, ReplicationMode mode,
+   Error **errp);
+/* Drop Disk buffer when doing checkpoint. */
+void (*bdrv_do_checkpoint)(BlockDriverState *bs, Error **errp);
+/*
+ * After failover, we should flush Disk buffer into secondary disk
+ * and stop block replication.
+ *
+ * If the guest is shutdown, we should drop Disk buffer and stop
+ * block replication.
+ */
+void (*bdrv_stop_replication)(BlockDriverState *bs, bool failover,
+  Error **errp);
+
 QLIST_ENTRY(BlockDriver) list;
 };
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 000ae47..d5a177b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1797,6 +1797,19 @@
 '*read-pattern': 'QuorumReadPattern' } }
 
 ##
+# @ReplicationMode
+#
+# An enumeration of replication modes.
+#
+# @primary: Primary mode, the vm's state will be sent to secondary QEMU.
+#
+# @secondary: Secondary mode, receive the vm's state from primary QEMU.
+#
+# Since: 2.5
+##
+{ 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.
-- 
2.4.3

[Qemu-devel] [PATCH v10 08/10] Implement new driver for block replication

2015-09-24 Thread Wen Congyang

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
---
 block/Makefile.objs |   1 +
 block/replication.c | 471 
 2 files changed, 472 insertions(+)
 create mode 100644 block/replication.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index fa05f37..94c1d03 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -23,6 +23,7 @@ block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
+block-obj-y += replication.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/replication.c b/block/replication.c
new file mode 100644
index 000..813f610
--- /dev/null
+++ b/block/replication.c
@@ -0,0 +1,471 @@
+/*
+ * Replication Block filter
+ *
+ * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2015 Intel Corporation
+ * Copyright (c) 2015 FUJITSU LIMITED
+ *
+ * Author:
+ *   Wen Congyang 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+#include "block/nbd.h"
+
+typedef struct BDRVReplicationState {
+ReplicationMode mode;
+int replication_state;
+BlockDriverState *active_disk;
+BlockDriverState *hidden_disk;
+BlockDriverState *secondary_disk;
+int error;
+} BDRVReplicationState;
+
+enum {
+BLOCK_REPLICATION_NONE, /* block replication is not started */
+BLOCK_REPLICATION_RUNNING,  /* block replication is running */
+BLOCK_REPLICATION_DONE, /* block replication is done(failover) */
+};
+
+#define COMMIT_CLUSTER_BITS 16
+#define COMMIT_CLUSTER_SIZE (1 << COMMIT_CLUSTER_BITS)
+#define COMMIT_SECTORS_PER_CLUSTER (COMMIT_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
+
+static void replication_stop(BlockDriverState *bs, bool failover, Error 
**errp);
+
+#define REPLICATION_MODE"mode"
+static QemuOptsList replication_runtime_opts = {
+.name = "replication",
+.head = QTAILQ_HEAD_INITIALIZER(replication_runtime_opts.head),
+.desc = {
+{
+.name = REPLICATION_MODE,
+.type = QEMU_OPT_STRING,
+},
+{ /* end of list */ }
+},
+};
+
+static int replication_open(BlockDriverState *bs, QDict *options,
+int flags, Error **errp)
+{
+int ret;
+BDRVReplicationState *s = bs->opaque;;
+Error *local_err = NULL;
+QemuOpts *opts = NULL;
+const char *mode;
+
+ret = -EINVAL;
+opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
+qemu_opts_absorb_qdict(opts, options, &local_err);
+if (local_err) {
+goto fail;
+}
+
+mode = qemu_opt_get(opts, REPLICATION_MODE);
+if (!mode) {
+error_setg(&local_err, "Missing the option mode");
+goto fail;
+}
+
+if (!strcmp(mode, "primary")) {
+s->mode = REPLICATION_MODE_PRIMARY;
+} else if (!strcmp(mode, "secondary")) {
+s->mode = REPLICATION_MODE_SECONDARY;
+} else {
+error_setg(&local_err,
+   "The option mode's value should be primary or secondary");
+goto fail;
+}
+
+ret = 0;
+
+fail:
+qemu_opts_del(opts);
+/* propagate error */
+if (local_err) {
+error_propagate(errp, local_err);
+}
+return ret;
+}
+
+static void replication_close(BlockDriverState *bs)
+{
+BDRVReplicationState *s = bs->opaque;
+
+if (s->replication_state == BLOCK_REPLICATION_RUNNING) {
+replication_stop(bs, false, NULL);
+}
+}
+
+static int64_t replication_getlength(BlockDriverState *bs)
+{
+return bdrv_getlength(bs->file);
+}
+
+static int replication_get_io_status(BDRVReplicationState *s)
+{
+switch (s->replication_state) {
+case BLOCK_REPLICATION_NONE:
+return -EIO;
+case BLOCK_REPLICATION_RUNNING:
+return 0;
+case BLOCK_REPLICATION_DONE:
+return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 1;
+default:
+abort();
+}
+}
+
+static int replication_return_value(BDRVReplicationState *s, int ret)
+{
+if (s->mode == REPLICATION_MODE_SECONDARY) {
+return ret;
+}
+
+if (ret < 0) {
+s->error = ret;
+ret = 0;
+}
+
+return ret;
+}
+
+static coroutine_fn int replication_co_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ int remaining_sectors,
+ QEMUIOVector *qiov)
+{
+BDRVReplicationState *s = bs->opaque;
+int ret;
+
+if (s->mode == REPLICATION_MODE_PRIMARY) {
+/* We only use it to forward primary write requests */
+return -EIO;
+}
+
+ret = replication_get_io_status(s);
+if (ret < 0) {
+return ret;
+}
+
+/*
+ * After failover, because we don'

[Qemu-devel] [PATCH v10 07/10] quorum: implement block driver interfaces for block replication

2015-09-24 Thread Wen Congyang

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Reviewed-by: Alberto Garcia 
---
 block/quorum.c | 77 ++
 1 file changed, 77 insertions(+)

diff --git a/block/quorum.c b/block/quorum.c
index 111a57b..d647ab4 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -85,6 +85,8 @@ typedef struct BDRVQuorumState {
 */
 
 QuorumReadPattern read_pattern;
+
+int replication_index; /* store which child supports block replication */
 } BDRVQuorumState;
 
 typedef struct QuorumAIOCB QuorumAIOCB;
@@ -945,6 +947,7 @@ static int quorum_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 
 g_free(opened);
+s->replication_index = -1;
 goto exit;
 
 close_exit:
@@ -1093,6 +1096,76 @@ static void quorum_refresh_filename(BlockDriverState *bs)
 bs->full_open_options = opts;
 }
 
+static void quorum_start_replication(BlockDriverState *bs, ReplicationMode 
mode,
+ Error **errp)
+{
+BDRVQuorumState *s = bs->opaque;
+int count = 0, i, index;
+Error *local_err = NULL;
+
+/*
+ * TODO: support REPLICATION_MODE_SECONDARY if we allow secondary
+ * QEMU becoming primary QEMU.
+ */
+if (mode != REPLICATION_MODE_PRIMARY) {
+error_setg(errp, "The replication mode for quorum should be 
'primary'");
+return;
+}
+
+if (s->read_pattern != QUORUM_READ_PATTERN_FIFO) {
+error_setg(errp, "Block replication needs read pattern 'fifo'");
+return;
+}
+
+for (i = 0; i < s->num_children; i++) {
+bdrv_start_replication(s->bs[i], mode, &local_err);
+if (local_err) {
+error_free(local_err);
+local_err = NULL;
+} else {
+count++;
+index = i;
+}
+}
+
+if (count == 0) {
+error_setg(errp, "No child supports block replication");
+} else if (count > 1) {
+for (i = 0; i < s->num_children; i++) {
+bdrv_stop_replication(s->bs[i], false, NULL);
+}
+error_setg(errp, "Too many children support block replication");
+} else {
+s->replication_index = index;
+}
+}
+
+static void quorum_do_checkpoint(BlockDriverState *bs, Error **errp)
+{
+BDRVQuorumState *s = bs->opaque;
+
+if (s->replication_index < 0) {
+error_setg(errp, "Block replication is not running");
+return;
+}
+
+bdrv_do_checkpoint(s->bs[s->replication_index], errp);
+}
+
+static void quorum_stop_replication(BlockDriverState *bs, bool failover,
+Error **errp)
+{
+BDRVQuorumState *s = bs->opaque;
+
+if (s->replication_index < 0) {
+error_setg(errp, "Block replication is not running");
+return;
+}
+
+bdrv_stop_replication(s->bs[s->replication_index], failover, errp);
+s->replication_index = -1;
+}
+
 static BlockDriver bdrv_quorum = {
 .format_name= "quorum",
 .protocol_name  = "quorum",
@@ -1119,6 +1192,10 @@ static BlockDriver bdrv_quorum = {
 
 .is_filter  = true,
 .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
+
+.bdrv_start_replication = quorum_start_replication,
+.bdrv_do_checkpoint = quorum_do_checkpoint,
+.bdrv_stop_replication  = quorum_stop_replication,
 };
 
 static void bdrv_quorum_init(void)
-- 
2.4.3

[Qemu-devel] [PATCH v10 10/10] Add a new API to start/stop replication, do checkpoint to all BDSes

2015-09-24 Thread Wen Congyang

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
---
 block.c   | 83 +++
 include/block/block.h |  4 +++
 2 files changed, 87 insertions(+)

diff --git a/block.c b/block.c
index 5cb916b..5891c4d 100644
--- a/block.c
+++ b/block.c
@@ -4296,3 +4296,86 @@ void bdrv_stop_replication(BlockDriverState *bs, bool 
failover, Error **errp)
" replication", bs->filename);
 }
 }
+
+void bdrv_start_replication_all(ReplicationMode mode, Error **errp)
+{
+BlockDriverState *bs = NULL, *temp = NULL;
+Error *local_err = NULL;
+
+while ((bs = bdrv_next(bs))) {
+if (!QLIST_EMPTY(&bs->parents)) {
+/* It is not top BDS */
+continue;
+}
+
+if (bdrv_is_read_only(bs) || !bdrv_is_inserted(bs)) {
+continue;
+}
+
+bdrv_start_replication(bs, mode, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+goto fail;
+}
+}
+
+return;
+
+fail:
+while ((temp = bdrv_next(temp)) && bs != temp) {
+bdrv_stop_replication(temp, false, NULL);
+}
+}
+
+void bdrv_do_checkpoint_all(Error **errp)
+{
+BlockDriverState *bs = NULL;
+Error *local_err = NULL;
+
+while ((bs = bdrv_next(bs))) {
+if (!QLIST_EMPTY(&bs->parents)) {
+/* It is not top BDS */
+continue;
+}
+
+if (bdrv_is_read_only(bs) || !bdrv_is_inserted(bs)) {
+continue;
+}
+
+bdrv_do_checkpoint(bs, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
+
+void bdrv_stop_replication_all(bool failover, Error **errp)
+{
+BlockDriverState *bs = NULL;
+Error *local_err = NULL;
+
+while ((bs = bdrv_next(bs))) {
+if (!QLIST_EMPTY(&bs->parents)) {
+/* It is not top BDS */
+continue;
+}
+
+if (bdrv_is_read_only(bs) || !bdrv_is_inserted(bs)) {
+continue;
+}
+
+bdrv_stop_replication(bs, failover, &local_err);
+if (!errp) {
+/*
+ * The caller doesn't care the result, they just
+ * want to stop all block's replication.
+ */
+continue;
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
diff --git a/include/block/block.h b/include/block/block.h
index 40ef59f..eb6a4a2 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -616,4 +616,8 @@ void bdrv_start_replication(BlockDriverState *bs, 
ReplicationMode mode,
 void bdrv_do_checkpoint(BlockDriverState *bs, Error **errp);
 void bdrv_stop_replication(BlockDriverState *bs, bool failover, Error **errp);
 
+void bdrv_start_replication_all(ReplicationMode mode, Error **errp);
+void bdrv_do_checkpoint_all(Error **errp);
+void bdrv_stop_replication_all(bool failover, Error **errp);
+
 #endif
-- 
2.4.3

[Qemu-devel] [PATCH v10 03/10] Allow creating backup jobs when opening BDS

2015-09-24 Thread Wen Congyang

When opening BDS, we need to create backup jobs for
image-fleecing.

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Jeff Cody 
---
 block/Makefile.objs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 58ef2ef..fa05f37 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -22,10 +22,10 @@ block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o
 block-obj-y += write-threshold.o
+block-obj-y += backup.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
-common-obj-y += backup.o
 
 iscsi.o-cflags := $(LIBISCSI_CFLAGS)
 iscsi.o-libs   := $(LIBISCSI_LIBS)
-- 
2.4.3

[Qemu-devel] [PATCH v10 00/10] Block replication for continuous checkpoints

2015-09-24 Thread Wen Congyang

Block replication is a very important feature which is used for
continuous checkpoints(for example: COLO).

You can find detailed information about block replication here:
http://wiki.qemu.org/Features/BlockReplication

Usage:
Please refer to docs/block-replication.txt

This patch series is based on the following patch series:
1. http://lists.nongnu.org/archive/html/qemu-devel/2015-09/msg05514.html
2. http://lists.nongnu.org/archive/html/qemu-devel/2015-09/msg04900.html

You can get the patch here:
https://github.com/coloft/qemu/tree/wency/block-replication-v10

You can get the patch with framework here:
https://github.com/coloft/qemu/tree/wency/colo_framework_v9.5

TODO:
1. Continuous block replication. It will be started after basic functions
   are accepted.

Change Log:
V10:
1. Use blockdev-remove-medium and blockdev-insert-medium to replace backing
   reference.
2. Address the comments from Eric Blake
V9:
1. Update the error messages
2. Rebase to the newest qemu
3. Split child add/delete support. These patches are sent in another patchset.
V8:
1. Address Alberto Garcia's comments
V7:
1. Implement adding/removing quorum child. Remove the option non-connect.
2. Simplify the backing reference option according to Stefan Hajnoczi's
   suggestion
V6:
1. Rebase to the newest qemu.
V5:
1. Address the comments from Gong Lei
2. Speed the failover up. The secondary vm can take over very quickly even
   if there are too many I/O requests.
V4:
1. Introduce a new driver, replication, to avoid touching nbd and qcow2.
V3:
1: use error_setg() instead of error_set()
2. Add a new block job API
3. Active disk, hidden disk and nbd target uses the same AioContext
4. Add a testcase to test new hbitmap API
V2:
1. Redesign the secondary qemu(use image-fleecing)
2. Use Error objects to return error message
3. Address the comments from Max Reitz and Eric Blake

Wen Congyang (10):
  allow writing to the backing file
  Backup: clear all bitmap when doing block checkpoint
  Allow creating backup jobs when opening BDS
  block: make bdrv_put_ref_bh_schedule() as a public API
  docs: block replication's description
  Add new block driver interfaces to control block replication
  quorum: implement block driver interfaces for block replication
  Implement new driver for block replication
  support replication driver in blockdev-add
  Add a new API to start/stop replication, do checkpoint to all BDSes

 block.c| 192 +-
 block/Makefile.objs|   3 +-
 block/backup.c |  14 ++
 block/quorum.c |  77 
 block/replication.c| 471 +
 blockdev.c |  37 +---
 blockjob.c |  11 ++
 docs/block-replication.txt | 259 +
 include/block/block.h  |  10 +
 include/block/block_int.h  |  14 ++
 include/block/blockjob.h   |  12 ++
 qapi/block-core.json   |  34 +++-
 12 files changed, 1098 insertions(+), 36 deletions(-)
 create mode 100644 block/replication.c
 create mode 100644 docs/block-replication.txt

-- 
2.4.3

[Qemu-devel] [PATCH v10 04/10] block: make bdrv_put_ref_bh_schedule() as a public API

2015-09-24 Thread Wen Congyang

Signed-off-by: Wen Congyang 
---
 block.c   | 25 +
 blockdev.c| 37 ++---
 include/block/block.h |  1 +
 3 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/block.c b/block.c
index 328c52f..f9a985c 100644
--- a/block.c
+++ b/block.c
@@ -3597,6 +3597,31 @@ void bdrv_unref(BlockDriverState *bs)
 }
 }
 
+typedef struct {
+QEMUBH *bh;
+BlockDriverState *bs;
+} BDRVPutRefBH;
+
+static void bdrv_put_ref_bh(void *opaque)
+{
+BDRVPutRefBH *s = opaque;
+
+bdrv_unref(s->bs);
+qemu_bh_delete(s->bh);
+g_free(s);
+}
+
+/* Release a BDS reference in a BH */
+void bdrv_put_ref_bh_schedule(BlockDriverState *bs)
+{
+BDRVPutRefBH *s;
+
+s = g_new(BDRVPutRefBH, 1);
+s->bh = qemu_bh_new(bdrv_put_ref_bh, s);
+s->bs = bs;
+qemu_bh_schedule(s->bh);
+}
+
 struct BdrvOpBlocker {
 Error *reason;
 QLIST_ENTRY(BdrvOpBlocker) list;
diff --git a/blockdev.c b/blockdev.c
index 3289cc3..11bc992 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -278,37 +278,6 @@ static void bdrv_format_print(void *opaque, const char 
*name)
 error_printf(" %s", name);
 }
 
-typedef struct {
-QEMUBH *bh;
-BlockDriverState *bs;
-} BDRVPutRefBH;
-
-static void bdrv_put_ref_bh(void *opaque)
-{
-BDRVPutRefBH *s = opaque;
-
-bdrv_unref(s->bs);
-qemu_bh_delete(s->bh);
-g_free(s);
-}
-
-/*
- * Release a BDS reference in a BH
- *
- * It is not safe to use bdrv_unref() from a callback function when the callers
- * still need the BlockDriverState.  In such cases we schedule a BH to release
- * the reference.
- */
-static void bdrv_put_ref_bh_schedule(BlockDriverState *bs)
-{
-BDRVPutRefBH *s;
-
-s = g_new(BDRVPutRefBH, 1);
-s->bh = qemu_bh_new(bdrv_put_ref_bh, s);
-s->bs = bs;
-qemu_bh_schedule(s->bh);
-}
-
 static int parse_block_error_action(const char *buf, bool is_read, Error 
**errp)
 {
 if (!strcmp(buf, "ignore")) {
@@ -2534,6 +2503,12 @@ static void block_job_cb(void *opaque, int ret)
 block_job_event_completed(bs->job, msg);
 }
 
+
+/*
+ * It is not safe to use bdrv_unref() from a callback function when the
+ * callers still need the BlockDriverState. In such cases we schedule
+ * a BH to release the reference.
+ */
 bdrv_put_ref_bh_schedule(bs);
 }
 
diff --git a/include/block/block.h b/include/block/block.h
index e4be19f..5154388 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -505,6 +505,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild 
*child);
 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
  BlockDriverState *child_bs,
  const BdrvChildRole *child_role);
+void bdrv_put_ref_bh_schedule(BlockDriverState *bs);
 
 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
-- 
2.4.3

[Qemu-devel] [PATCH v10 09/10] support replication driver in blockdev-add

2015-09-24 Thread Wen Congyang

Signed-off-by: Wen Congyang 
---
 qapi/block-core.json | 21 ++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index d5a177b..0907a72 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -219,7 +219,7 @@
 #   'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
 #   2.2: 'archipelago' added, 'cow' dropped
 #   2.3: 'host_floppy' deprecated
-#   2.5: 'host_floppy' dropped
+#   2.5: 'host_floppy' dropped, 'replication' added
 #
 # @backing_file: #optional the name of the backing file (for copy-on-write)
 #
@@ -1375,6 +1375,7 @@
 # Drivers that are supported in block device operations.
 #
 # @host_device, @host_cdrom: Since 2.1
+# @replication: Since 2.5
 #
 # Since: 2.0
 ##
@@ -1382,8 +1383,8 @@
   'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
 'dmg', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
 'http', 'https', 'null-aio', 'null-co', 'parallels',
-'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', 'vdi', 'vhdx',
-'vmdk', 'vpc', 'vvfat' ] }
+'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'replication',
+'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
 
 ##
 # @BlockdevOptionsBase
@@ -1810,6 +1811,19 @@
 { 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] }
 
 ##
+# @BlockdevOptionsReplication
+#
+# Driver specific block device options for replication
+#
+# @mode: the replication mode
+#
+# Since: 2.5
+##
+{ 'struct': 'BlockdevOptionsReplication',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { 'mode': 'ReplicationMode'  } }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.
@@ -1846,6 +1860,7 @@
   'quorum': 'BlockdevOptionsQuorum',
   'raw':'BlockdevOptionsGenericFormat',
 # TODO rbd: Wait for structured options
+  'replication':'BlockdevOptionsReplication',
 # TODO sheepdog: Wait for structured options
 # TODO ssh: Should take InetSocketAddress for 'host'?
   'tftp':   'BlockdevOptionsFile',
-- 
2.4.3

[Qemu-devel] [PATCH v10 02/10] Backup: clear all bitmap when doing block checkpoint

2015-09-24 Thread Wen Congyang

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Reviewed-by: Jeff Cody 
---
 block/backup.c   | 14 ++
 blockjob.c   | 11 +++
 include/block/blockjob.h | 12 
 3 files changed, 37 insertions(+)

diff --git a/block/backup.c b/block/backup.c
index c61e4c3..5e5995e 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -214,11 +214,25 @@ static void backup_iostatus_reset(BlockJob *job)
 }
 }
 
+static void backup_do_checkpoint(BlockJob *job, Error **errp)
+{
+BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+
+if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
+error_setg(errp, "The backup job only supports block checkpoint in"
+   " sync=none mode");
+return;
+}
+
+hbitmap_reset_all(backup_job->bitmap);
+}
+
 static const BlockJobDriver backup_job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
 .job_type   = BLOCK_JOB_TYPE_BACKUP,
 .set_speed  = backup_set_speed,
 .iostatus_reset = backup_iostatus_reset,
+.do_checkpoint  = backup_do_checkpoint,
 };
 
 static BlockErrorAction backup_error_action(BackupBlockJob *job,
diff --git a/blockjob.c b/blockjob.c
index ca4be94..ea4c44a 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -405,3 +405,14 @@ void block_job_defer_to_main_loop(BlockJob *job,
 
 qemu_bh_schedule(data->bh);
 }
+
+void block_job_do_checkpoint(BlockJob *job, Error **errp)
+{
+if (!job->driver->do_checkpoint) {
+error_setg(errp, "The job %s doesn't support block checkpoint",
+   BlockJobType_lookup[job->driver->job_type]);
+return;
+}
+
+job->driver->do_checkpoint(job, errp);
+}
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index dd9d5e6..0b4f386 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -50,6 +50,9 @@ typedef struct BlockJobDriver {
  * manually.
  */
 void (*complete)(BlockJob *job, Error **errp);
+
+/** Optional callback for job types that support checkpoint. */
+void (*do_checkpoint)(BlockJob *job, Error **errp);
 } BlockJobDriver;
 
 /**
@@ -356,4 +359,13 @@ void block_job_defer_to_main_loop(BlockJob *job,
   BlockJobDeferToMainLoopFn *fn,
   void *opaque);
 
+/**
+ * block_job_do_checkpoint:
+ * @job: The job.
+ * @errp: Error object.
+ *
+ * Do block checkpoint on the specified job.
+ */
+void block_job_do_checkpoint(BlockJob *job, Error **errp);
+
 #endif
-- 
2.4.3

[Qemu-devel] [PATCH v10 01/10] allow writing to the backing file

2015-09-24 Thread Wen Congyang

For block replication, we have such backing chain:
secondary disk <-- hidden disk <-- active disk
The secondary disk is a top BDS (it uses a backing reference), so it can be
opened in read-write mode. But the hidden disk is read-only, and we need to
write to the hidden disk (the backup job will write data to it).

TODO: support opening backing file in read-write mode if the BDS is
created by QMP command blockdev-add.

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
---
 block.c | 41 -
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 073d8d6..328c52f 100644
--- a/block.c
+++ b/block.c
@@ -738,6 +738,15 @@ static const BdrvChildRole child_backing = {
 .inherit_flags = bdrv_backing_flags,
 };
 
+static int bdrv_backing_rw_flags(int flags)
+{
+return bdrv_backing_flags(flags) | BDRV_O_RDWR;
+}
+
+static const BdrvChildRole child_backing_rw = {
+.inherit_flags = bdrv_backing_rw_flags,
+};
+
 static int bdrv_open_flags(BlockDriverState *bs, int flags)
 {
 int open_flags = flags | BDRV_O_CACHE_WB;
@@ -1150,6 +1159,20 @@ out:
 bdrv_refresh_limits(bs, NULL);
 }
 
+#define ALLOW_WRITE_BACKING_FILE"allow-write-backing-file"
+static QemuOptsList backing_file_opts = {
+.name = "backing_file",
+.head = QTAILQ_HEAD_INITIALIZER(backing_file_opts.head),
+.desc = {
+{
+.name = ALLOW_WRITE_BACKING_FILE,
+.type = QEMU_OPT_BOOL,
+.help = "allow writes to backing file",
+},
+{ /* end of list */ }
+},
+};
+
 /*
  * Opens the backing file for a BlockDriverState if not yet open
  *
@@ -1164,6 +1187,9 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict 
*options, Error **errp)
 int ret = 0;
 BlockDriverState *backing_hd;
 Error *local_err = NULL;
+QemuOpts *opts = NULL;
+bool child_rw = false;
+const BdrvChildRole *child_role = NULL;
 
 if (bs->backing_hd != NULL) {
 QDECREF(options);
@@ -1176,6 +1202,18 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict 
*options, Error **errp)
 }
 
 bs->open_flags &= ~BDRV_O_NO_BACKING;
+
+opts = qemu_opts_create(&backing_file_opts, NULL, 0, &error_abort);
+qemu_opts_absorb_qdict(opts, options, &local_err);
+if (local_err) {
+ret = -EINVAL;
+error_propagate(errp, local_err);
+QDECREF(options);
+goto free_exit;
+}
+child_rw = qemu_opt_get_bool(opts, ALLOW_WRITE_BACKING_FILE, false);
+child_role = child_rw ? &child_backing_rw : &child_backing;
+
 if (qdict_haskey(options, "file.filename")) {
 backing_filename[0] = '\0';
 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
@@ -1208,7 +1246,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict 
*options, Error **errp)
 assert(bs->backing_hd == NULL);
 ret = bdrv_open_inherit(&backing_hd,
 *backing_filename ? backing_filename : NULL,
-NULL, options, 0, bs, &child_backing, &local_err);
+NULL, options, 0, bs, child_role, &local_err);
 if (ret < 0) {
 bdrv_unref(backing_hd);
 backing_hd = NULL;
@@ -1222,6 +1260,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict 
*options, Error **errp)
 bdrv_set_backing_hd(bs, backing_hd);
 
 free_exit:
+qemu_opts_del(opts);
 g_free(backing_filename);
 return ret;
 }
-- 
2.4.3

Re: [Qemu-devel] [PATCH 11/16] Add new block driver interfaces to control block replication

2015-09-24 Thread Wen Congyang

On 09/03/2015 12:33 AM, Eric Blake wrote:
> On 09/02/2015 02:51 AM, Wen Congyang wrote:
>> Signed-off-by: Wen Congyang 
>> Signed-off-by: zhanghailiang 
>> Signed-off-by: Gonglei 
>> Cc: Luiz Capitulino 
>> Cc: Michael Roth 
>> Reviewed-by: Paolo Bonzini 
>> ---
>>  block.c   | 43 +++
>>  include/block/block.h |  5 +
>>  include/block/block_int.h | 14 ++
>>  qapi/block-core.json  | 15 +++
>>  4 files changed, 77 insertions(+)
>>
> 
> Just an interface review for now:
> 
>> +++ b/qapi/block-core.json
>> @@ -1810,6 +1810,21 @@
>>'data': { '*export': 'str' } }
>>  
>>  ##
>> +# @ReplicationMode
>> +#
>> +# An enumeration of replication modes.
>> +#
>> +# @unprotected: Replication is not started or after failover.
> 
> Maybe:
> 
> Replication is either not started, or has experienced failover.

This is internal state, and this mode is used to tell qemu which
side it is on.

Thanks
Wen Congyang

> 
>> +#
>> +# @primary: Primary mode, the vm's state will be sent to secondary QEMU.
>> +#
>> +# @secondary: Secondary mode, receive the vm's state from primary QEMU.
>> +#
>> +# Since: 2.4
> 
> You've missed 2.4; this should be 2.5.
> 
>> +##
>> +{ 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] }
> 
> Where is 'unprotected' in this list?
>

Re: [Qemu-devel] [PATCH v3 6/7] qdev: Protect device-list-properties against broken devices

2015-09-24 Thread Markus Armbruster

Eduardo Habkost  writes:

> On Thu, Sep 24, 2015 at 08:57:21PM +0200, Markus Armbruster wrote:
> [...]
>> diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
>> index ff249af..7692090 100644
>> --- a/hw/arm/allwinner-a10.c
>> +++ b/hw/arm/allwinner-a10.c
>> @@ -103,6 +103,8 @@ static void aw_a10_class_init(ObjectClass *oc, void 
>> *data)
>>  DeviceClass *dc = DEVICE_CLASS(oc);
>>  
>>  dc->realize = aw_a10_realize;
>> +/* Reason: creates a CPU, thus use after free(), see cpu_class_init() */
>> +dc->cannot_even_create_with_object_new_yet = true;
>
> The comments at aw_a10_class_init(), digic_class_init(),
> fsl_imx25_class_init(), fsl_imx31_class_init(), and
> xlnx_zynqmp_class_init() are now outdated, as cpu_class_init() doesn't
> set cannot_even_create_with_object_new_yet anymore.
>
> We could do this:
> * Update the comments to "Reason: creates an ARM CPU, thus use after
>   free(), see arm_cpu_class_init()"

Yes.

> * Add a note at arm_cpu_class_init() saying that we can probably
>   unset cannot_even_create_with_object_new_yet in those functions
>   once we fix TYPE_ARM_CPU

Okay.

Thanks!

Re: [Qemu-devel] [v4][PATCH 0/2] libxl: try to support IGD passthrough for qemu upstream

2015-09-24 Thread Chen, Tiejun


Ping...

Thanks
Tiejun

On 9/18/2015 4:30 PM, Tiejun Chen wrote:

Ian,

As we discussed previously,

http://patchwork.ozlabs.org/patch/457055/

now it's time to push this on the xen/tools side, since all the qemu parts
have been merged.

https://lists.gnu.org/archive/html/qemu-devel/2015-09/msg02094.html

v4:

Ian,

Actually we had v3.5 online previously, which was reviewed by you.

http://permalink.gmane.org/gmane.comp.emulators.qemu/329100

So here I just refine the code a little, only for patch #2,
according to our last conversation.

v3:

* Refine some codes based on Campbell's feedback so thanks for Campbell's
   kind guideline to patch #2
* Update the manpages in patch #2

v2:

* Refine patch #2's head description
* Improve codes quality inside patch #1 based on Wei's comments
* Refill the summary inside patch #0 based on Konrad and Wei's suggestion

When we're working to support IGD GFX passthrough with qemu
upstream, instead of "-gfx_passthru" we'd like to make that
a machine option, "-machine xxx,igd-passthru=on".

https://lists.nongnu.org/archive/html/qemu-devel/2015-01/msg02050.html

This requires a change on the tools side.

After a discussion with Campbell, we'd like to construct a table to record
all IGD devices we can support. If we hit that table, we should pass that
option. And so we also introduce a new field of type, 'gfx_passthru_kind',
to cooperate with 'gfx_passthru' to cover all scenarios like this,

 gfx_passthru = 0     => sets build_info.u.gfx_passthru to false
 gfx_passthru = 1     => sets build_info.u.gfx_passthru to true and
                         build_info.u.gfx_passthru_kind to DEFAULT
 gfx_passthru = "igd" => sets build_info.u.gfx_passthru to false
                         and build_info.u.gfx_passthru_kind to IGD

And note actually that option "-gfx_passthru" is just introduced to
work for qemu-xen-traditional so we should get this away from
libxl__build_device_model_args_new() in the case of qemu upstream.


Tiejun Chen (2):
   libxl: introduce libxl__is_igd_vga_passthru
   libxl: introduce gfx_passthru_kind

  docs/man/xl.cfg.pod.5|  35 --
  tools/libxl/libxl.h  |   6 ++
  tools/libxl/libxl_dm.c   |  46 +++--
  tools/libxl/libxl_internal.h |   2 +
  tools/libxl/libxl_pci.c  | 124 +++
  tools/libxl/libxl_types.idl  |   6 ++
  tools/libxl/xl_cmdimpl.c |  14 +++-
  7 files changed, 223 insertions(+), 10 deletions(-)

Thanks
Tiejun

Re: [Qemu-devel] [PATCH 5/7] memory: Allow replay of IOMMU mapping notifications

2015-09-24 Thread David Gibson

On Thu, Sep 24, 2015 at 06:08:59PM +0200, Laurent Vivier wrote:
> 
> 
> On 24/09/2015 06:33, David Gibson wrote:
> > When we have guest visible IOMMUs, we allow notifiers to be registered
> > which will be informed of all changes to IOMMU mappings.  This is used by
> > vfio to keep the host IOMMU mappings in sync with guest IOMMU mappings.
> > 
> > However, unlike with a memory region listener, an iommu notifier won't be
> > told about any mappings which already exist in the (guest) IOMMU at the
> > time it is registered.  This can cause problems if hotplugging a VFIO
> > device onto a guest bus which had existing guest IOMMU mappings, but didn't
> > previously have an VFIO devices (and hence no host IOMMU mappings).
> > 
> > This adds a memory_region_register_iommu_notifier_replay() function to
> > handle this case.  As well as registering the new notifier it replays
> > existing mappings.  Because the IOMMU memory region doesn't internally
> > remember the granularity of the guest IOMMU it has a small hack where the
> > caller must specify a granularity at which to replay mappings.
> > 
> > If there are finer mappings in the guest IOMMU these will be reported in
> > the iotlb structures passed to the notifier which it must handle (probably
> > causing it to flag an error).  This isn't new - the VFIO iommu notifier
> > must already handle notifications about guest IOMMU mappings too short
> > for it to represent in the host IOMMU.
> > 
> > Signed-off-by: David Gibson 
> > ---
> >  include/exec/memory.h | 17 +
> >  memory.c  | 18 ++
> >  2 files changed, 35 insertions(+)
> > 
> > diff --git a/include/exec/memory.h b/include/exec/memory.h
> > index 5baaf48..304f985 100644
> > --- a/include/exec/memory.h
> > +++ b/include/exec/memory.h
> > @@ -583,6 +583,23 @@ void memory_region_notify_iommu(MemoryRegion
> > *mr,
[snip]
> > +void memory_region_register_iommu_notifier_replay(MemoryRegion *mr, 
> > Notifier *n,
> > +  hwaddr granularity,
> > +  bool is_write)
> > +{
> > +hwaddr addr;
> > +IOMMUTLBEntry iotlb;
> > +
> > +memory_region_register_iommu_notifier(mr, n);
> > +
> > +for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
> > +
> > +iotlb = mr->iommu_ops->translate(mr, addr, is_write);
> > +if (iotlb.perm != IOMMU_NONE) {
> > +n->notify(n, &iotlb);
> > +}
> > +}
> > +}
> 
> If mr->size > (UINT64_MAX + 1 - granularity), you run into an infinite
> loop because hwaddr is a 64bit value and the stop condition is beyond
> its max value. You can avoid this by using the power of 2 of the

Ugh, yes, and I think my old version with more int128s was still
wrong, too.

> granularity, instead of the granularity:
> 
> int shift = ctz64(granularity);
> hwaddr size = memory_region_size(mr) >> shift;
> for (addr = 0; addr < size; addr++)
> {
> iotlb = mr->iommu_ops->translate(mr, addr << shift, is_write);
> ...
> 
> so in patch 6, you should pass the power of 2 instead of the value of
> the granularity.
> 
> Of course, it works if granularity is at least 2

Hrm, rather clunky.

I've instead gone for putting this at the end of the loop body:

/* if (2^64 - MR size) < granularity, it's possible to get an
 * infinite loop here.  This should catch such a wraparound */
if ((addr + granularity) < addr) {
break;
}

Of course, unless granularity is huge, stepping through a whole 2^64
address space might be indistinguishable from an infinite loop in
practice..

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson


pgpTwx4Vi_ouL.pgp
Description: PGP signature

Re: [Qemu-devel] [PATCH 3/7] vfio: Check guest IOVA ranges against host IOMMU capabilities

2015-09-24 Thread David Gibson

On Thu, Sep 24, 2015 at 11:32:01AM -0600, Alex Williamson wrote:
> On Thu, 2015-09-24 at 14:33 +1000, David Gibson wrote:
> > The current vfio core code assumes that the host IOMMU is capable of
> > mapping any IOVA the guest wants to use to where we need.  However, real
> > IOMMUs generally only support translating a certain range of IOVAs (the
> > "DMA window") not a full 64-bit address space.
> > 
> > The common x86 IOMMUs support a wide enough range that guests are very
> > unlikely to go beyond it in practice, however the IOMMU used on IBM Power
> > machines - in the default configuration - supports only a much more limited
> > IOVA range, usually 0..2GiB.
> > 
> > If the guest attempts to set up an IOVA range that the host IOMMU can't
> > map, qemu won't report an error until it actually attempts to map a bad
> > IOVA.  If guest RAM is being mapped directly into the IOMMU (i.e. no guest
> > visible IOMMU) then this will show up very quickly.  If there is a guest
> > visible IOMMU, however, the problem might not show up until much later when
> > the guest actually attempt to DMA with an IOVA the host can't handle.
> > 
> > This patch adds a test so that we will detect earlier if the guest is
> > attempting to use IOVA ranges that the host IOMMU won't be able to deal
> > with.
> > 
> > For now, we assume that "Type1" (x86) IOMMUs can support any IOVA, this is
> > incorrect, but no worse than what we have already.  We can't do better for
> > now because the Type1 kernel interface doesn't tell us what IOVA range the
> > IOMMU actually supports.
> > 
> > For the Power "sPAPR TCE" IOMMU, however, we can retrieve the supported
> > IOVA range and validate guest IOVA ranges against it, and this patch does
> > so.
> > 
> > Signed-off-by: David Gibson 
> > Reviewed-by: Laurent Vivier 
> > ---
> >  hw/vfio/common.c  | 40 +---
> >  include/hw/vfio/vfio-common.h |  6 ++
> >  2 files changed, 43 insertions(+), 3 deletions(-)
> > 
> > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> > index 95a4850..f90cc75 100644
> > --- a/hw/vfio/common.c
> > +++ b/hw/vfio/common.c
> > @@ -343,14 +343,22 @@ static void vfio_listener_region_add(MemoryListener 
> > *listener,
> >  if (int128_ge(int128_make64(iova), llend)) {
> >  return;
> >  }
> > +end = int128_get64(llend);
> > +
> > +if ((iova < container->min_iova) || ((end - 1) > container->max_iova)) 
> > {
> > +error_report("vfio: IOMMU container %p can't map guest IOVA region"
> > + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx,
> > + container, iova, end - 1);
> > +ret = -EFAULT; /* FIXME: better choice here? */
> 
> "Bad address" makes sense to me.  This looks like an RFC comment, can we
> remove it?

Ok.

> 
> > +goto fail;
> > +}
> >  
> >  memory_region_ref(section->mr);
> >  
> >  if (memory_region_is_iommu(section->mr)) {
> >  VFIOGuestIOMMU *giommu;
> >  
> > -trace_vfio_listener_region_add_iommu(iova,
> > -int128_get64(int128_sub(llend, int128_one(;
> > +trace_vfio_listener_region_add_iommu(iova, end - 1);
> >  /*
> >   * FIXME: We should do some checking to see if the
> >   * capabilities of the host VFIO IOMMU are adequate to model
> > @@ -387,7 +395,6 @@ static void vfio_listener_region_add(MemoryListener 
> > *listener,
> >  
> >  /* Here we assume that memory_region_is_ram(section->mr)==true */
> >  
> > -end = int128_get64(llend);
> >  vaddr = memory_region_get_ram_ptr(section->mr) +
> >  section->offset_within_region +
> >  (iova - section->offset_within_address_space);
> > @@ -685,7 +692,19 @@ static int vfio_connect_container(VFIOGroup *group, 
> > AddressSpace *as)
> >  ret = -errno;
> >  goto free_container_exit;
> >  }
> > +
> > +/*
> > + * FIXME: This assumes that a Type1 IOMMU can map any 64-bit
> > + * IOVA whatsoever.  That's not actually true, but the current
> > + * kernel interface doesn't tell us what it can map, and the
> > + * existing Type1 IOMMUs generally support any IOVA we're
> > + * going to actually try in practice.
> > + */
> > +container->min_iova = 0;
> > +container->max_iova = (hwaddr)-1;
> >  } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
> > +struct vfio_iommu_spapr_tce_info info;
> > +
> >  ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
> >  if (ret) {
> >  error_report("vfio: failed to set group container: %m");
> > @@ -710,6 +729,21 @@ static int vfio_connect_container(VFIOGroup *group, 
> > AddressSpace *as)
> >  ret = -errno;
> >  goto free_container_exit;
> >  }
> > +
> > +/*
> > + * FIXME: This only considers the host IOMMU' 32-bit window.
> 
> IOMMU's?

Yes.

Re: [Qemu-devel] [PATCH 4/7] vfio: Record host IOMMU's available IO page sizes

2015-09-24 Thread David Gibson

On Thu, Sep 24, 2015 at 11:32:14AM -0600, Alex Williamson wrote:
> On Thu, 2015-09-24 at 14:33 +1000, David Gibson wrote:
> > Depending on the host IOMMU type we determine and record the available page
> > sizes for IOMMU translation.  We'll need this for other validation in
> > future patches.
> > 
> > Signed-off-by: David Gibson 
> > Reviewed-by: Thomas Huth 
> > Reviewed-by: Laurent Vivier 
> > ---
> >  hw/vfio/common.c  | 13 +
> >  include/hw/vfio/vfio-common.h |  1 +
> >  2 files changed, 14 insertions(+)
> > 
> > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> > index f90cc75..db8dff3 100644
> > --- a/hw/vfio/common.c
> > +++ b/hw/vfio/common.c
> > @@ -677,6 +677,7 @@ static int vfio_connect_container(VFIOGroup *group, 
> > AddressSpace *as)
> >  if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) ||
> >  ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)) {
> >  bool v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU);
> > +struct vfio_iommu_type1_info info;
> >  
> >  ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
> >  if (ret) {
> > @@ -702,6 +703,15 @@ static int vfio_connect_container(VFIOGroup *group, 
> > AddressSpace *as)
> >   */
> >  container->min_iova = 0;
> >  container->max_iova = (hwaddr)-1;
> > +
> > +/* Assume just 4K IOVA page size */
> > +container->iova_pgsizes = 0x1000;
> > +info.argsz = sizeof(info);
> > +ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info);
> > +/* Ignore errors */
> > +if ((ret == 0) && (info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
> > +container->iova_pgsizes = info.iova_pgsizes;
> > +}
> >  } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
> >  struct vfio_iommu_spapr_tce_info info;
> >  
> > @@ -744,6 +754,9 @@ static int vfio_connect_container(VFIOGroup *group, 
> > AddressSpace *as)
> >  }
> >  container->min_iova = info.dma32_window_start;
> >  container->max_iova = container->min_iova + info.dma32_window_size 
> > - 1;
> > +
> > +/* Assume just 4K IOVA pages for now */
> 
> 
> Ironically, no FIXME ;)

:p

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson


pgpeKCgRCZkDf.pgp
Description: PGP signature

Re: [Qemu-devel] [PATCH 5/7] memory: Allow replay of IOMMU mapping notifications

2015-09-24 Thread David Gibson

On Thu, Sep 24, 2015 at 11:32:29AM -0600, Alex Williamson wrote:
> On Thu, 2015-09-24 at 14:33 +1000, David Gibson wrote:
> > When we have guest visible IOMMUs, we allow notifiers to be registered
> > which will be informed of all changes to IOMMU mappings.  This is used by
> > vfio to keep the host IOMMU mappings in sync with guest IOMMU mappings.
> > 
> > However, unlike with a memory region listener, an iommu notifier won't be
> > told about any mappings which already exist in the (guest) IOMMU at the
> > time it is registered.  This can cause problems if hotplugging a VFIO
> > device onto a guest bus which had existing guest IOMMU mappings, but didn't
> > previously have an VFIO devices (and hence no host IOMMU mappings).
> > 
> > This adds a memory_region_register_iommu_notifier_replay() function to
> > handle this case.  As well as registering the new notifier it replays
> > existing mappings.  Because the IOMMU memory region doesn't internally
> > remember the granularity of the guest IOMMU it has a small hack where the
> > caller must specify a granularity at which to replay mappings.
> > 
> > If there are finer mappings in the guest IOMMU these will be reported in
> > the iotlb structures passed to the notifier which it must handle (probably
> > causing it to flag an error).  This isn't new - the VFIO iommu notifier
> > must already handle notifications about guest IOMMU mappings too short
> > for it to represent in the host IOMMU.
> > 
> > Signed-off-by: David Gibson 

[snip]
> > +void memory_region_register_iommu_notifier_replay(MemoryRegion *mr, 
> > Notifier *n,
> > +  hwaddr granularity,
> > +  bool is_write)
> > +{
> > +hwaddr addr;
> > +IOMMUTLBEntry iotlb;
> > +
> > +memory_region_register_iommu_notifier(mr, n);
> > +
> > +for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
> > +
> > +iotlb = mr->iommu_ops->translate(mr, addr, is_write);
> > +if (iotlb.perm != IOMMU_NONE) {
> > +n->notify(n, &iotlb);
> > +}
> > +}
> > +}
> > +
> 
> 
> When memory_listener_register() replays mappings, it does so on an rcu
> copy of the flatview for each AddressSpace.  Here we don't seem to have
> anything protecting against concurrency... do we need to worry about
> that?

I was assuming that the IOMMU mappings are protected by the BQL.  I
_think_ that's the case (for every IOMMU we have so far), but I'm not
really sure how to be sure.

> 
> >  void memory_region_unregister_iommu_notifier(Notifier *n)
> >  {
> >  notifier_remove(n);
> 
> 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


pgpZ6CvXbnRR0.pgp
Description: PGP signature

Re: [Qemu-devel] [PATCH 1/7] vfio: Remove unneeded union from VFIOContainer

2015-09-24 Thread David Gibson

On Thu, Sep 24, 2015 at 10:01:55AM -0600, Alex Williamson wrote:
> On Thu, 2015-09-24 at 14:33 +1000, David Gibson wrote:
> > Currently the VFIOContainer iommu_data field contains a union with
> > different information for different host iommu types.  However:
> >* It only actually contains information for the x86-like "Type1" iommu
> >* Because we have a common listener the Type1 fields are actually used
> > on all IOMMU types, including the SPAPR TCE type as well
> >* There's no tag in the VFIOContainer to tell you which union member is
> > valid anyway.
> 
> FWIW, this last point isn't valid.  The IOMMU setup determines which
> union member is active and the listener and release functions are
> specific to the union member.  There's no need whatsoever for a tag to
> keep track of the union member in use.  The only problem is that the
> union solved a problem that never really came to exist, so we can now
> remove it and simplify things.

I could argue some of the details there, but none of them are really
important.

> I'll remove this last bullet point unless there's some objection.
> Thanks,

That's fine.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


pgp_Y3sbWSZUj.pgp
Description: PGP signature

Re: [Qemu-devel] [PULL 6/7] vmxnet3: Drop net_vmxnet3_info.can_receive

2015-09-24 Thread Jason Wang



On 09/24/2015 07:19 PM, Shmulik Ladkani wrote:
> Hi,
>
> On Thu, 3 Sep 2015 10:19:19 +0300, shmulik.ladk...@ravellosystems.com wrote:
>> On Wed,  2 Sep 2015 17:14:52 +0100, stefa...@redhat.com wrote:
>>> From: Fam Zheng 
>>>
>>> Commit 6e99c63 ("net/socket: Drop net_socket_can_send") changed the
>>> semantics around .can_receive for sockets to now require the device to
>>> flush queued pkts when transitioning to a .can_receive=true state. But
>>> it's OK to drop incoming packets when the link is not active.
>>>
>>> Signed-off-by: Fam Zheng 
>>> Signed-off-by: Stefan Hajnoczi 
>> Tested-by: Shmulik Ladkani 
> Ping...
>
> I'm not sure what's the exact policy, but shouldn't
> '2734a20 vmxnet3: Drop net_vmxnet3_info.can_receive' go into stable-2.4?

If you think a patch is a stable candidate, you need to Cc
qemu-sta...@nongnu.org explicitly in the commit log.

Cc Michael for this.

>
> Otherwise, vmxnet3 has no rx connectivity in 2.4 based releases.
>
> See here: 
> https://lists.gnu.org/archive/html/qemu-devel/2015-08/msg02233.html
>
> Regards,
> Shmulik

[Qemu-devel] [PATCH V2 3/3] virtio-net: correctly drop truncated packets

2015-09-24 Thread Jason Wang

When a packet is truncated during receive, we drop the packet but
neither discard the descriptor nor add and signal a used
descriptor. This leads to several issues:

- sg mappings are leaked
- rx will be stalled if a lot of packets are truncated

In order to be consistent with vhost, fix by discarding the descriptor
in this case.

Cc: Michael S. Tsirkin 
Signed-off-by: Jason Wang 
---
 hw/net/virtio-net.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index f72eebf..038a18b 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1086,13 +1086,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, 
const uint8_t *buf, size_t
  * must have consumed the complete packet.
  * Otherwise, drop it. */
 if (!n->mergeable_rx_bufs && offset < size) {
-#if 0
-error_report("virtio-net truncated non-mergeable packet: "
- "i %zd mergeable %d offset %zd, size %zd, "
- "guest hdr len %zd, host hdr len %zd",
- i, n->mergeable_rx_bufs,
- offset, size, n->guest_hdr_len, n->host_hdr_len);
-#endif
+virtqueue_discard(q->rx_vq, &elem, total);
 return size;
 }
 
-- 
2.1.4

[Qemu-devel] [PATCH V2 2/3] virtio: introduce virtqueue_discard()

2015-09-24 Thread Jason Wang

This patch introduces virtqueue_discard() to discard a descriptor and
unmap the sgs. This will be used by the patch that discards the
descriptor when a packet is truncated.

Cc: Michael S. Tsirkin 
Signed-off-by: Jason Wang 
---
 hw/virtio/virtio.c | 7 +++
 include/hw/virtio/virtio.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index bb2c4cf..d24f775 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -266,6 +266,13 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const 
VirtQueueElement *elem,
   0, elem->out_sg[i].iov_len);
 }
 
+void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
+   unsigned int len)
+{
+vq->last_avail_idx--;
+virtqueue_unmap_sg(vq, elem, len);
+}
+
 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len, unsigned int idx)
 {
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 6201ee8..9d09115 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -146,6 +146,8 @@ void virtio_del_queue(VirtIODevice *vdev, int n);
 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len);
 void virtqueue_flush(VirtQueue *vq, unsigned int count);
+void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
+   unsigned int len);
 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len, unsigned int idx);
 
-- 
2.1.4

[Qemu-devel] [PATCH V2 1/3] virtio: introduce virtqueue_unmap_sg()

2015-09-24 Thread Jason Wang

Factor out the sg unmapping logic. This will be reused by the patch that
discards a descriptor.

Cc: Michael S. Tsirkin 
Cc: Andrew James 
Signed-off-by: Jason Wang 
---
Changes from V1:
- Fix use of uninitialized var warning.
---
 hw/virtio/virtio.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 0832db9..bb2c4cf 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -243,14 +243,12 @@ int virtio_queue_empty(VirtQueue *vq)
 return vring_avail_idx(vq) == vq->last_avail_idx;
 }
 
-void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
-unsigned int len, unsigned int idx)
+static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
+   unsigned int len)
 {
 unsigned int offset;
 int i;
 
-trace_virtqueue_fill(vq, elem, len, idx);
-
 offset = 0;
 for (i = 0; i < elem->in_num; i++) {
 size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
@@ -266,6 +264,14 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement 
*elem,
 cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
   elem->out_sg[i].iov_len,
   0, elem->out_sg[i].iov_len);
+}
+
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
+{
+trace_virtqueue_fill(vq, elem, len, idx);
+
+virtqueue_unmap_sg(vq, elem, len);
 
 idx = (idx + vring_used_idx(vq)) % vq->vring.num;
 
-- 
2.1.4

Re: [Qemu-devel] [PATCH v2] Add argument filters to the seccomp sandbox

2015-09-24 Thread Namsun Ch'o

>Can you point out which exact use case breaks if you don't whitelist the
>below mentioned system calls' flags?

It happens whenever I do -runas with the sandbox enabled, or chroot with the
sandbox enabled.

sh# qemu-system-x86_64 -m 2048 -enable-kvm -chroot /var/empty -sandbox on \
> -cdrom /tmp/devuan-jessie-netboot-i386-alpha2.iso
^C
Session terminated, terminating shell...^C ...terminated.
sh# tail -n 1 /var/log/audit/audit.log | fold -s -w 80
type=SECCOMP msg=audit(1443154833.702:286096): auid=0 uid=0 gid=0 ses=12
pid=985623 comm="qemu-system-x86" exe="/usr/bin/qemu-system-x86_64" sig=31
arch=c03e syscall=161 compat=0 ip=0x309c2885397 code=0x0

>We thought about this in the beginning of the development of seccomp on
>qemu. Some feature like allow all, which would print to stderr all
>illegal hits and a another argument like
>-sandbox_add="syscall1,syscall2", but this would be against the concept
>of the whole security schema. We don't want the user to take full
>control of it, and if you're a developer, you know what to do.

Is there an official security model for QEMU? I actually think a config file
which contains seccomp rules would be a very good idea, because it would let
the people who want to deploy a secure VM do so, so they can tighten it up
based on the functions they need, without needing to go to the trouble of
compiling a custom version (which might not be a very good idea when your job
is on the line when some unexpected crash caused by a custom patch causes
several hours of downtime for customers).

Another idea which would fit in with the security model is to have a dynamic
sandbox which enables syscalls and syscall filters based on what command line
or config parameters are passed to QEMU on its first start. For example QEMU
should have no need to perform every single filesystem operation that exists
on a setup where 9p is not in use. The same applies to the highly dangerous
syscalls like setsockopt, getsockopt, and ioctl, which would have to be
blanket enabled just because someone might use an obscure configuration.
Implementing a dynamic seccomp policy would be as easy as something like:

if (howerver_qemu_checks_for_enabled_options(optname) == 0)
enable_calls_needed_for_optname();

>Isn't it IPC_CREAT? Or am I missing something?

Yes, that was a dumb typo on my part. I posted an older patch of mine before
fixing that typo.

>Can you resend a v3 describing the changes you did from v1 to v2 and v3?
>This helps keep tracking of ideas and discussions.

Yes, I put it in a new top thread as the FAQ suggested.

[Qemu-devel] [PATCH v3] Add argument filters to the seccomp sandbox

2015-09-24 Thread Namsun Ch'o

Here's the v3 patch. I applied it and compiled QEMU, and it worked fine.

Changes so far:
v1
 - Created argument filters for the madvise, shmget, and shmctl syscalls.
v1 -> v2
 - Added 5 new madvise flags which were present in the source code but not in
   the strace which I generated.
 - Added IP_CREAT|0600 to shmget, which Daniel Berrange pointed out was
   present in GTK2, which QEMU uses but does not call directly.
v2 -> v3
 - Replaced include asm/mman-common.h with sys/mman.h which is more proper.
 - Fixed a stupid typo where I had IP_CREAT instead of IPC_CREAT.
 - Removed the comma on the last entry of the madvise_flags array.
 - Removed one madvise flag (MADV_INVALID) which doesn't exist, apparently.

Signed-off-by: Namsun Ch'o 
---
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index f9de0d3..a353ef9 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -14,6 +14,8 @@
  */
 #include 
 #include 
+#include 
+#include 
 #include "sysemu/seccomp.h"
 
 struct QemuSeccompSyscall {
@@ -105,7 +107,6 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] 
= {
 { SCMP_SYS(rt_sigreturn), 245 },
 { SCMP_SYS(sync), 245 },
 { SCMP_SYS(pread64), 245 },
-{ SCMP_SYS(madvise), 245 },
 { SCMP_SYS(set_robust_list), 245 },
 { SCMP_SYS(lseek), 245 },
 { SCMP_SYS(pselect6), 245 },
@@ -224,11 +225,9 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] 
= {
 { SCMP_SYS(arch_prctl), 240 },
 { SCMP_SYS(mkdir), 240 },
 { SCMP_SYS(fchmod), 240 },
-{ SCMP_SYS(shmget), 240 },
 { SCMP_SYS(shmat), 240 },
 { SCMP_SYS(shmdt), 240 },
 { SCMP_SYS(timerfd_create), 240 },
-{ SCMP_SYS(shmctl), 240 },
 { SCMP_SYS(mlockall), 240 },
 { SCMP_SYS(mlock), 240 },
 { SCMP_SYS(munlock), 240 },
@@ -264,6 +263,59 @@ int seccomp_start(void)
 }
 }
 
+/* madvise */
+static const int madvise_flags[] = {
+MADV_DODUMP,
+MADV_DONTDUMP,
+MADV_UNMERGEABLE,
+MADV_WILLNEED,
+MADV_DONTFORK,
+MADV_DONTNEED,
+MADV_HUGEPAGE,
+MADV_MERGEABLE
+};
+for (i = 0; i < ARRAY_SIZE(madvise_flags); i++) {
+rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(madvise), 1,
+SCMP_A2(SCMP_CMP_EQ, madvise_flags[i]));
+if (rc < 0) {
+goto seccomp_return;
+}
+}
+rc = seccomp_syscall_priority(ctx, SCMP_SYS(madvise), 245);
+if (rc < 0) {
+goto seccomp_return;
+}
+
+/* shmget */
+rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(shmget), 2,
+SCMP_A0(SCMP_CMP_EQ, IPC_PRIVATE),
+SCMP_A2(SCMP_CMP_EQ, IPC_CREAT|0777));
+if (rc < 0) {
+goto seccomp_return;
+}
+rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(shmget), 2,
+SCMP_A0(SCMP_CMP_EQ, IPC_PRIVATE),
+SCMP_A2(SCMP_CMP_EQ, IPC_CREAT|0600));
+if (rc < 0) {
+goto seccomp_return;
+}
+rc = seccomp_syscall_priority(ctx, SCMP_SYS(shmget), 240);
+if (rc < 0) {
+goto seccomp_return;
+}
+
+/* shmctl */
+rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(shmctl), 2,
+SCMP_A1(SCMP_CMP_EQ, IPC_RMID),
+SCMP_A2(SCMP_CMP_EQ, 0));
+if (rc < 0) {
+goto seccomp_return;
+}
+rc = seccomp_syscall_priority(ctx, SCMP_SYS(shmctl), 240);
+if (rc < 0) {
+goto seccomp_return;
+}
+
 rc = seccomp_load(ctx);
 
   seccomp_return:

Re: [Qemu-devel] [PATCH V6 2/2] sdhci: Split sdhci.h for public and internal device usage

2015-09-24 Thread Sai Pavan Boddu



> -Original Message-
> From: Peter Crosthwaite [mailto:crosthwaitepe...@gmail.com]
> Sent: Friday, September 25, 2015 9:00 AM
> To: Sai Pavan Boddu
> Cc: qemu-devel@nongnu.org Developers; Peter Maydell; Alistair Francis;
> Edgar Iglesias; Sai Pavan Boddu
> Subject: Re: [PATCH V6 2/2] sdhci: Split sdhci.h for public and internal 
> device
> usage
> 
> On Wed, Sep 23, 2015 at 12:32 AM, Sai Pavan Boddu
>  wrote:
> > Split sdhci.h into Public version (i.e include/hw/sd/sdhci.h) and
> > Internal version (i.e hw/sd/sdhci-interna.h) based on register
> 
> "internal".
> 
> > declarations and object declaration.
> >
> > Signed-off-by: Sai Pavan Boddu 
> > Reviewed-by: Alistair Francis 
> 
> Reviewed-by: Peter Crosthwaite 
Thanks,
Sai Pavan
> 
> Regards,
> Peter
> 
> > ---
> > Changes for V6:
> > Fix commit message.
> > Chages for V5:
> > Rename pubilc header version as sdhci.h and internal version to
> > sdhci-internal.h
> > Changes for V4:
> > Remain the name of internal version of sdchi.h as same. And change
> > Re-Adding qemu-common.h header.
> > the one which is in includes/ to sdhci-common.h
> > Changes for V2:
> > Create new area in includes for sd. And move sdhci.h to same.
> > Changes for V3:
> > Split the headers to public and common.
> > ---
> >  hw/sd/{sdhci.h => sdhci-internal.h} | 67 +--
> >  hw/sd/sdhci.c   |  3 +-
> >  include/hw/sd/sdhci.h   | 92
> +
> >  3 files changed, 95 insertions(+), 67 deletions(-)
> >  rename hw/sd/{sdhci.h => sdhci-internal.h} (75%)
> >  create mode 100644 include/hw/sd/sdhci.h
> >
> > diff --git a/hw/sd/sdhci.h b/hw/sd/sdhci-internal.h
> > similarity index 75%
> > rename from hw/sd/sdhci.h
> > rename to hw/sd/sdhci-internal.h
> > index a45593f..c40ae2b 100644
> > --- a/hw/sd/sdhci.h
> > +++ b/hw/sd/sdhci-internal.h
> > @@ -21,14 +21,10 @@
> >   * You should have received a copy of the GNU General Public License
> along
> >   * with this program; if not, see .
> >   */
> > -
> >  #ifndef SDHCI_H
> >  #define SDHCI_H
> >
> > -#include "qemu-common.h"
> > -#include "hw/pci/pci.h"
> > -#include "hw/sysbus.h"
> > -#include "hw/sd/sd.h"
> > +#include "hw/sd/sdhci.h"
> >
> >  /* R/W SDMA System Address register 0x0 */
> >  #define SDHC_SYSAD 0x00
> > @@ -231,65 +227,6 @@ enum {
> >  sdhc_gap_write  = 2   /* SDHC stopped at block gap during write
> operation */
> >  };
> >
> > -/* SD/MMC host controller state */
> > -typedef struct SDHCIState {
> > -union {
> > -PCIDevice pcidev;
> > -SysBusDevice busdev;
> > -};
> > -SDState *card;
> > -MemoryRegion iomem;
> > -
> > -QEMUTimer *insert_timer;   /* timer for 'changing' sd card. */
> > -QEMUTimer *transfer_timer;
> > -qemu_irq eject_cb;
> > -qemu_irq ro_cb;
> > -qemu_irq irq;
> > -
> > -uint32_t sdmasysad;/* SDMA System Address register */
> > -uint16_t blksize;  /* Host DMA Buff Boundary and Transfer BlkSize 
> > Reg
> */
> > -uint16_t blkcnt;   /* Blocks count for current transfer */
> > -uint32_t argument; /* Command Argument Register */
> > -uint16_t trnmod;   /* Transfer Mode Setting Register */
> > -uint16_t cmdreg;   /* Command Register */
> > -uint32_t rspreg[4];/* Response Registers 0-3 */
> > -uint32_t prnsts;   /* Present State Register */
> > -uint8_t  hostctl;  /* Host Control Register */
> > -uint8_t  pwrcon;   /* Power control Register */
> > -uint8_t  blkgap;   /* Block Gap Control Register */
> > -uint8_t  wakcon;   /* WakeUp Control Register */
> > -uint16_t clkcon;   /* Clock control Register */
> > -uint8_t  timeoutcon;   /* Timeout Control Register */
> > -uint8_t  admaerr;  /* ADMA Error Status Register */
> > -uint16_t norintsts;/* Normal Interrupt Status Register */
> > -uint16_t errintsts;/* Error Interrupt Status Register */
> > -uint16_t norintstsen;  /* Normal Interrupt Status Enable Register */
> > -uint16_t errintstsen;  /* Error Interrupt Status Enable Register */
> > -uint16_t norintsigen;  /* Normal Interrupt Signal Enable Register */
> > -uint16_t errintsigen;  /* Error Interrupt Signal Enable Register */
> > -uint16_t acmd12errsts; /* Auto CMD12 error status register */
> > -uint64_t admasysaddr;  /* ADMA System Address Register */
> > -
> > -uint32_t capareg;  /* Capabilities Register */
> > -uint32_t maxcurr;  /* Maximum Current Capabilities Register */
> > -uint8_t  *fifo_buffer; /* SD host i/o FIFO buffer */
> > -uint32_t buf_maxsz;
> > -uint16_t data_count;   /* current element in FIFO buffer */
> > -uint8_t  stopped_state;/* Current SDHC state */
> > -/* Buffer Data Port Register - virtual access point to R and W buffers 
> > */
> > -/* Software Reset Register - always

Re: [Qemu-devel] [PATCH V6 1/2] sd.h: Move sd.h to include/hw/sd/

2015-09-24 Thread Sai Pavan Boddu



> -Original Message-
> From: Peter Crosthwaite [mailto:crosthwaitepe...@gmail.com]
> Sent: Friday, September 25, 2015 9:02 AM
> To: Sai Pavan Boddu
> Cc: qemu-devel@nongnu.org Developers; Peter Maydell; Alistair Francis;
> Edgar Iglesias; Sai Pavan Boddu
> Subject: Re: [PATCH V6 1/2] sd.h: Move sd.h to include/hw/sd/
> 
> On Wed, Sep 23, 2015 at 12:32 AM, Sai Pavan Boddu
>  wrote:
> > Create a sd directory under include/hw/ and move sd.h to same.
> >
> > Signed-off-by: Sai Pavan Boddu 
> > Reviewed-by: Alistair Francis 
> 
> This probably needs more work for what the public API should be, but a
> step in the right direction for SoCification (and QoMification).
> 
> Reviewed-by: Peter Crosthwaite 
Thanks,
Sai Pavan
> 
> > ---
> > Changes for V6:
> > Fix commit message.
> > Changes for V5:
> > None
> > Changes for V4:
> > Fix commit message.
> > Changes for V3:
> > None.
> > ---
> >  hw/sd/milkymist-memcard.c | 2 +-
> >  hw/sd/omap_mmc.c  | 2 +-
> >  hw/sd/pl181.c | 2 +-
> >  hw/sd/pxa2xx_mmci.c   | 2 +-
> >  hw/sd/sd.c| 2 +-
> >  hw/sd/sdhci.h | 2 +-
> >  hw/sd/ssi-sd.c| 2 +-
> >  include/hw/{ => sd}/sd.h  | 0
> >  8 files changed, 7 insertions(+), 7 deletions(-)
> >  rename include/hw/{ => sd}/sd.h (100%)
> >
> > diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
> > index 2209ef1..b430d56 100644
> > --- a/hw/sd/milkymist-memcard.c
> > +++ b/hw/sd/milkymist-memcard.c
> > @@ -28,7 +28,7 @@
> >  #include "qemu/error-report.h"
> >  #include "sysemu/block-backend.h"
> >  #include "sysemu/blockdev.h"
> > -#include "hw/sd.h"
> > +#include "hw/sd/sd.h"
> >
> >  enum {
> >  ENABLE_CMD_TX   = (1<<0),
> > diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c
> > index 35d8033..5bc4719 100644
> > --- a/hw/sd/omap_mmc.c
> > +++ b/hw/sd/omap_mmc.c
> > @@ -18,7 +18,7 @@
> >   */
> >  #include "hw/hw.h"
> >  #include "hw/arm/omap.h"
> > -#include "hw/sd.h"
> > +#include "hw/sd/sd.h"
> >
> >  struct omap_mmc_s {
> >  qemu_irq irq;
> > diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
> > index 11fcd47..ddd9b6f 100644
> > --- a/hw/sd/pl181.c
> > +++ b/hw/sd/pl181.c
> > @@ -10,7 +10,7 @@
> >  #include "sysemu/block-backend.h"
> >  #include "sysemu/blockdev.h"
> >  #include "hw/sysbus.h"
> > -#include "hw/sd.h"
> > +#include "hw/sd/sd.h"
> >
> >  //#define DEBUG_PL181 1
> >
> > diff --git a/hw/sd/pxa2xx_mmci.c b/hw/sd/pxa2xx_mmci.c
> > index d1fe6d5..b217080 100644
> > --- a/hw/sd/pxa2xx_mmci.c
> > +++ b/hw/sd/pxa2xx_mmci.c
> > @@ -12,7 +12,7 @@
> >
> >  #include "hw/hw.h"
> >  #include "hw/arm/pxa.h"
> > -#include "hw/sd.h"
> > +#include "hw/sd/sd.h"
> >  #include "hw/qdev.h"
> >
> >  struct PXA2xxMMCIState {
> > diff --git a/hw/sd/sd.c b/hw/sd/sd.c
> > index a1ff465..0787e33 100644
> > --- a/hw/sd/sd.c
> > +++ b/hw/sd/sd.c
> > @@ -31,7 +31,7 @@
> >
> >  #include "hw/hw.h"
> >  #include "sysemu/block-backend.h"
> > -#include "hw/sd.h"
> > +#include "hw/sd/sd.h"
> >  #include "qemu/bitmap.h"
> >
> >  //#define DEBUG_SD 1
> > diff --git a/hw/sd/sdhci.h b/hw/sd/sdhci.h
> > index 3352d23..a45593f 100644
> > --- a/hw/sd/sdhci.h
> > +++ b/hw/sd/sdhci.h
> > @@ -28,7 +28,7 @@
> >  #include "qemu-common.h"
> >  #include "hw/pci/pci.h"
> >  #include "hw/sysbus.h"
> > -#include "hw/sd.h"
> > +#include "hw/sd/sd.h"
> >
> >  /* R/W SDMA System Address register 0x0 */
> >  #define SDHC_SYSAD 0x00
> > diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c
> > index e4b2d4f..c49ff62 100644
> > --- a/hw/sd/ssi-sd.c
> > +++ b/hw/sd/ssi-sd.c
> > @@ -13,7 +13,7 @@
> >  #include "sysemu/block-backend.h"
> >  #include "sysemu/blockdev.h"
> >  #include "hw/ssi.h"
> > -#include "hw/sd.h"
> > +#include "hw/sd/sd.h"
> >
> >  //#define DEBUG_SSI_SD 1
> >
> > diff --git a/include/hw/sd.h b/include/hw/sd/sd.h
> > similarity index 100%
> > rename from include/hw/sd.h
> > rename to include/hw/sd/sd.h
> > --
> > 2.1.4
> >

Re: [Qemu-devel] [PATCH V6 1/2] sd.h: Move sd.h to include/hw/sd/

2015-09-24 Thread Peter Crosthwaite

On Wed, Sep 23, 2015 at 12:32 AM, Sai Pavan Boddu
 wrote:
> Create a sd directory under include/hw/ and move sd.h to same.
>
> Signed-off-by: Sai Pavan Boddu 
> Reviewed-by: Alistair Francis 

This probably needs more work for what the public API should be, but a
step in the right direction for SoCification (and QoMification).

Reviewed-by: Peter Crosthwaite 

> ---
> Changes for V6:
> Fix commit message.
> Changes for V5:
> None
> Changes for V4:
> Fix commit message.
> Changes for V3:
> None.
> ---
>  hw/sd/milkymist-memcard.c | 2 +-
>  hw/sd/omap_mmc.c  | 2 +-
>  hw/sd/pl181.c | 2 +-
>  hw/sd/pxa2xx_mmci.c   | 2 +-
>  hw/sd/sd.c| 2 +-
>  hw/sd/sdhci.h | 2 +-
>  hw/sd/ssi-sd.c| 2 +-
>  include/hw/{ => sd}/sd.h  | 0
>  8 files changed, 7 insertions(+), 7 deletions(-)
>  rename include/hw/{ => sd}/sd.h (100%)
>
> diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
> index 2209ef1..b430d56 100644
> --- a/hw/sd/milkymist-memcard.c
> +++ b/hw/sd/milkymist-memcard.c
> @@ -28,7 +28,7 @@
>  #include "qemu/error-report.h"
>  #include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
> -#include "hw/sd.h"
> +#include "hw/sd/sd.h"
>
>  enum {
>  ENABLE_CMD_TX   = (1<<0),
> diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c
> index 35d8033..5bc4719 100644
> --- a/hw/sd/omap_mmc.c
> +++ b/hw/sd/omap_mmc.c
> @@ -18,7 +18,7 @@
>   */
>  #include "hw/hw.h"
>  #include "hw/arm/omap.h"
> -#include "hw/sd.h"
> +#include "hw/sd/sd.h"
>
>  struct omap_mmc_s {
>  qemu_irq irq;
> diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
> index 11fcd47..ddd9b6f 100644
> --- a/hw/sd/pl181.c
> +++ b/hw/sd/pl181.c
> @@ -10,7 +10,7 @@
>  #include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/sysbus.h"
> -#include "hw/sd.h"
> +#include "hw/sd/sd.h"
>
>  //#define DEBUG_PL181 1
>
> diff --git a/hw/sd/pxa2xx_mmci.c b/hw/sd/pxa2xx_mmci.c
> index d1fe6d5..b217080 100644
> --- a/hw/sd/pxa2xx_mmci.c
> +++ b/hw/sd/pxa2xx_mmci.c
> @@ -12,7 +12,7 @@
>
>  #include "hw/hw.h"
>  #include "hw/arm/pxa.h"
> -#include "hw/sd.h"
> +#include "hw/sd/sd.h"
>  #include "hw/qdev.h"
>
>  struct PXA2xxMMCIState {
> diff --git a/hw/sd/sd.c b/hw/sd/sd.c
> index a1ff465..0787e33 100644
> --- a/hw/sd/sd.c
> +++ b/hw/sd/sd.c
> @@ -31,7 +31,7 @@
>
>  #include "hw/hw.h"
>  #include "sysemu/block-backend.h"
> -#include "hw/sd.h"
> +#include "hw/sd/sd.h"
>  #include "qemu/bitmap.h"
>
>  //#define DEBUG_SD 1
> diff --git a/hw/sd/sdhci.h b/hw/sd/sdhci.h
> index 3352d23..a45593f 100644
> --- a/hw/sd/sdhci.h
> +++ b/hw/sd/sdhci.h
> @@ -28,7 +28,7 @@
>  #include "qemu-common.h"
>  #include "hw/pci/pci.h"
>  #include "hw/sysbus.h"
> -#include "hw/sd.h"
> +#include "hw/sd/sd.h"
>
>  /* R/W SDMA System Address register 0x0 */
>  #define SDHC_SYSAD 0x00
> diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c
> index e4b2d4f..c49ff62 100644
> --- a/hw/sd/ssi-sd.c
> +++ b/hw/sd/ssi-sd.c
> @@ -13,7 +13,7 @@
>  #include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/ssi.h"
> -#include "hw/sd.h"
> +#include "hw/sd/sd.h"
>
>  //#define DEBUG_SSI_SD 1
>
> diff --git a/include/hw/sd.h b/include/hw/sd/sd.h
> similarity index 100%
> rename from include/hw/sd.h
> rename to include/hw/sd/sd.h
> --
> 2.1.4
>

Re: [Qemu-devel] [PATCH V6 2/2] sdhci: Split sdhci.h for public and internal device usage

2015-09-24 Thread Peter Crosthwaite

On Wed, Sep 23, 2015 at 12:32 AM, Sai Pavan Boddu
 wrote:
> Split sdhci.h into Public version (i.e include/hw/sd/sdhci.h) and
> Internal version (i.e hw/sd/sdhci-interna.h) based on register

"internal".

> declarations and object declaration.
>
> Signed-off-by: Sai Pavan Boddu 
> Reviewed-by: Alistair Francis 

Reviewed-by: Peter Crosthwaite 

Regards,
Peter

> ---
> Changes for V6:
> Fix commit message.
> Changes for V5:
> Rename public header version as sdhci.h and internal version to
> sdhci-internal.h
> Changes for V4:
> Remain the name of internal version of sdchi.h as same. And change
> Re-Adding qemu-common.h header.
> the one which is in includes/ to sdhci-common.h
> Changes for V2:
> Create new area in includes for sd. And move sdhci.h to same.
> Changes for V3:
> Split the headers to public and common.
> ---
>  hw/sd/{sdhci.h => sdhci-internal.h} | 67 +--
>  hw/sd/sdhci.c   |  3 +-
>  include/hw/sd/sdhci.h   | 92 
> +
>  3 files changed, 95 insertions(+), 67 deletions(-)
>  rename hw/sd/{sdhci.h => sdhci-internal.h} (75%)
>  create mode 100644 include/hw/sd/sdhci.h
>
> diff --git a/hw/sd/sdhci.h b/hw/sd/sdhci-internal.h
> similarity index 75%
> rename from hw/sd/sdhci.h
> rename to hw/sd/sdhci-internal.h
> index a45593f..c40ae2b 100644
> --- a/hw/sd/sdhci.h
> +++ b/hw/sd/sdhci-internal.h
> @@ -21,14 +21,10 @@
>   * You should have received a copy of the GNU General Public License along
>   * with this program; if not, see .
>   */
> -
>  #ifndef SDHCI_H
>  #define SDHCI_H
>
> -#include "qemu-common.h"
> -#include "hw/pci/pci.h"
> -#include "hw/sysbus.h"
> -#include "hw/sd/sd.h"
> +#include "hw/sd/sdhci.h"
>
>  /* R/W SDMA System Address register 0x0 */
>  #define SDHC_SYSAD 0x00
> @@ -231,65 +227,6 @@ enum {
>  sdhc_gap_write  = 2   /* SDHC stopped at block gap during write 
> operation */
>  };
>
> -/* SD/MMC host controller state */
> -typedef struct SDHCIState {
> -union {
> -PCIDevice pcidev;
> -SysBusDevice busdev;
> -};
> -SDState *card;
> -MemoryRegion iomem;
> -
> -QEMUTimer *insert_timer;   /* timer for 'changing' sd card. */
> -QEMUTimer *transfer_timer;
> -qemu_irq eject_cb;
> -qemu_irq ro_cb;
> -qemu_irq irq;
> -
> -uint32_t sdmasysad;/* SDMA System Address register */
> -uint16_t blksize;  /* Host DMA Buff Boundary and Transfer BlkSize 
> Reg */
> -uint16_t blkcnt;   /* Blocks count for current transfer */
> -uint32_t argument; /* Command Argument Register */
> -uint16_t trnmod;   /* Transfer Mode Setting Register */
> -uint16_t cmdreg;   /* Command Register */
> -uint32_t rspreg[4];/* Response Registers 0-3 */
> -uint32_t prnsts;   /* Present State Register */
> -uint8_t  hostctl;  /* Host Control Register */
> -uint8_t  pwrcon;   /* Power control Register */
> -uint8_t  blkgap;   /* Block Gap Control Register */
> -uint8_t  wakcon;   /* WakeUp Control Register */
> -uint16_t clkcon;   /* Clock control Register */
> -uint8_t  timeoutcon;   /* Timeout Control Register */
> -uint8_t  admaerr;  /* ADMA Error Status Register */
> -uint16_t norintsts;/* Normal Interrupt Status Register */
> -uint16_t errintsts;/* Error Interrupt Status Register */
> -uint16_t norintstsen;  /* Normal Interrupt Status Enable Register */
> -uint16_t errintstsen;  /* Error Interrupt Status Enable Register */
> -uint16_t norintsigen;  /* Normal Interrupt Signal Enable Register */
> -uint16_t errintsigen;  /* Error Interrupt Signal Enable Register */
> -uint16_t acmd12errsts; /* Auto CMD12 error status register */
> -uint64_t admasysaddr;  /* ADMA System Address Register */
> -
> -uint32_t capareg;  /* Capabilities Register */
> -uint32_t maxcurr;  /* Maximum Current Capabilities Register */
> -uint8_t  *fifo_buffer; /* SD host i/o FIFO buffer */
> -uint32_t buf_maxsz;
> -uint16_t data_count;   /* current element in FIFO buffer */
> -uint8_t  stopped_state;/* Current SDHC state */
> -/* Buffer Data Port Register - virtual access point to R and W buffers */
> -/* Software Reset Register - always reads as 0 */
> -/* Force Event Auto CMD12 Error Interrupt Reg - write only */
> -/* Force Event Error Interrupt Register- write only */
> -/* RO Host Controller Version Register always reads as 0x2401 */
> -} SDHCIState;
> -
>  extern const VMStateDescription sdhci_vmstate;
>
> -#define TYPE_PCI_SDHCI "sdhci-pci"
> -#define PCI_SDHCI(obj) OBJECT_CHECK(SDHCIState, (obj), TYPE_PCI_SDHCI)
> -
> -#define TYPE_SYSBUS_SDHCI "generic-sdhci"
> -#define SYSBUS_SDHCI(obj)   \
> - OBJECT_CHECK(SDHCIState, (obj), TYPE_SYSBUS_SDHCI)
> -
> -#endif /* SDHCI_H */
> +#endif
> di

Re: [Qemu-devel] [PATCH 1/3] virtio: introduce virtqueue_unmap_sg()

2015-09-24 Thread Jason Wang



On 09/25/2015 12:19 AM, Andrew James wrote:
> On 09/18/2015 02:01 AM, Jason Wang wrote:
>> > Factor out sg unmapping logic. This will be reused by the patch that
>> > can discard descriptor.
>> > 
>> > Cc: Michael S. Tsirkin 
>> > Signed-off-by: Jason Wang 
>> > ---
>> >  hw/virtio/virtio.c | 15 ++-
>> >  1 file changed, 10 insertions(+), 5 deletions(-)
>> > 
>> > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
>> > index 0832db9..eb8d5ca 100644
>> > --- a/hw/virtio/virtio.c
>> > +++ b/hw/virtio/virtio.c
>> > @@ -243,15 +243,12 @@ int virtio_queue_empty(VirtQueue *vq)
>> >  return vring_avail_idx(vq) == vq->last_avail_idx;
>> >  }
>> >  
>> > -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
>> > -unsigned int len, unsigned int idx)
>> > +static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement 
>> > *elem,
>> > +   unsigned int len)
>> >  {
>> >  unsigned int offset;
>> >  int i;
>> >  
>> > -trace_virtqueue_fill(vq, elem, len, idx);
>> > -
>> > -offset = 0;
>> >  for (i = 0; i < elem->in_num; i++) {
>> >  size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
>> >  
> Should the "offset = 0" really be dropped here? 

Probably not.

> Seems like it ends
> up uninitialized. GCC thinks it might too.
>

Yes, will keep the offset initialization in V2.

Thanks

Re: [Qemu-devel] [PULL 20/22] virtio: introduce virtqueue_unmap_sg()

2015-09-24 Thread Jason Wang



On 09/25/2015 02:58 AM, Michael S. Tsirkin wrote:
> On Thu, Sep 24, 2015 at 04:21:02PM +0300, Michael S. Tsirkin wrote:
>> From: Jason Wang 
>>
>> Factor out sg unmapping logic. This will be reused by the patch that
>> can discard descriptor.
>>
>> Cc: Michael S. Tsirkin 
>> Signed-off-by: Jason Wang 
>> Reviewed-by: Michael S. Tsirkin 
>> Signed-off-by: Michael S. Tsirkin 
>> ---
>>  hw/virtio/virtio.c | 15 ++-
>>  1 file changed, 10 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
>> index 7504f8b..d6a2bca 100644
>> --- a/hw/virtio/virtio.c
>> +++ b/hw/virtio/virtio.c
>> @@ -244,15 +244,12 @@ int virtio_queue_empty(VirtQueue *vq)
>>  return vring_avail_idx(vq) == vq->last_avail_idx;
>>  }
>>  
>> -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
>> -unsigned int len, unsigned int idx)
>> +static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
>> +   unsigned int len)
>>  {
>>  unsigned int offset;
>>  int i;
>>  
>> -trace_virtqueue_fill(vq, elem, len, idx);
>> -
>> -offset = 0;
>>  for (i = 0; i < elem->in_num; i++) {
>>  size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
>>  
> This part causes an uninitialized variable warning to appear.
> I dropped this from the pull request.
> Jason, could you look into this please?
>

Will fix this in V2.

Thanks

Re: [Qemu-devel] [PATCH 1/1] migration: fix deadlock

2015-09-24 Thread Wen Congyang

On 09/24/2015 08:53 PM, Denis V. Lunev wrote:
> From: Igor Redko 
> 
> Release qemu global mutex before call synchronize_rcu().
> synchronize_rcu() waiting for all readers to finish their critical
> sections. There is at least one critical section in which we try
> to get QGM (critical section is in address_space_rw() and
> prepare_mmio_access() is trying to acquire QGM).
> 
> Both functions (migration_end() and migration_bitmap_extend())
> are called from main thread which is holding QGM.
> 
> Thus there is a race condition that ends up with deadlock:
> main thread   working thread
> Lock QGM|
> | Call KVM_EXIT_IO handler
> | |
> |Open rcu reader's critical section
> Migration cleanup bh|
> |   |
> synchronize_rcu() is|
> waiting for readers |
> |prepare_mmio_access() is waiting for QGM
>   \   /
>  deadlock
> 
> The patch just releases QGM before calling synchronize_rcu().
> 
> Signed-off-by: Igor Redko 
> Reviewed-by: Anna Melekhova 
> Signed-off-by: Denis V. Lunev 
> CC: Juan Quintela 
> CC: Amit Shah 
> ---
>  migration/ram.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index 7f007e6..d01febc 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1028,12 +1028,16 @@ static void migration_end(void)
>  {
>  /* caller have hold iothread lock or is in a bh, so there is
>   * no writing race against this migration_bitmap
> + * but rcu used not only for migration_bitmap, so we should
> + * release QGM or we get in deadlock.
>   */
>  unsigned long *bitmap = migration_bitmap;
>  atomic_rcu_set(&migration_bitmap, NULL);
>  if (bitmap) {
>  memory_global_dirty_log_stop();
> +qemu_mutex_unlock_iothread();
>  synchronize_rcu();
> +qemu_mutex_lock_iothread();

migration_end() can be called in two cases:
1. migration_completed
2. migration is cancelled

In case 1, you should not unlock the iothread; otherwise, the VM's state may be
changed unexpectedly.

>  g_free(bitmap);
>  }
>  
> @@ -1085,7 +1089,9 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t 
> new)
>  atomic_rcu_set(&migration_bitmap, bitmap);
>  qemu_mutex_unlock(&migration_bitmap_mutex);
>  migration_dirty_pages += new - old;
> +qemu_mutex_unlock_iothread();
>  synchronize_rcu();
> +qemu_mutex_lock_iothread();

Hmm, I think it is OK to unlock iothread here

>  g_free(old_bitmap);
>  }
>  }
>

Re: [Qemu-devel] [PATCH v11 02/12] init/cleanup of netfilter object

2015-09-24 Thread Yang Hongyang




On 09/24/2015 07:40 PM, Markus Armbruster wrote:

Yang Hongyang  writes:


On 09/24/2015 04:41 PM, Markus Armbruster wrote:

Yang Hongyang  writes:


Add a netfilter object based on QOM.

A netfilter is attached to a netdev, captures all network packets
that pass through the netdev. When we delete the netdev, we also
delete the netfilter object attached to it, because if the netdev is
removed, the filter which attached to it is useless.

QTAILQ_ENTRY next used by netdev, filter belongs to the specific netdev is
in this queue.


I don't get this paragraph.  Not sure it's needed.


Also init delayed object after net_init_clients, because netfilters need
to be initialized after net clients initialized.


A paragraph starting with "Also" in a commit message is a pretty good
sign the patch should be split :)



Signed-off-by: Yang Hongyang 
---
v11: no need to free nf->netdev_id, it will be auto freeed while object deleted
   remove global_list net_filters, will add back when needed
v10: use QOM for netfilter
v9: use flat union instead of simple union in QAPI schema
v8: include vhost_net header
v7: add check for vhost
  fix error propagate bug
v6: add multiqueue support (net_filter_init1)
v5: remove model from NetFilterState
  add a sent_cb param to receive_iov API
---
   include/net/filter.h|  60 +
   include/net/net.h   |   1 +
   include/qemu/typedefs.h |   1 +
   net/Makefile.objs   |   1 +
   net/filter.c| 138 

   net/net.c   |   7 +++
   qapi-schema.json|  18 +++
   vl.c|  13 ++---
   8 files changed, 233 insertions(+), 6 deletions(-)
   create mode 100644 include/net/filter.h
   create mode 100644 net/filter.c

diff --git a/include/net/filter.h b/include/net/filter.h
new file mode 100644
index 000..226f2f7
--- /dev/null
+++ b/include/net/filter.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_NET_FILTER_H
+#define QEMU_NET_FILTER_H
+
+#include "qom/object.h"
+#include "qemu-common.h"
+#include "qemu/typedefs.h"
+#include "net/queue.h"
+
+#define TYPE_NETFILTER "netfilter"
+#define NETFILTER(obj) \
+OBJECT_CHECK(NetFilterState, (obj), TYPE_NETFILTER)
+#define NETFILTER_GET_CLASS(obj) \
+OBJECT_GET_CLASS(NetFilterClass, (obj), TYPE_NETFILTER)
+#define NETFILTER_CLASS(klass) \
+OBJECT_CLASS_CHECK(NetFilterClass, (klass), TYPE_NETFILTER)
+
+typedef void (FilterSetup) (NetFilterState *nf, Error **errp);
+typedef void (FilterCleanup) (NetFilterState *nf);
+/*
+ * Return:
+ *   0: finished handling the packet, we should continue
+ *   size: filter stolen this packet, we stop pass this packet further
+ */
+typedef ssize_t (FilterReceiveIOV)(NetFilterState *nc,
+   NetClientState *sender,
+   unsigned flags,
+   const struct iovec *iov,
+   int iovcnt,
+   NetPacketSent *sent_cb);
+
+struct NetFilterClass {
+ObjectClass parent_class;
+
+FilterSetup *setup;
+FilterCleanup *cleanup;
+FilterReceiveIOV *receive_iov;
+};
+typedef struct NetFilterClass NetFilterClass;


Not splitting the declaration is more concise:

  typedef struct {
  ObjectClass parent_class;
  FilterSetup *setup;
  FilterCleanup *cleanup;
  FilterReceiveIOV *receive_iov;
  } NetFilterClass;

Are any of the methods optional?  If yes, please add suitable comments.


Hi Markus, I split it because the checkpatch.pl told me to do so...


Understand.  However, it's a recent change to checkpatch.pl that's going
to be reverted:
Message-ID: <55faeb33.50...@redhat.com>
http://lists.gnu.org/archive/html/qemu-devel/2015-09/msg04644.html


Thanks for the information.



[...]
.



--
Thanks,
Yang.

Re: [Qemu-devel] [PATCH v11 01/12] qmp: delete qemu opts when delete an object

2015-09-24 Thread Yang Hongyang




On 09/24/2015 07:36 PM, Markus Armbruster wrote:

Yang Hongyang  writes:


On 09/24/2015 05:42 PM, Markus Armbruster wrote:

Yang Hongyang  writes:


On 09/24/2015 03:43 PM, Markus Armbruster wrote:

This has finally reached the front of my review queue.  I apologize for
the long delay.

Copying Paolo for another pair of eyeballs (he wrote this code).


[...]

+
+opts = qemu_opts_find(qemu_find_opts_err("object", NULL), id);
+qemu_opts_del(opts);


qemu_find_opts_err("object", &error_abort) please, because when it
fails, we want to die right away, not when the null pointer it returns
gets dereferenced.


Thanks for the review.
Jason, do you want me to propose a fix on top of this series or simply drop
this for now because this patch is an independent bug fix and won't
affect the
other filter patch series.



Same sloppiness in netdev_del_completion() and qmp_netdev_del(), not
your patch's fault.

Elsewhere, we store the QemuOpts in the object just so we can delete it:
DeviceState, DriveInfo.  Paolo, what do you think?


I don't get it. Currently, only objects created at the beginning through
the QEMU command line are stored in QemuOpts; objects created
with object_add aren't stored in QemuOpts. Do you mean that DeviceState and
DriveInfo store their QemuOpts explicitly so that they can delete it?
Why don't we just delete it from objects directly instead?


Let me elaborate.

We have the same pattern in multiple places: some kind of object gets
configured via QemuOpts, and an object's QemuOpts need to stay around
until the object dies.

Example 1: Block device backends

  DriveInfo has a member opts.

  drive_new() stores the QemuOpts in dinfo->opts.

  drive_info_del() destroys dinfo->opts.

  Note: DriveInfo member opts is always non-null.  But not every
  BlockBackend has a DriveInfo.

Example 2: Device frontends

  DeviceState has a member opts.

  qdev_device_add() stores the QemuOpts in dev->opts.

  device_finalize() destroys dev->opts.

  Note: DeviceState member opts may be null (not every device is
  created by qdev_device_add()).  Fine, because qemu_opts_del(NULL) is
  a no-op.

Example 3: Character device backends

  CharDriverState has a member opts.

  qemu_chr_new_from_opts() stores the QemuOpts in chr->opts.

  qemu_chr_delete() destroys chr->opts.

Example 4: Network device backends

  Two cases

  A. netdev

 qmp_netdev_add() does not store the QemuOpts.


The QemuOpts stored by qmp_netdev_add() and also hmp_netdev_add().
through this function:
net/net.c: qmp_netdev_add()
1134 opts = qemu_opts_from_qdict(opts_list, qdict, &local_err);

hmp.c: hmp_netdev_add()
1579 opts = qemu_opts_from_qdict(qemu_find_opts("netdev"), qdict, &err);


That's where the QemuOpts are created.  By "does not store" I mean "does
not store in its own state, unlike example 1-3".


Understand, thank you.





 qmp_netdev_del() still needs to destroy it.  It has to find it
 somehow.  Here's how it does it:

 opts = qemu_opts_find(qemu_find_opts_err("netdev", NULL), id);
 if (!opts) {
 error_setg(errp, "Device '%s' is not a netdev", id);
 return;
 }

 The !opts condition is a non-obvious way to test "not created
 with -netdev", see commit 645c949.  Note that the commit's claim
 that qemu_opts_del(NULL) crashes is no longer true since commit
 4782183.

  B. Legacy net

 hmp_host_net_add() does not store the QemuOpts.

 hmp_host_net_remove() still needs to destroy it.  I can't see
 where that happens, and I'm not sure it does.

Example 5: Generic object

  object_create() does not store the QemuOpts.

  It still needs to be destroyed along with the object.  It isn't, and
  your patch fixes it.

Personally, I find the technique in example 1-3 easier to understand
than the one in example 4-5.
.


.



--
Thanks,
Yang.

Re: [Qemu-devel] [PATCH v11 01/12] qmp: delete qemu opts when delete an object

2015-09-24 Thread Yang Hongyang




On 09/24/2015 07:35 PM, Markus Armbruster wrote:

Yang Hongyang  writes:


On 09/24/2015 05:42 PM, Markus Armbruster wrote:

Yang Hongyang  writes:


On 09/24/2015 03:43 PM, Markus Armbruster wrote:

This has finally reached the front of my review queue.  I apologize for
the long delay.

Copying Paolo for another pair of eyeballs (he wrote this code).


[...]

+
+opts = qemu_opts_find(qemu_find_opts_err("object", NULL), id);
+qemu_opts_del(opts);


qemu_find_opts_err("object", &error_abort) please, because when it
fails, we want to die right away, not when the null pointer it returns
gets dereferenced.


Thanks for the review.
Jason, do you want me to propose a fix on top of this series or simply drop
this for now because this patch is an independent bug fix and won't
affect the
other filter patch series.



Same sloppiness in netdev_del_completion() and qmp_netdev_del(), not
your patch's fault.

Elsewhere, we store the QemuOpts in the object just so we can delete it:
DeviceState, DriveInfo.  Paolo, what do you think?


I don't get it. Currently, only objects created at the beginning through
the QEMU command line are stored in QemuOpts; objects created
with object_add aren't stored in QemuOpts. Do you mean that DeviceState and
DriveInfo store their QemuOpts explicitly so that they can delete it?
Why don't we just delete it from objects directly instead?


Let me elaborate.


Thanks very much for the elaboration.



We have the same pattern in multiple places: some kind of object gets
configured via QemuOpts, and an object's QemuOpts need to stay around
until the object dies.

Example 1: Block device backends

  DriveInfo has a member opts.

  drive_new() stores the QemuOpts in dinfo->opts.

  drive_info_del() destroys dinfo->opts.

  Note: DriveInfo member opts is always non-null.  But not every
  BlockBackend has a DriveInfo.

Example 2: Device frontends

  DeviceState has a member opts.

  qdev_device_add() stores the QemuOpts in dev->opts.

  device_finalize() destroys dev->opts.

  Note: DeviceState member opts may be null (not every device is
  created by qdev_device_add()).  Fine, because qemu_opts_del(NULL) is
  a no-op.

Example 3: Character device backends

  CharDriverState has a member opts.

  qemu_chr_new_from_opts() stores the QemuOpts in chr->opts.

  qemu_chr_delete() destroys chr->opts.


1-3 store their opts in their own state, not in the global opts group, right?


Both!  But keeping a pointer in their own state simplifies calling
qemu_opts_del() on destruction, and also makes it more obvious what is
keeping the QemuOpts alive.


I see. Thanks.




Example 4: Network device backends

  Two cases

  A. netdev

 qmp_netdev_add() does not store the QemuOpts.

 qmp_netdev_del() still needs to destroy it.  It has to find it
 somehow.  Here's how it does it:

 opts = qemu_opts_find(qemu_find_opts_err("netdev", NULL), id);
 if (!opts) {
 error_setg(errp, "Device '%s' is not a netdev", id);
 return;
 }

 The !opts condition is a non-obvious way to test "not created
 with -netdev", see commit 645c949.  Note that the commit's claim
 that qemu_opts_del(NULL) crashes is no longer true since commit
 4782183.

  B. Legacy net

 hmp_host_net_add() does not store the QemuOpts.


I'm afraid it does store the QemuOpts, but not in its own state.
net/net.c:
1088 qemu_opt_set(opts, "type", device, &error_abort);
This will store the QemuOpts, or did I misunderstand it?


Doesn't store opts anywhere, actually.  It merely modifies it (adds a
parameter "type")


As you said, "store" means store in their own state; then I see... thanks





 hmp_host_net_remove() still needs to destroy it.  I can't see
 where that happens, and I'm not sure it does.

Example 5: Generic object

  object_create() does not store the QemuOpts.

  It still needs to be destroyed along with the object.  It isn't, and
  your patch fixes it.

Personally, I find the technique in example 1-3 easier to understand
than the one in example 4-5.


I agree that opts should not be used to determine not created something
while there's case when something created but Opts not stored.

.



--
Thanks,
Yang.

Re: [Qemu-devel] [PATCH v8] hw/arm/virt: Add high MMIO PCI region, 512G in size

2015-09-24 Thread Peter Maydell

On 4 September 2015 at 00:13, Pavel Fedin  wrote:
> Peter Maydell wrote:
>> Did you report the bug where the pci controller driver
>> fails to start if the second region is out of its range
>> to the kernel mailing list? (It would be nice to be able
>> to point to a kernel patch in the changelog too.)
>
>  I didn't yet, because have to time to retest it. Well, OK, will
> do it.

Nudge -- have you reported this as a kernel bug against the
PCI generic driver yet?

thanks
-- PMM

Re: [Qemu-devel] [PULL 0/5] Patch queue for w32/w64

2015-09-24 Thread Peter Maydell

On 24 September 2015 at 13:16, Stefan Weil  wrote:
> The following changes since commit 9438fe9e56760e5e5e11d6c7d12ed9c64a0c8446:
>
>   Merge remote-tracking branch 'remotes/elmarco/tags/rm-libcacard' into 
> staging (2015-09-24 17:04:31 +0100)
>
> are available in the git repository at:
>
>   git://qemu.weilnetz.de/qemu.git tags/pull-wxx-20150924
>
> for you to fetch changes up to 4d9310f427b477a126f6f2006c3a73b9764948b6:
>
>   oslib-win32: only provide localtime_r/gmtime_r if missing (2015-09-24 
> 21:13:49 +0200)
>
> 
> wxx patch queue
>
>   - Faster Win32 QemuEvent
>   - Fixes needed for latest Mingw-w64 build environment
>   - Version information in QEMU installer for Windows
>
> 

Applied, thanks.

-- PMM

Re: [Qemu-devel] [PATCH v4 2/2] spapr: generate DT node names

2015-09-24 Thread Gavin Shan

On Thu, Sep 24, 2015 at 12:27:39PM +0200, Laurent Vivier wrote:
>When DT node names for PCI devices are generated by SLOF,
>they are generated according to the type of the device
>(for instance, ethernet for virtio-net-pci device).
>
>Node name for hotplugged devices is generated by QEMU.
>This patch adds the mechanic to QEMU to create the node
>name according to the device type too.
>
>The data structure has been roughly copied from OpenBIOS/OpenHackware,
>node names from SLOF.
>
>Example:
>
>Hotplugging some PCI cards with QEMU monitor:
>
>device_add virtio-tablet-pci
>device_add virtio-serial-pci
>device_add virtio-mouse-pci
>device_add virtio-scsi-pci
>device_add virtio-gpu-pci
>device_add ne2k_pci
>device_add nec-usb-xhci
>device_add intel-hda
>
>What we can see in linux device tree:
>
>for dir in /proc/device-tree/pci@8002000/*@*/; do
>echo $dir
>cat $dir/name
>echo
>done
>
>WITHOUT this patch:
>
>/proc/device-tree/pci@8002000/pci@0/
>pci
>/proc/device-tree/pci@8002000/pci@1/
>pci
>/proc/device-tree/pci@8002000/pci@2/
>pci
>/proc/device-tree/pci@8002000/pci@3/
>pci
>/proc/device-tree/pci@8002000/pci@4/
>pci
>/proc/device-tree/pci@8002000/pci@5/
>pci
>/proc/device-tree/pci@8002000/pci@6/
>pci
>/proc/device-tree/pci@8002000/pci@7/
>pci
>
>WITH this patch:
>
>/proc/device-tree/pci@8002000/communication-controller@1/
>communication-controller
>/proc/device-tree/pci@8002000/display@4/
>display
>/proc/device-tree/pci@8002000/ethernet@5/
>ethernet
>/proc/device-tree/pci@8002000/input-controller@0/
>input-controller
>/proc/device-tree/pci@8002000/mouse@2/
>mouse
>/proc/device-tree/pci@8002000/multimedia-device@7/
>multimedia-device
>/proc/device-tree/pci@8002000/scsi@3/
>scsi
>/proc/device-tree/pci@8002000/usb-xhci@6/
>usb-xhci
>
>Signed-off-by: Laurent Vivier 
>Reviewed-by: Thomas Huth 
>---
> hw/ppc/spapr_pci.c | 292 ++---
> 1 file changed, 278 insertions(+), 14 deletions(-)
>
>diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>index a2feb4c..63eb28c 100644
>--- a/hw/ppc/spapr_pci.c
>+++ b/hw/ppc/spapr_pci.c
>@@ -38,6 +38,7 @@
>
> #include "hw/pci/pci_bridge.h"
> #include "hw/pci/pci_bus.h"
>+#include "hw/pci/pci_ids.h"
> #include "hw/ppc/spapr_drc.h"
> #include "sysemu/device_tree.h"
>
>@@ -944,6 +945,276 @@ static void populate_resource_props(PCIDevice *d, 
>ResourceProps *rp)
> rp->assigned_len = assigned_idx * sizeof(ResourceFields);
> }
>

One question: is there a reason why the logic converting the
class/subclass/iface code to a string isn't put into the generic PCI module?
If the code is put there, all platforms can reuse it.

Thanks,
Gavin

>+typedef struct PCIClass PCIClass;
>+typedef struct PCISubClass PCISubClass;
>+typedef struct PCIIFace PCIIFace;
>+
>+struct PCIIFace {
>+uint8_t iface;
>+const char *name;
>+};
>+
>+struct PCISubClass {
>+uint8_t subclass;
>+const char *name;
>+const PCIIFace *iface;
>+};
>+#define SUBCLASS(a) ((uint8_t)a)
>+#define IFACE(a)((uint8_t)a)
>+
>+struct PCIClass {
>+const char *name;
>+const PCISubClass *subc;
>+};
>+
>+static const PCISubClass undef_subclass[] = {
>+{ IFACE(PCI_CLASS_NOT_DEFINED_VGA), "display", NULL },
>+{ 0xFF, NULL, NULL, NULL },
>+};
>+
>+static const PCISubClass mass_subclass[] = {
>+{ SUBCLASS(PCI_CLASS_STORAGE_SCSI), "scsi", NULL },
>+{ SUBCLASS(PCI_CLASS_STORAGE_IDE), "ide", NULL },
>+{ SUBCLASS(PCI_CLASS_STORAGE_FLOPPY), "fdc", NULL },
>+{ SUBCLASS(PCI_CLASS_STORAGE_IPI), "ipi", NULL },
>+{ SUBCLASS(PCI_CLASS_STORAGE_RAID), "raid", NULL },
>+{ SUBCLASS(PCI_CLASS_STORAGE_ATA), "ata", NULL },
>+{ SUBCLASS(PCI_CLASS_STORAGE_SATA), "sata", NULL },
>+{ SUBCLASS(PCI_CLASS_STORAGE_SAS), "sas", NULL },
>+{ 0xFF, NULL, NULL },
>+};
>+
>+static const PCISubClass net_subclass[] = {
>+{ SUBCLASS(PCI_CLASS_NETWORK_ETHERNET), "ethernet", NULL },
>+{ SUBCLASS(PCI_CLASS_NETWORK_TOKEN_RING), "token-ring", NULL },
>+{ SUBCLASS(PCI_CLASS_NETWORK_FDDI), "fddi", NULL },
>+{ SUBCLASS(PCI_CLASS_NETWORK_ATM), "atm", NULL },
>+{ SUBCLASS(PCI_CLASS_NETWORK_ISDN), "isdn", NULL },
>+{ SUBCLASS(PCI_CLASS_NETWORK_WORLDFIP), "worldfip", NULL },
>+{ SUBCLASS(PCI_CLASS_NETWORK_PICMG214), "picmg", NULL },
>+{ 0xFF, NULL, NULL },
>+};
>+
>+static const PCISubClass displ_subclass[] = {
>+{ SUBCLASS(PCI_CLASS_DISPLAY_VGA), "vga", NULL },
>+{ SUBCLASS(PCI_CLASS_DISPLAY_XGA), "xga", NULL },
>+{ SUBCLASS(PCI_CLASS_DISPLAY_3D), "3d-controller", NULL },
>+{ 0xFF, NULL, NULL },
>+};
>+
>+static const PCISubClass media_subclass[] = {
>+{ SUBCLASS(PCI_CLASS_MULTIMEDIA_VIDEO), "video", NULL },
>+{ SUBCLASS(PCI_CLASS_MULTIMEDIA_AUDIO), "sound", NULL },
>+{ SUBCLASS(PCI_CLASS_MULTIMEDIA_PHONE), "telephony", NULL },
>+{ 0xFF, NULL, NULL },
>+};
>+

[Qemu-devel] Possible bug in target-i386/helper.c:do_cpu_init()?

2015-09-24 Thread Bill Paul

Consider the following circumstances:

- An x86-64 multicore system is running with all cores set for long mode
  (EFER.LME and EFER.LMA set)
- The OS decides to re-launch one of the AP CPUs using an INIT IPI

According to the Intel architecture manual, an INIT IPI should reset the CPU 
state (with a few small exceptions):

[...]
10.4.7.3   Local APIC State After an INIT Reset ("Wait-for-SIPI" State)

An INIT reset of the processor can be initiated in either of two ways:
·By asserting the processor's INIT# pin.
·By sending the processor an INIT IPI (an IPI with the delivery mode set 
to INIT).

Upon receiving an INIT through either of these mechanisms, the processor 
responds by beginning the initialization process of the processor core and the 
local APIC. The state of the local APIC following an INIT reset is the same as
it is after a power-up or hardware reset, except that the APIC ID and 
arbitration ID registers are not affected. This state is also referred to at 
the "wait-for-SIPI" state (see also: Section 8.4.2, "MP Initialization 
Protocol Requirements and Restrictions").
[...]

Note however that do_cpu_init() does this:

1225 void do_cpu_init(X86CPU *cpu)
1226 {
1227 CPUState *cs = CPU(cpu);
1228 CPUX86State *env = &cpu->env;
1229 CPUX86State *save = g_new(CPUX86State, 1);
1230 int sipi = cs->interrupt_request & CPU_INTERRUPT_SIPI;
1231 
1232 *save = *env;
1233 
1234 cpu_reset(cs);
1235 cs->interrupt_request = sipi;
1236 memcpy(&env->start_init_save, &save->start_init_save,
1237offsetof(CPUX86State, end_init_save) -
1238offsetof(CPUX86State, start_init_save));
1239 g_free(save);
1240 
1241 if (kvm_enabled()) {
1242 kvm_arch_do_init_vcpu(cpu);
1243 }
1244 apic_init_reset(cpu->apic_state);
1245 }

The CPU environment, which in this case includes the EFER state, is saved and 
restored when calling cpu_reset(). The x86_cpu_reset() function actually does 
clear all of the CPU environment, but this function puts it all back.

The result of this is that if the CPU was in long mode and you do an INIT IPI, 
the CPU still has the EFER.LMA and EFER.LME bits set, even though it's not 
actually running in long mode anymore. It doesn't seem possible for the guest 
to get the CPU out of this state, and one nasty side-effect is that trying to 
set the CR0 to enable paging never succeeds.

I added the following code at line 1240 above as a workaround:

#ifdef TARGET_X86_64
/*
 * The initial state of the CPU is not 64-bit mode. This being
 * the case, don't leave the EFER.LMA or EFER.LME bits set.
 */
 
cpu_load_efer(env, 0);
#endif

This seemed to fix the problem I was having, however I'm not certain this is 
the correct fix.

As background, I ran across this problem testing VxWorks with QEMU 2.3.0 and 
OVMF firmware. The VxWorks BOOTX64.EFI loader is able to load and run 32-bit 
VxWorks images on 64-bit hardware by forcing the CPU back to 32-bit mode 
before handing control to the OS. However it only does this for the BSP (CPU 
0). It turns out that the UEFI firmware puts the AP cores into long mode too. 
(This may be new in recent UEFI/OVMF versions, because I'm pretty sure I tested 
this path before and didn't see a problem.) Everything works ok with 
uniprocessor images, but with SMP images, launching the first AP CPU fails due 
to the above condition (the CPU starts up, but is unable to enable paging and 
dies screaming in short order).

Booting with the 32-bit OVMF build and the VxWorks BOOTIA32.EFI loader works 
ok. The same VxWorks loader and kernel code also seems to run ok on real 
hardware.

I'm using QEMU 2.3.0 on FreeBSD/amd64 9.2-RELEASE. I'm not using KVM. It looks 
like the code is still the same in the git repo. Am I correct that 
do_cpu_init() should be clearing the EFER contents?

-Bill

-- 
=
-Bill Paul(510) 749-2329 | Senior Member of Technical Staff,
 wp...@windriver.com | Master of Unix-Fu - Wind River Systems
=
   "I put a dollar in a change machine. Nothing changed." - George Carlin
=

Re: [Qemu-devel] [PATCH] tilegx: Support raise instruction

2015-09-24 Thread Chen Gang


On 9/24/15 02:34, Richard Henderson wrote:
> You forgot to cc qemu-devel.
> 

Oh, we cannot find it in the qemu mailing list archive. But I really did cc
qemu-devel.

When I send the next patches, I'll watch for this; if I still "can not cc
qemu-devel", I shall try to send patches from my hotmail client.


> This patch needs to be split.
> 

OK.

> On 09/22/2015 03:38 PM, gang.chen.5...@gmail.com wrote:
>>  
>> +static int sigdata_code(uint64_t sigdata)
>> +{
>> +return (sigdata & 0x3ff) >> 6;
>> +}
>> +
>> +static int sigdata_no(uint64_t sigdata)
>> +{
>> +return sigdata & 0x3f;
>> +}
>> +
>> +static void do_raise(CPUTLGState *env)
>> +{
>> +target_siginfo_t info;
>> +
>> +info.si_signo = sigdata_no(env->sigdata);
>> +info.si_errno = 0;
>> +info.si_code = sigdata_code(env->sigdata);
>> +info._sifields._sigfault._addr = env->pc;
>> +queue_signal(env, info.si_signo, &info);
>> +}
> 
> 
> This should look much more like the linux kernel code, where instead of 
> passing
> "sigdata", we load the instruction that faulted and parse out the data.  You
> should only need TILEGX_EXCP_OPCODE_ILL.
>

OK, thank, I will try, next.

>> 
>> +env->spregs[TILEGX_SPR_EX_CONTEXT_1] = 
>> TILEGX_PL_ICS_EX1(TILEGX_USER_PL, 1);
>> +env->regs[TILEGX_R_SP] = (unsigned long) frame;
>> +env->regs[TILEGX_R_LR] = restorer;
>> +env->regs[0] = (unsigned long) sig;
>> +env->regs[1] = (unsigned long) &frame->info;
>> +env->regs[2] = (unsigned long) &frame->uc;
>> +/* regs->flags |= PT_FLAGS_CALLER_SAVES; FIXME: we can skip it? */
>> +
>> +unlock_user_struct(frame, frame_addr, 1);
>> +return;
>> +
>> +give_sigsegv:
>> +if (sig == TARGET_SIGSEGV) {
>> +ka->_sa_handler = TARGET_SIG_DFL;
>> +}
>> +force_sig(TARGET_SIGSEGV /* , current */);
>> +}
>> +
>> +/* kernel: arch/tile/kernel/signal.c */
>> +long do_rt_sigreturn(CPUArchState *env)
>> +{
>> +fprintf(stderr, "do_rt_sigreturn: not implemented\n");
>> +return -TARGET_ENOSYS;
>> +}
>> +
> 
> The introduction of signal handling needs to be its own patch.
> 

OK, thanks.

> I'm also disappointed that you didn't fill in rt_sigreturn; there's no point in
> not doing them together.
> 

This patch is only for raise insn, so I only implement the related code
for raise.

But, OK, I will let them together in one patch, next.

>>  
>>  #include "exec/cpu-defs.h"
>> +#include "spr_def_64.h"
>>  
>> +#define TILEGX_EX1_PL(ex1) \
>> +  (((ex1) >> SPR_EX_CONTEXT_1_1__PL_SHIFT) & SPR_EX_CONTEXT_1_1__PL_RMASK)
>> +#define TILEGX_EX1_ICS(ex1) \
>> +  (((ex1) >> SPR_EX_CONTEXT_1_1__ICS_SHIFT) & SPR_EX_CONTEXT_1_1__ICS_RMASK)
>> +#define TILEGX_PL_ICS_EX1(pl, ics) \
>> +  (((pl) << SPR_EX_CONTEXT_1_1__PL_SHIFT) | \
>> +   ((ics) << SPR_EX_CONTEXT_1_1__ICS_SHIFT))
>> +
>> +#define TILEGX_USER_PL 0
> 
> What's this for?  It appears to be something unrelated and not used.
>

The previous setup_rt_frame() will use some of them.

But it is OK for me to move them to the related file. 
 
>>  
>>  case OE(ADDLI_OPCODE_X0, 0, X0):
>>  case OE(ADDLI_OPCODE_X1, 0, X1):
>> -tcg_gen_addi_tl(tdest, tsrca, imm);
>> +if ((srca == TILEGX_R_ZERO) && (dest == TILEGX_R_ZERO)) {
>> +t0 = tcg_const_tl(imm & 0x);
>> +tcg_gen_st_tl(t0, cpu_env, offsetof(CPUTLGState, sigdata));
>> +tcg_temp_free(t0);
>> +} else {
>> +tcg_gen_addi_tl(tdest, tsrca, imm);
>> +}
>>  mnemonic = "addli";
>>  break;
>>  case OE(ADDXLI_OPCODE_X0, 0, X0):
>>
> 
> Certainly you should not be complicating addli like this.
> 

OK, I'll try.


Thanks.
-- 
Chen Gang (陈刚)

Open, share, and attitude like air, water, and life which God blessed

Re: [Qemu-devel] [PATCH v5 15/21] vhost user: add rarp sending after live migration for legacy guest

2015-09-24 Thread Marc-André Lureau

Hi

- Original Message -
> On Thu, Sep 24, 2015 at 6:22 PM,   wrote:
> > From: Thibaut Collet 
> >
> > A new vhost user message is added to allow QEMU to ask to vhost user
> > backend to
> > broadcast a fake RARP after live migration for guest without GUEST_ANNOUNCE
> > capability.
> >
> > This new message is sent only if the backend supports the new
> > VHOST_USER_PROTOCOL_F_RARP protocol feature.
> > The payload of this new message is the MAC address of the guest (not known
> > by
> > the backend). The MAC address is copied in the first 6 bytes of a u64 to
> > avoid
> > to create a new payload message type.
> >
> > This new message has no equivalent ioctl so a new callback is added in the
> > userOps structure to send the request.
> >
> > Upon reception of this new message the vhost user backend must generate and
> > broadcast a fake RARP request to notify the migration is terminated.
> >
> > Signed-off-by: Thibaut Collet 
> > [Rebased and fixed checkpatch errors - Marc-André]
> > Signed-off-by: Marc-André Lureau 
> > ---
> >  docs/specs/vhost-user.txt | 15 +++
> >  hw/net/vhost_net.c| 17 +
> >  hw/virtio/vhost-user.c| 30 ++
> >  include/hw/virtio/vhost-backend.h |  3 +++
> >  include/net/vhost_net.h   |  1 +
> >  net/vhost-user.c  | 24 ++--
> >  6 files changed, 88 insertions(+), 2 deletions(-)
> >
> > diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
> > index e0292a0..e0d71e2 100644
> > --- a/docs/specs/vhost-user.txt
> > +++ b/docs/specs/vhost-user.txt
> > @@ -194,6 +194,7 @@ Protocol features
> >
> >  #define VHOST_USER_PROTOCOL_F_MQ 0
> >  #define VHOST_USER_PROTOCOL_F_LOG_SHMFD  1
> > +#define VHOST_USER_PROTOCOL_F_RARP   2
> >
> >  Message types
> >  -
> > @@ -381,3 +382,17 @@ Message types
> >Master payload: vring state description
> >
> >Signal slave to enable or disable corresponding vring.
> > +
> > + * VHOST_USER_SEND_RARP
> > +
> > +  Id: 19
> > +  Equivalent ioctl: N/A
> > +  Master payload: u64
> > +
> > +  Ask vhost user backend to broadcast a fake RARP to notify the
> > migration
> > +  is terminated for guest that does not support GUEST_ANNOUNCE.
> > +  Only legal if feature bit VHOST_USER_F_PROTOCOL_FEATURES is present
> > in
> > +  VHOST_USER_GET_FEATURES and protocol feature bit
> > VHOST_USER_PROTOCOL_F_RARP
> > +  is present in VHOST_USER_GET_PROTOCOL_FEATURES.
> > +  The first 6 bytes of the payload contain the mac address of the
> > guest to
> > +  allow the vhost user backend to construct and broadcast the fake
> > RARP.
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 840f443..da66b64 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -388,6 +388,18 @@ void vhost_net_cleanup(struct vhost_net *net)
> >  g_free(net);
> >  }
> >
> > +int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
> > +{
> > +const VhostOps *vhost_ops = net->dev.vhost_ops;
> > +int r = -1;
> > +
> > +if (vhost_ops->vhost_migration_done) {
> > +r = vhost_ops->vhost_migration_done(&net->dev, mac_addr);
> > +}
> > +
> > +return r;
> > +}
> > +
> >  bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
> >  {
> >  return vhost_virtqueue_pending(&net->dev, idx);
> > @@ -479,6 +491,11 @@ void vhost_net_virtqueue_mask(VHostNetState *net,
> > VirtIODevice *dev,
> >  {
> >  }
> >
> > +int vhost_net_notify_migration_done(struct vhost_net *net)
> > +{
> > +return -1;
> > +}
> > +
> >  VHostNetState *get_vhost_net(NetClientState *nc)
> >  {
> >  return 0;
> > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > index 455caba..b7f3699 100644
> > --- a/hw/virtio/vhost-user.c
> > +++ b/hw/virtio/vhost-user.c
> > @@ -10,6 +10,7 @@
> >
> >  #include "hw/virtio/vhost.h"
> >  #include "hw/virtio/vhost-backend.h"
> > +#include "hw/virtio/virtio-net.h"
> >  #include "sysemu/char.h"
> >  #include "sysemu/kvm.h"
> >  #include "qemu/error-report.h"
> > @@ -30,6 +31,7 @@
> >  #define VHOST_USER_PROTOCOL_FEATURE_MASK 0x3ULL
> >  #define VHOST_USER_PROTOCOL_F_MQ 0
> >  #define VHOST_USER_PROTOCOL_F_LOG_SHMFD  1
> > +#define VHOST_USER_PROTOCOL_F_RARP   2
> 
> The VHOST_USER_PROTOCOL_FEATURE_MASK  must be changed and set to 0x7ULL
> 

Good catch (too many rebases, having a test would help to prevent this kind of 
mistake)

thanks

Re: [Qemu-devel] [PATCH 1/3] virtio: introduce virtqueue_unmap_sg()

2015-09-24 Thread Andrew James

On 09/18/2015 02:01 AM, Jason Wang wrote:
> Factor out sg unmapping logic. This will be reused by the patch that
> can discard descriptor.
> 
> Cc: Michael S. Tsirkin 
> Signed-off-by: Jason Wang 
> ---
>  hw/virtio/virtio.c | 15 ++-
>  1 file changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 0832db9..eb8d5ca 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -243,15 +243,12 @@ int virtio_queue_empty(VirtQueue *vq)
>  return vring_avail_idx(vq) == vq->last_avail_idx;
>  }
>  
> -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> -unsigned int len, unsigned int idx)
> +static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
> +   unsigned int len)
>  {
>  unsigned int offset;
>  int i;
>  
> -trace_virtqueue_fill(vq, elem, len, idx);
> -
> -offset = 0;
>  for (i = 0; i < elem->in_num; i++) {
>  size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
>  

Should the "offset = 0" really be dropped here? Seems like it ends
up uninitialized. GCC thinks it might too.

> @@ -266,6 +263,14 @@ void virtqueue_fill(VirtQueue *vq, const 
> VirtQueueElement *elem,
>  cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
>elem->out_sg[i].iov_len,
>0, elem->out_sg[i].iov_len);
> +}
> +
> +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> +unsigned int len, unsigned int idx)
> +{
> +trace_virtqueue_fill(vq, elem, len, idx);
> +
> +virtqueue_unmap_sg(vq, elem, len);
>  
>  idx = (idx + vring_used_idx(vq)) % vq->vring.num;
>  
> 

Thanks,
-- 
Andrew James

[Qemu-devel] [RFC] transactions: add transaction-wide property

2015-09-24 Thread John Snow

This replaces the per-action property as in Fam's series.
Instead, we have a transaction-wide property that is shared
with each action.

At the action level, if a property supplied transaction-wide
is disagreeable, we return error and the transaction is aborted.

RFC:

Where this makes sense: Any transactional actions that aren't
prepared to accept this new paradigm of transactional behavior
can error_setg and return.

Where this may not make sense: consider the transactions which
do not use BlockJobs to perform their actions, i.e. they are
synchronous during the transactional phase. Because they either
fail or succeed so early, we might say that of course they can
support this property ...

...however, consider the case where we create a new bitmap and
perform a full backup, using allow_partial=false. If the backup
fails, we might well expect the bitmap to be deleted because a
member of the transaction ultimately/eventually failed. However,
the bitmap creation was not undone because it does not have a
pending/delayed abort/commit action -- those are only for jobs
in this implementation.

How do we fix this?

(1) We just say "No, you can't use the new block job transaction
completion mechanic in conjunction with these commands," or

(2) We make Bitmap creation/resetting small, synchronous blockjobs
that can join the BlockJobTxn.

Signed-off-by: John Snow 
---
 blockdev.c | 87 --
 blockjob.c |  2 +-
 qapi-schema.json   | 15 +++--
 qapi/block-core.json   | 26 ---
 qmp-commands.hx|  2 +-
 tests/qemu-iotests/124 | 12 +++
 6 files changed, 83 insertions(+), 61 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 45a9fe7..02b1a83 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1061,7 +1061,7 @@ static void blockdev_do_action(int kind, void *data, 
Error **errp)
 action.data = data;
 list.value = &action;
 list.next = NULL;
-qmp_transaction(&list, errp);
+qmp_transaction(&list, false, NULL, errp);
 }
 
 void qmp_blockdev_snapshot_sync(bool has_device, const char *device,
@@ -1286,6 +1286,7 @@ struct BlkActionState {
 TransactionAction *action;
 const BlkActionOps *ops;
 BlockJobTxn *block_job_txn;
+TransactionProperties *txn_props;
 QSIMPLEQ_ENTRY(BlkActionState) entry;
 };
 
@@ -1322,6 +1323,12 @@ static void internal_snapshot_prepare(BlkActionState 
*common,
 name = internal->name;
 
 /* 2. check for validation */
+if (!common->txn_props->allow_partial) {
+error_setg(errp,
+   "internal_snapshot does not support allow_partial = false");
+return;
+}
+
 blk = blk_by_name(device);
 if (!blk) {
 error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
@@ -1473,6 +1480,12 @@ static void external_snapshot_prepare(BlkActionState 
*common,
 }
 
 /* start processing */
+if (!common->txn_props->allow_partial) {
+error_setg(errp,
+   "external_snapshot does not support allow_partial = false");
+return;
+}
+
 state->old_bs = bdrv_lookup_bs(has_device ? device : NULL,
has_node_name ? node_name : NULL,
&local_err);
@@ -1603,14 +1616,11 @@ static void drive_backup_prepare(BlkActionState 
*common, Error **errp)
 DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
 BlockDriverState *bs;
 BlockBackend *blk;
-DriveBackupTxn *backup_txn;
 DriveBackup *backup;
-BlockJobTxn *txn = NULL;
 Error *local_err = NULL;
 
 assert(common->action->kind == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
-backup_txn = common->action->drive_backup;
-backup = backup_txn->base;
+backup = common->action->drive_backup->base;
 
 blk = blk_by_name(backup->device);
 if (!blk) {
@@ -1624,11 +1634,6 @@ static void drive_backup_prepare(BlkActionState *common, 
Error **errp)
 state->aio_context = bdrv_get_aio_context(bs);
 aio_context_acquire(state->aio_context);
 
-if (backup_txn->has_transactional_cancel &&
-backup_txn->transactional_cancel) {
-txn = common->block_job_txn;
-}
-
 do_drive_backup(backup->device, backup->target,
 backup->has_format, backup->format,
 backup->sync,
@@ -1637,7 +1642,7 @@ static void drive_backup_prepare(BlkActionState *common, 
Error **errp)
 backup->has_bitmap, backup->bitmap,
 backup->has_on_source_error, backup->on_source_error,
 backup->has_on_target_error, backup->on_target_error,
-txn, &local_err);
+common->block_job_txn, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
@@ -1686,16 +1691,13 @@ static void do_blockdev_backup(const char *device, 
const char *target,
 static void blockdev_backup_prepare(BlkActionState *common

[Qemu-devel] [Bug 1498144] Re: Failure booting hurd with qemu-system-i386 on ARM

2015-09-24 Thread PeteVine

Even though not related to the original issue (was also happening on
i386 a few days ago), after getting to the login prompt inside hurd the
keyboard doesn't work and the only clue from the kernel at boot time
might be this:

'Unexpected ACK from keyboard'

or this:

'/bin/console: could not receive return value from the daemon process:
Connection timed out'

I haven't got any more info so I'm not going to open a new bug myself.
Thx.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1498144

Title:
   Failure booting hurd with qemu-system-i386 on ARM

Status in QEMU:
  New

Bug description:
  Trying to boot debian-hurd-20150320.img ends with:

  qemu-system-i386: qemu-coroutine-lock.c:91: qemu_co_queue_restart_all:
  Assertion `qemu_in_coroutine()' failed.

  Program received signal SIGABRT, Aborted.
  __libc_do_syscall ()
  at ../ports/sysdeps/unix/sysv/linux/arm/libc-do-syscall.S:44
  44  ../ports/sysdeps/unix/sysv/linux/arm/libc-do-syscall.S: No such file 
or directory.
  (gdb) bt
  #0  __libc_do_syscall ()
  at ../ports/sysdeps/unix/sysv/linux/arm/libc-do-syscall.S:44
  #1  0xb6ef8f0e in __GI_raise (sig=sig@entry=6)
  at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
  #2  0xb6efb766 in __GI_abort () at abort.c:89
  #3  0xb6ef4150 in __assert_fail_base (
  fmt=0x1 ,
  assertion=0x7f89a234 "qemu_in_coroutine()", assertion@entry=0x0,
  file=0x7f89da58 "qemu-coroutine-lock.c", file@entry=0xb566 "\001",
  line=91, line@entry=3069931692,
  function=function@entry=0x7f89ab78 "qemu_co_queue_restart_all")
  at assert.c:92
  #4  0xb6ef41e6 in __GI___assert_fail (assertion=0x0, file=0xb566 "\001",
  line=3069931692, function=0x7f89ab78 "qemu_co_queue_restart_all")
  at assert.c:101
  #5  0x7f59a6b4 in ?? ()

  I was using the same setup as in Bug 893208 (i.e git checkout from
  2015-09-15, armv7 Odroid C1)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1498144/+subscriptions

[Qemu-devel] [Bug 1498144] Re: Failure booting hurd with qemu-system-i386 on ARM

2015-09-24 Thread PeteVine

Lastly, the machine 'power down' button doesn't work and a new message
appeared inside hurd:

'kdb: queue full'

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1498144

Title:
   Failure booting hurd with qemu-system-i386 on ARM

Status in QEMU:
  New

Bug description:
  Trying to boot debian-hurd-20150320.img ends with:

  qemu-system-i386: qemu-coroutine-lock.c:91: qemu_co_queue_restart_all:
  Assertion `qemu_in_coroutine()' failed.

  Program received signal SIGABRT, Aborted.
  __libc_do_syscall ()
  at ../ports/sysdeps/unix/sysv/linux/arm/libc-do-syscall.S:44
  44  ../ports/sysdeps/unix/sysv/linux/arm/libc-do-syscall.S: No such file 
or directory.
  (gdb) bt
  #0  __libc_do_syscall ()
  at ../ports/sysdeps/unix/sysv/linux/arm/libc-do-syscall.S:44
  #1  0xb6ef8f0e in __GI_raise (sig=sig@entry=6)
  at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
  #2  0xb6efb766 in __GI_abort () at abort.c:89
  #3  0xb6ef4150 in __assert_fail_base (
  fmt=0x1 ,
  assertion=0x7f89a234 "qemu_in_coroutine()", assertion@entry=0x0,
  file=0x7f89da58 "qemu-coroutine-lock.c", file@entry=0xb566 "\001",
  line=91, line@entry=3069931692,
  function=function@entry=0x7f89ab78 "qemu_co_queue_restart_all")
  at assert.c:92
  #4  0xb6ef41e6 in __GI___assert_fail (assertion=0x0, file=0xb566 "\001",
  line=3069931692, function=0x7f89ab78 "qemu_co_queue_restart_all")
  at assert.c:101
  #5  0x7f59a6b4 in ?? ()

  I was using the same setup as in Bug 893208 (i.e git checkout from
  2015-09-15, armv7 Odroid C1)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1498144/+subscriptions

Re: [Qemu-devel] [PULL v2] virtio,pc features, fixes

2015-09-24 Thread Peter Maydell

On 24 September 2015 at 14:08, Peter Maydell  wrote:
> On 24 September 2015 at 12:54, Michael S. Tsirkin  wrote:
>> Same as v1 but 2 last patches dropped.
>> Not reposting since patches left are the same.
>>
>> The following changes since commit fefa4b128de06cec6d513f00ee61e8208aed4a87:
>>
>>   Merge remote-tracking branch 
>> 'remotes/awilliam/tags/vfio-update-20150923.0' into staging (2015-09-23 
>> 21:39:46 +0100)
>>
>> are available in the git repository at:
>>
>>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
>>
>> for you to fetch changes up to 2d0c513cb66b21d5a7c4692e2092af9e997fd251:
>>
>>   MAINTAINERS: add more devices to the PCI section (2015-09-24 16:27:53 
>> +0300)
>>
>> 
>> virtio,pc features, fixes
>>
>> New features:
>> vhost-user multiqueue support
>> virtio-ccw virtio 1 support
>> pci bridge support in iommu
>>
>> Signed-off-by: Michael S. Tsirkin 
>
> Fails to build with our minimal glib version:

...and also on 32-bit:
/root/qemu/hw/i386/intel_iommu.c: In function ‘vtd_find_add_as’:
/root/qemu/hw/i386/intel_iommu.c:1869:20: error: cast from pointer to
integer of different size [-Werror=pointer-to-int-cast]
/root/qemu/hw/i386/intel_iommu.c:1877:15: error: cast from pointer to
integer of different size [-Werror=pointer-to-int-cast]

-- PMM

Re: [Qemu-devel] [PULL v2] virtio,pc features, fixes

2015-09-24 Thread Peter Maydell

On 24 September 2015 at 12:54, Michael S. Tsirkin  wrote:
> Same as v1 but 2 last patches dropped.
> Not reposting since patches left are the same.
>
> The following changes since commit fefa4b128de06cec6d513f00ee61e8208aed4a87:
>
>   Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20150923.0' 
> into staging (2015-09-23 21:39:46 +0100)
>
> are available in the git repository at:
>
>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
>
> for you to fetch changes up to 2d0c513cb66b21d5a7c4692e2092af9e997fd251:
>
>   MAINTAINERS: add more devices to the PCI section (2015-09-24 16:27:53 +0300)
>
> 
> virtio,pc features, fixes
>
> New features:
> vhost-user multiqueue support
> virtio-ccw virtio 1 support
> pci bridge support in iommu
>
> Signed-off-by: Michael S. Tsirkin 

Fails to build with our minimal glib version:
Undefined symbols for architecture x86_64:
  "_g_hash_table_add", referenced from:
  _vtd_find_add_as in intel_iommu.o
  SETFILE lm32-softmmu/qemu-system-lm32

g_hash_table_add only appeared in glib 2.32; our minimum
is 2.22.

thanks
-- PMM

Re: [Qemu-devel] [PATCH v3 18/25] tcg: Add TCG_MAX_INSNS

2015-09-24 Thread Richard Henderson

On 09/24/2015 01:02 PM, Aurelien Jarno wrote:
>> @@ -2903,6 +2903,9 @@ static inline void 
>> gen_intermediate_code_internal(AlphaCPU *cpu,
>>  if (max_insns == 0) {
>>  max_insns = CF_COUNT_MASK;
>>  }
> 
> I guess you can change also change the value to TCG_MAX_INSNS, though I
> guess the compiler will realize about that.

I did wonder about the best thing to do re CF_COUNT_MASK.  Especially as it's
currently set to 0x7fff.  FWIW, the largest TB I've seen so far while
collecting statistics is 157 insns.  So the current setting of TCG_MAX_INSNS at
512 is more than enough.

> 
>> +if (max_insns > TCG_MAX_INSNS) {
>> +max_insns = TCG_MAX_INSNS;
>> +}
>>  
>>  if (in_superpage(&ctx, pc_start)) {
>>  pc_mask = (1ULL << 41) - 1;
> 
> Given we have the same pattern in all targets, I do wonder if it
> wouldn't be better to just setup (cflags & CF_COUNT_MASK) to
> TCG_MAX_INSNS instead of 0 in translate-all.c when not using icount.

Yes, that would probably be best.

There should probably be some helper function that handles all these as well as
noticing single-stepping.  Too many targets test

   (num_insns >= max_insns || singlestep || ...)

when we could just as well set max_insns to 1 and have just the one runtime
test.  Then there's all the targets which have a fixed insn size, where we can
pre-compute the number of insns left on the page, and fold in the end-of-page
test as well.

I'll put cleaning this up on the to-do list.

r~

> 
> That said your code is correct, so:
> 
> Reviewed-by: Aurelien Jarno 
>

[Qemu-devel] [PULL 5/5] oslib-win32: only provide localtime_r/gmtime_r if missing

2015-09-24 Thread Stefan Weil

From: "Daniel P. Berrange" 

The oslib-win32 file currently provides a localtime_r and
gmtime_r replacement unconditionally. Some versions of
Mingw-w64 would provide crude macros for localtime_r/gmtime_r
which QEMU takes care to disable. Latest versions of Mingw-w64
now provide actual functions for localtime_r/gmtime_r, but
with a twist that you have to include unistd.h or pthread.h
before including time.h.  By luck some files in QEMU have
such an include order, resulting in compile errors:

  CCutil/osdep.o
In file included from include/qemu-common.h:48:0,
 from util/osdep.c:48:
include/sysemu/os-win32.h:77:12: error: redundant redeclaration of 'gmtime_r' 
[-Werror=redundant-decls]
 struct tm *gmtime_r(const time_t *timep, struct tm *result);
^
In file included from include/qemu-common.h:35:0,
 from util/osdep.c:48:
/usr/i686-w64-mingw32/sys-root/mingw/include/time.h:272:107: note: previous 
definition of 'gmtime_r' was here
In file included from include/qemu-common.h:48:0,
 from util/osdep.c:48:
include/sysemu/os-win32.h:79:12: error: redundant redeclaration of 
'localtime_r' [-Werror=redundant-decls]
 struct tm *localtime_r(const time_t *timep, struct tm *result);
^
In file included from include/qemu-common.h:35:0,
 from util/osdep.c:48:
/usr/i686-w64-mingw32/sys-root/mingw/include/time.h:269:107: note: previous 
definition of 'localtime_r' was here

This change adds a configure test to see if localtime_r
exists, and only enables the QEMU impl if missing. We also
re-arrange qemu-common.h to try to guarantee that all
source files get unistd.h before time.h and thus see the
localtime_r/gmtime_r defs.

[sw: Use "official" spellings for Mingw-w64, MinGW in comments.]
[sw: Terminate sentences with a dot in comments.]

Signed-off-by: Daniel P. Berrange 
Reviewed-by: Denis V. Lunev 
Signed-off-by: Stefan Weil 
---
 configure | 34 ++
 include/qemu/osdep.h  |  4 +++-
 include/sysemu/os-win32.h |  2 ++
 util/oslib-win32.c|  2 ++
 4 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 29009ee..f14454e 100755
--- a/configure
+++ b/configure
@@ -1737,6 +1737,37 @@ else
 fi
 
 ##
+# MinGW / Mingw-w64 localtime_r/gmtime_r check
+
+if test "$mingw32" = "yes"; then
+# Some versions of MinGW / Mingw-w64 lack localtime_r
+# and gmtime_r entirely.
+#
+# Some versions of Mingw-w64 define a macro for
+# localtime_r/gmtime_r.
+#
+# Some versions of Mingw-w64 will define functions
+# for localtime_r/gmtime_r, but only if you have
+# _POSIX_THREAD_SAFE_FUNCTIONS defined. For fun
+# though, unistd.h and pthread.h both define
+# that for you.
+#
+# So this #undef localtime_r and #include <unistd.h>
+# are not in fact redundant.
+cat > $TMPC << EOF
+#include <unistd.h>
+#include <time.h>
+#undef localtime_r
+int main(void) { localtime_r(NULL, NULL); return 0; }
+EOF
+if compile_prog "" "" ; then
+localtime_r="yes"
+else
+localtime_r="no"
+fi
+fi
+
+##
 # pkg-config probe
 
 if ! has "$pkg_config_exe"; then
@@ -5034,6 +5065,9 @@ fi
 if test "$zero_malloc" = "yes" ; then
   echo "CONFIG_ZERO_MALLOC=y" >> $config_host_mak
 fi
+if test "$localtime_r" = "yes" ; then
+  echo "CONFIG_LOCALTIME_R=y" >> $config_host_mak
+fi
 if test "$qom_cast_debug" = "yes" ; then
   echo "CONFIG_QOM_CAST_DEBUG=y" >> $config_host_mak
 fi
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index ab3c876..ef21efb 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -38,10 +38,12 @@
 #include 
 #include 
 #include 
+/* Put unistd.h before time.h as that triggers localtime_r/gmtime_r
+ * function availability on recentish Mingw-w64 platforms. */
+#include 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index 706d85a..13dcef6 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -73,10 +73,12 @@
 #define siglongjmp(env, val) longjmp(env, val)
 
 /* Missing POSIX functions. Don't use MinGW-w64 macros. */
+#ifndef CONFIG_LOCALTIME_R
 #undef gmtime_r
 struct tm *gmtime_r(const time_t *timep, struct tm *result);
 #undef localtime_r
 struct tm *localtime_r(const time_t *timep, struct tm *result);
+#endif /* CONFIG_LOCALTIME_R */
 
 
 static inline void os_setup_signal_handling(void) {}
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 730a670..08f5a9c 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -95,6 +95,7 @@ void qemu_anon_ram_free(void *ptr, size_t size)
 }
 }
 
+#ifndef CONFIG_LOCALTIME_R
 /* FIXME: add proper locking */
 struct tm *gmtime_r(const time_t *timep, struct tm *result)
 {
@@ -118,6 +119,7 @@ struct tm *localtime_r(const time_t *timep, struct tm 
*result)
 }
 ret

[Qemu-devel] [PULL 4/5] gtk: avoid redefining _WIN32_WINNT macro

2015-09-24 Thread Stefan Weil

From: "Daniel P. Berrange" 

When building for Mingw64 target on Fedora 22 a warning
is issued about _WIN32_WINNT being redefined.

In file included from ui/gtk.c:40:0:
include/ui/gtk.h:5:0: warning: "_WIN32_WINNT" redefined
 # define _WIN32_WINNT 0x0601 /* needed to get definition of MAPVK_VK_TO_VSC */
  ^
In file included from 
/usr/i686-w64-mingw32/sys-root/mingw/include/crtdefs.h:10:0,
 from /usr/i686-w64-mingw32/sys-root/mingw/include/stdio.h:9,
 from /home/berrange/src/virt/qemu/include/qemu/fprintf-fn.h:12,
 from /home/berrange/src/virt/qemu/include/qemu-common.h:18,
 from ui/gtk.c:37:
/usr/i686-w64-mingw32/sys-root/mingw/include/_mingw.h:225:0: note: this is the 
location of the previous definition
 #define _WIN32_WINNT 0x502
 ^

Rather than try to get MAPVK_VK_TO_VSC defined indirectly
by defining _WIN32_WINNT, instead just define it explicitly
if missing.

Signed-off-by: Daniel P. Berrange 
Signed-off-by: Stefan Weil 
Acked-by: Paolo Bonzini 
---
 include/ui/gtk.h | 4 
 ui/gtk.c | 9 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/ui/gtk.h b/include/ui/gtk.h
index ee6dffd..0359333 100644
--- a/include/ui/gtk.h
+++ b/include/ui/gtk.h
@@ -1,10 +1,6 @@
 #ifndef UI_GTK_H
 #define UI_GTK_H
 
-#ifdef _WIN32
-# define _WIN32_WINNT 0x0601 /* needed to get definition of MAPVK_VK_TO_VSC */
-#endif
-
 #ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
 /* Work around an -Wstrict-prototypes warning in GTK headers */
 #pragma GCC diagnostic push
diff --git a/ui/gtk.c b/ui/gtk.c
index 187de74..3057cdc 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -104,6 +104,15 @@
 #define GDK_KEY_Pause GDK_Pause
 #endif
 
+/* Some older mingw versions lack this constant or have
+ * it conditionally defined */
+#ifdef _WIN32
+# ifndef MAPVK_VK_TO_VSC
+#  define MAPVK_VK_TO_VSC 0
+# endif
+#endif
+
+
 #define HOTKEY_MODIFIERS(GDK_CONTROL_MASK | GDK_MOD1_MASK)
 
 static const int modifier_keycode[] = {
-- 
2.1.4

[Qemu-devel] [PULL 3/5] qemu-thread: add a fast path to the Win32 QemuEvent

2015-09-24 Thread Stefan Weil

From: Paolo Bonzini 

QemuEvents are used heavily by call_rcu.  We do not want them to be slow,
but the current implementation does a kernel call on every invocation
of qemu_event_* and won't cut it.

So, wrap a Win32 manual-reset event with a fast userspace path.  The
states and transitions are the same as for the futex and mutex/condvar
implementations, but the slow path is different of course.  The idea
is to reset the Win32 event lazily, as part of a test-reset-test-wait
sequence.  Such a sequence is, indeed, how QemuEvents are used by
RCU and other subsystems!

The patch includes a formal model of the algorithm.

Tested-by: Stefan Weil 
Signed-off-by: Paolo Bonzini 
Signed-off-by: Stefan Weil 
---
 docs/win32-qemu-event.promela | 98 +++
 include/qemu/thread-win32.h   |  1 +
 util/qemu-thread-win32.c  | 66 +++--
 3 files changed, 161 insertions(+), 4 deletions(-)
 create mode 100644 docs/win32-qemu-event.promela

diff --git a/docs/win32-qemu-event.promela b/docs/win32-qemu-event.promela
new file mode 100644
index 000..c446a71
--- /dev/null
+++ b/docs/win32-qemu-event.promela
@@ -0,0 +1,98 @@
+/*
+ * This model describes the implementation of QemuEvent in
+ * util/qemu-thread-win32.c.
+ *
+ * Author: Paolo Bonzini 
+ *
+ * This file is in the public domain.  If you really want a license,
+ * the WTFPL will do.
+ *
+ * To verify it:
+ * spin -a docs/win32-qemu-event.promela
+ * gcc -O2 pan.c -DSAFETY
+ * ./a.out
+ */
+
+bool event;
+int value;
+
+/* Primitives for a Win32 event */
+#define RAW_RESET event = false
+#define RAW_SET   event = true
+#define RAW_WAIT  do :: event -> break; od
+
+#if 0
+/* Basic sanity checking: test the Win32 event primitives */
+#define RESET RAW_RESET
+#define SET   RAW_SET
+#define WAIT  RAW_WAIT
+#else
+/* Full model: layer a userspace-only fast path on top of the RAW_*
+ * primitives.  SET/RESET/WAIT have exactly the same semantics as
+ * RAW_SET/RAW_RESET/RAW_WAIT, but try to avoid invoking them.
+ */
+#define EV_SET 0
+#define EV_FREE 1
+#define EV_BUSY -1
+
+int state = EV_FREE;
+
+int xchg_result;
+#define SET   if :: state != EV_SET ->  \
+atomic { /* xchg_result=xchg(state, EV_SET) */  \
+xchg_result = state;\
+state = EV_SET; \
+}   \
+if :: xchg_result == EV_BUSY -> RAW_SET;\
+   :: else -> skip; \
+fi; \
+ :: else -> skip;   \
+  fi
+
+#define RESET if :: state == EV_SET -> atomic { state = state | EV_FREE; }  \
+ :: else-> skip;\
+  fi
+
+int tmp1, tmp2;
+#define WAIT  tmp1 = state; \
+  if :: tmp1 != EV_SET ->   \
+if :: tmp1 == EV_FREE ->\
+  RAW_RESET;\
+  atomic { /* tmp2=cas(state, EV_FREE, EV_BUSY) */  \
+  tmp2 = state; \
+  if :: tmp2 == EV_FREE -> state = EV_BUSY; \
+ :: else-> skip;\
+  fi;   \
+  } \
+  if :: tmp2 == EV_SET -> tmp1 = EV_SET;\
+ :: else   -> tmp1 = EV_BUSY;   \
+  fi;   \
+   :: else -> skip; \
+fi; \
+assert(tmp1 != EV_FREE);\
+if :: tmp1 == EV_BUSY -> RAW_WAIT;  \
+   :: else -> skip; \
+fi; \
+ :: else -> skip;   \
+  fi
+#endif
+
+active proctype waiter()
+{
+ if
+ :: !value ->
+ RESET;
+ if
+ :: !value -> WAIT;
+ :: else   -> skip;
+ fi;
+:: else -> skip;
+fi;
+assert(value);
+}
+
+active proctype notifier()
+{
+value = true;
+S

[Qemu-devel] [PULL 2/5] slirp: Fix non blocking connect for w32

2015-09-24 Thread Stefan Weil

Signed-off-by: Stefan Weil 
---
 slirp/tcp_input.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c
index f946db8..00a77b4 100644
--- a/slirp/tcp_input.c
+++ b/slirp/tcp_input.c
@@ -584,7 +584,13 @@ findso:
goto cont_input;
  }
 
- if((tcp_fconnect(so) == -1) && (errno != EINPROGRESS) && (errno != 
EWOULDBLOCK)) {
+  if ((tcp_fconnect(so) == -1) &&
+#if defined(_WIN32)
+  socket_error() != WSAEWOULDBLOCK
+#else
+  (errno != EINPROGRESS) && (errno != EWOULDBLOCK)
+#endif
+  ) {
u_char code=ICMP_UNREACH_NET;
DEBUG_MISC((dfd, " tcp fconnect errno = %d-%s\n",
errno,strerror(errno)));
-- 
2.1.4

[Qemu-devel] [PULL 1/5] nsis: Add QEMU version information to Windows registry

2015-09-24 Thread Stefan Weil

The uninstall keys include an optional key "DisplayVersion" which we set
now. By default the version value is read from file VERSION, but it is
also possible to pass VERSION=#.#.# to make.

Signed-off-by: Stefan Weil 
---
 Makefile | 1 +
 qemu.nsi | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/Makefile b/Makefile
index 9e75362..68e2e1b 100644
--- a/Makefile
+++ b/Makefile
@@ -623,6 +623,7 @@ endif # SIGNCODE
 $(if $(DLL_PATH),-DDLLDIR="$(DLL_PATH)") \
 -DSRCDIR="$(SRC_PATH)" \
 -DOUTFILE="$(INSTALLER)" \
+-DDISPLAYVERSION="$(VERSION)" \
 $(SRC_PATH)/qemu.nsi
rm -r ${INSTDIR}
 ifdef SIGNCODE
diff --git a/qemu.nsi b/qemu.nsi
index cc5fafd..a20f6ef 100644
--- a/qemu.nsi
+++ b/qemu.nsi
@@ -139,6 +139,9 @@ Section "${PRODUCT} (required)"
 
 ; Write the uninstall keys for Windows
 WriteRegStr HKLM "${UNINST_KEY}" "DisplayName" "QEMU"
+!ifdef DISPLAYVERSION
+WriteRegStr HKLM "${UNINST_KEY}" "DisplayVersion" "${DISPLAYVERSION}"
+!endif
 WriteRegStr HKLM "${UNINST_KEY}" "UninstallString" '"${UNINST_EXE}"'
 WriteRegDWORD HKLM "${UNINST_KEY}" "NoModify" 1
 WriteRegDWORD HKLM "${UNINST_KEY}" "NoRepair" 1
-- 
2.1.4

[Qemu-devel] [PULL 0/5] Patch queue for w32/w64

2015-09-24 Thread Stefan Weil

The following changes since commit 9438fe9e56760e5e5e11d6c7d12ed9c64a0c8446:

  Merge remote-tracking branch 'remotes/elmarco/tags/rm-libcacard' into staging 
(2015-09-24 17:04:31 +0100)

are available in the git repository at:

  git://qemu.weilnetz.de/qemu.git tags/pull-wxx-20150924

for you to fetch changes up to 4d9310f427b477a126f6f2006c3a73b9764948b6:

  oslib-win32: only provide localtime_r/gmtime_r if missing (2015-09-24 
21:13:49 +0200)


wxx patch queue

  - Faster Win32 QemuEvent
  - Fixes needed for latest Mingw-w64 build environment
  - Version information in QEMU installer for Windows


Daniel P. Berrange (2):
  gtk: avoid redefining _WIN32_WINNT macro
  oslib-win32: only provide localtime_r/gmtime_r if missing

Paolo Bonzini (1):
  qemu-thread: add a fast path to the Win32 QemuEvent

Stefan Weil (2):
  nsis: Add QEMU version information to Windows registry
  slirp: Fix non blocking connect for w32

 Makefile  |  1 +
 configure | 34 ++
 docs/win32-qemu-event.promela | 98 
++
 include/qemu/osdep.h  |  4 +++-
 include/qemu/thread-win32.h   |  1 +
 include/sysemu/os-win32.h |  2 ++
 include/ui/gtk.h  |  4 
 qemu.nsi  |  3 +++
 slirp/tcp_input.c |  8 +++-
 ui/gtk.c  |  9 +
 util/oslib-win32.c|  2 ++
 util/qemu-thread-win32.c  | 66 
++
 12 files changed, 222 insertions(+), 10 deletions(-)
 create mode 100644 docs/win32-qemu-event.promela

[PULL 1/5] nsis: Add QEMU version information to Windows registry
[PULL 2/5] slirp: Fix non blocking connect for w32
[PULL 3/5] qemu-thread: add a fast path to the Win32 QemuEvent
[PULL 4/5] gtk: avoid redefining _WIN32_WINNT macro
[PULL 5/5] oslib-win32: only provide localtime_r/gmtime_r if missing

Re: [Qemu-devel] [PATCH v3 19/25] tcg: Pass data argument to restore_state_to_opc

2015-09-24 Thread Aurelien Jarno

On 2015-09-22 13:25, Richard Henderson wrote:
> The gen_opc_* arrays are already redundant with the data stored in
> the insn_start arguments.  Transition restore_state_to_opc to use
> data from the latter.
> 
> Reviewed-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> ---
>  include/exec/exec-all.h   |  2 +-
>  target-alpha/translate.c  |  5 +++--
>  target-arm/translate.c|  9 +
>  target-cris/translate.c   |  5 +++--
>  target-i386/translate.c   | 26 ++
>  target-lm32/translate.c   |  5 +++--
>  target-m68k/translate.c   |  5 +++--
>  target-microblaze/translate.c |  5 +++--
>  target-mips/translate.c   |  9 +
>  target-moxie/translate.c  |  5 +++--
>  target-openrisc/translate.c   |  4 ++--
>  target-ppc/translate.c|  5 +++--
>  target-s390x/translate.c  |  8 
>  target-sh4/translate.c|  7 ---
>  target-sparc/translate.c  | 10 ++
>  target-tilegx/translate.c |  5 +++--
>  target-tricore/translate.c|  5 +++--
>  target-unicore32/translate.c  |  5 +++--
>  target-xtensa/translate.c |  5 +++--
>  tcg/tcg.c | 11 ++-
>  tcg/tcg.h |  2 ++
>  translate-all.c   |  2 +-
>  22 files changed, 79 insertions(+), 66 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net

[Qemu-devel] [PATCH v2 1/3] hw/vfio/platform: irqfd setup sequence update

2015-09-24 Thread Eric Auger

With current implementation, eventfd VFIO signaling is first set up and
then irqfd is setup, if supported and allowed.

This start sequence causes several issues with IRQ forwarding setup
which, if supported, is transparently attempted on irqfd setup:
IRQ forwarding setup is likely to fail if the IRQ is detected as under
injection into the guest (active at irqchip level or VFIO masked).

This currently always happens because the current sequence explicitly
VFIO-masks the IRQ before setting irqfd.

Even if that masking were removed, we couldn't prevent the case where
the IRQ is under injection into the guest.

So the simpler solution is to remove this 2-step startup and directly
attempt irqfd setup. This is what this patch does.

Also in case the eventfd setup fails, there is no reason to go farther:
let's abort.

Signed-off-by: Eric Auger 
---
 hw/vfio/platform.c | 51 +--
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index a6726cd..d864342 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -310,18 +310,29 @@ static void vfio_platform_eoi(VFIODevice *vbasedev)
 /**
  * vfio_start_eventfd_injection - starts the virtual IRQ injection using
  * user-side handled eventfds
- * @intp: the IRQ struct pointer
+ * @sbdev: the sysbus device handle
+ * @irq: the qemu irq handle
  */
 
-static int vfio_start_eventfd_injection(VFIOINTp *intp)
+static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq)
 {
 int ret;
+VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
+VFIOINTp *intp;
+
+QLIST_FOREACH(intp, &vdev->intp_list, next) {
+if (intp->qemuirq == irq) {
+break;
+}
+}
+assert(intp);
 
 ret = vfio_set_trigger_eventfd(intp, vfio_intp_interrupt);
 if (ret) {
-error_report("vfio: Error: Failed to pass IRQ fd to the driver: %m");
+error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
+ intp->pin);
+abort();
 }
-return ret;
 }
 
 /*
@@ -359,6 +370,15 @@ static int vfio_set_resample_eventfd(VFIOINTp *intp)
 return ret;
 }
 
+/**
+ * vfio_start_irqfd_injection - starts the virtual IRQ injection using
+ * irqfd
+ *
+ * @sbdev: the sysbus device handle
+ * @irq: the qemu irq handle
+ *
+ * In case the irqfd setup fails, we fallback to userspace handled eventfd
+ */
 static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
 {
 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
@@ -366,7 +386,7 @@ static void vfio_start_irqfd_injection(SysBusDevice *sbdev, 
qemu_irq irq)
 
 if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
 !vdev->irqfd_allowed) {
-return;
+goto fail_irqfd;
 }
 
 QLIST_FOREACH(intp, &vdev->intp_list, next) {
@@ -376,13 +396,6 @@ static void vfio_start_irqfd_injection(SysBusDevice 
*sbdev, qemu_irq irq)
 }
 assert(intp);
 
-/* Get to a known interrupt state */
-qemu_set_fd_handler(event_notifier_get_fd(&intp->interrupt),
-NULL, NULL, vdev);
-
-vfio_mask_single_irqindex(&vdev->vbasedev, intp->pin);
-qemu_set_irq(intp->qemuirq, 0);
-
 if (kvm_irqchip_add_irqfd_notifier(kvm_state, &intp->interrupt,
&intp->unmask, irq) < 0) {
 goto fail_irqfd;
@@ -395,9 +408,6 @@ static void vfio_start_irqfd_injection(SysBusDevice *sbdev, 
qemu_irq irq)
 goto fail_vfio;
 }
 
-/* Let's resume injection with irqfd setup */
-vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin);
-
 intp->kvm_accel = true;
 
 trace_vfio_platform_start_irqfd_injection(intp->pin,
@@ -406,9 +416,11 @@ static void vfio_start_irqfd_injection(SysBusDevice 
*sbdev, qemu_irq irq)
 return;
 fail_vfio:
 kvm_irqchip_remove_irqfd_notifier(kvm_state, &intp->interrupt, irq);
+error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
+ intp->pin);
+abort();
 fail_irqfd:
-vfio_start_eventfd_injection(intp);
-vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin);
+vfio_start_eventfd_injection(sbdev, irq);
 return;
 }
 
@@ -646,7 +658,6 @@ static void vfio_platform_realize(DeviceState *dev, Error 
**errp)
 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
 SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
 VFIODevice *vbasedev = &vdev->vbasedev;
-VFIOINTp *intp;
 int i, ret;
 
 vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
@@ -665,10 +676,6 @@ static void vfio_platform_realize(DeviceState *dev, Error 
**errp)
 vfio_map_region(vdev, i);
 sysbus_init_mmio(sbdev, &vdev->regions[i]->mem);
 }
-
-QLIST_FOREACH(intp, &vdev->intp_list, next) {
-vfio_start_eventfd_injection(intp);
-}
 }
 
 static const VMStateDescription vfio_platform_vmstate = {
-- 
1.8.3.2

[Qemu-devel] [PATCH v2 3/3] hw/vfio/platform: do not set resamplefd for edge-sensitive IRQS

2015-09-24 Thread Eric Auger

In irqfd mode, current code attempts to set a resamplefd whatever
the type of the IRQ. For an edge-sensitive IRQ this attempt fails
and as a consequence, the whole irqfd setup fails and we fall back
to the slow mode. This patch bypasses the resamplefd setting for
non level-sensitive IRQs.

Signed-off-by: Eric Auger 

---

v1 -> v2:
- introduce vfio_irq_is_automasked helper function. in case of
  edge-sensitive IRQ, do not allocate/initialize unmask EventNotifier
  nor call vfio_set_resample_eventfd
---
 hw/vfio/platform.c | 42 +++---
 trace-events   |  4 +++-
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index cab1517..5c1156c 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -32,6 +32,11 @@
  * Functions used whatever the injection method
  */
 
+static inline bool vfio_irq_is_automasked(VFIOINTp *intp)
+{
+return intp->flags & VFIO_IRQ_INFO_AUTOMASKED;
+}
+
 /**
  * vfio_init_intp - allocate, initialize the IRQ struct pointer
  * and add it into the list of IRQs
@@ -65,15 +70,17 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
 error_report("vfio: Error: trigger event_notifier_init failed ");
 return NULL;
 }
-/* Get an eventfd for resample/unmask */
-intp->unmask = g_malloc0(sizeof(EventNotifier));
-ret = event_notifier_init(intp->unmask, 0);
-if (ret) {
-g_free(intp->interrupt);
-g_free(intp->unmask);
-g_free(intp);
-error_report("vfio: Error: resamplefd event_notifier_init failed");
-return NULL;
+if (vfio_irq_is_automasked(intp)) {
+/* Get an eventfd for resample/unmask */
+intp->unmask = g_malloc0(sizeof(EventNotifier));
+ret = event_notifier_init(intp->unmask, 0);
+if (ret) {
+g_free(intp->interrupt);
+g_free(intp->unmask);
+g_free(intp);
+error_report("vfio: Error: resamplefd event_notifier_init failed");
+return NULL;
+}
 }
 
 QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
@@ -294,7 +301,7 @@ static void vfio_platform_eoi(VFIODevice *vbasedev)
 /* deassert the virtual IRQ */
 qemu_set_irq(intp->qemuirq, 0);
 
-if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) {
+if (vfio_irq_is_automasked(intp)) {
 /* unmasks the physical level-sensitive IRQ */
 vfio_unmask_single_irqindex(vbasedev, intp->pin);
 }
@@ -409,15 +416,20 @@ static void vfio_start_irqfd_injection(SysBusDevice 
*sbdev, qemu_irq irq)
 if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
 goto fail_vfio;
 }
-if (vfio_set_resample_eventfd(intp) < 0) {
-goto fail_vfio;
+if (vfio_irq_is_automasked(intp)) {
+if (vfio_set_resample_eventfd(intp) < 0) {
+goto fail_vfio;
+}
+trace_vfio_platform_start_level_irqfd_injection(intp->pin,
+event_notifier_get_fd(intp->interrupt),
+event_notifier_get_fd(intp->unmask));
+} else {
+trace_vfio_platform_start_edge_irqfd_injection(intp->pin,
+event_notifier_get_fd(intp->interrupt));
 }
 
 intp->kvm_accel = true;
 
-trace_vfio_platform_start_irqfd_injection(intp->pin,
- event_notifier_get_fd(intp->interrupt),
- event_notifier_get_fd(intp->unmask));
 return;
 fail_vfio:
 kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq);
diff --git a/trace-events b/trace-events
index 25c53e0..8f7829e 100644
--- a/trace-events
+++ b/trace-events
@@ -1621,7 +1621,9 @@ vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ 
#%d (fd = %d)"
 vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending 
IRQ #%d (fd = %d)"
 vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index 
%d: count %d, flags=0x%x"
 vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING"
-vfio_platform_start_irqfd_injection(int index, int fd, int resamplefd) "IRQ 
index=%d, fd = %d, resamplefd = %d"
+vfio_platform_start_level_irqfd_injection(int index, int fd, int resamplefd) 
"IRQ index=%d, fd = %d, resamplefd = %d"
+vfio_platform_start_edge_irqfd_injection(int index, int fd) "IRQ index=%d, fd 
= %d"
+
 
 #hw/acpi/memory_hotplug.c
 mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32
-- 
1.8.3.2

[Qemu-devel] [PATCH v2 2/3] hw/vfio/platform: change interrupt/unmask fields into pointer

2015-09-24 Thread Eric Auger

The unmask EventNotifier might not be initialized in the case of an
edge-sensitive IRQ. Using EventNotifier pointers makes it simpler to
handle the edge-sensitive irqfd setup.

Signed-off-by: Eric Auger 
---
 hw/vfio/platform.c  | 35 ---
 include/hw/vfio/vfio-platform.h |  4 ++--
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index d864342..cab1517 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -57,15 +57,20 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
 sysbus_init_irq(sbdev, &intp->qemuirq);
 
 /* Get an eventfd for trigger */
-ret = event_notifier_init(&intp->interrupt, 0);
+intp->interrupt = g_malloc0(sizeof(EventNotifier));
+ret = event_notifier_init(intp->interrupt, 0);
 if (ret) {
+g_free(intp->interrupt);
 g_free(intp);
 error_report("vfio: Error: trigger event_notifier_init failed ");
 return NULL;
 }
 /* Get an eventfd for resample/unmask */
-ret = event_notifier_init(&intp->unmask, 0);
+intp->unmask = g_malloc0(sizeof(EventNotifier));
+ret = event_notifier_init(intp->unmask, 0);
 if (ret) {
+g_free(intp->interrupt);
+g_free(intp->unmask);
 g_free(intp);
 error_report("vfio: Error: resamplefd event_notifier_init failed");
 return NULL;
@@ -100,7 +105,7 @@ static int vfio_set_trigger_eventfd(VFIOINTp *intp,
 irq_set->start = 0;
 irq_set->count = 1;
 pfd = (int32_t *)&irq_set->data;
-*pfd = event_notifier_get_fd(&intp->interrupt);
+*pfd = event_notifier_get_fd(intp->interrupt);
 qemu_set_fd_handler(*pfd, (IOHandler *)handler, NULL, intp);
 ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
 g_free(irq_set);
@@ -182,7 +187,7 @@ static void vfio_intp_mmap_enable(void *opaque)
 static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
 {
 trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
-  event_notifier_get_fd(&intp->interrupt));
+  event_notifier_get_fd(intp->interrupt));
 
 intp->state = VFIO_IRQ_ACTIVE;
 
@@ -224,18 +229,18 @@ static void vfio_intp_interrupt(VFIOINTp *intp)
 trace_vfio_intp_interrupt_set_pending(intp->pin);
 QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
  intp, pqnext);
-ret = event_notifier_test_and_clear(&intp->interrupt);
+ret = event_notifier_test_and_clear(intp->interrupt);
 qemu_mutex_unlock(&vdev->intp_mutex);
 return;
 }
 
 trace_vfio_platform_intp_interrupt(intp->pin,
-  event_notifier_get_fd(&intp->interrupt));
+  event_notifier_get_fd(intp->interrupt));
 
-ret = event_notifier_test_and_clear(&intp->interrupt);
+ret = event_notifier_test_and_clear(intp->interrupt);
 if (!ret) {
 error_report("Error when clearing fd=%d (ret = %d)",
- event_notifier_get_fd(&intp->interrupt), ret);
+ event_notifier_get_fd(intp->interrupt), ret);
 }
 
 intp->state = VFIO_IRQ_ACTIVE;
@@ -283,7 +288,7 @@ static void vfio_platform_eoi(VFIODevice *vbasedev)
 QLIST_FOREACH(intp, &vdev->intp_list, next) {
 if (intp->state == VFIO_IRQ_ACTIVE) {
 trace_vfio_platform_eoi(intp->pin,
-event_notifier_get_fd(&intp->interrupt));
+event_notifier_get_fd(intp->interrupt));
 intp->state = VFIO_IRQ_INACTIVE;
 
 /* deassert the virtual IRQ */
@@ -360,7 +365,7 @@ static int vfio_set_resample_eventfd(VFIOINTp *intp)
 irq_set->start = 0;
 irq_set->count = 1;
 pfd = (int32_t *)&irq_set->data;
-*pfd = event_notifier_get_fd(&intp->unmask);
+*pfd = event_notifier_get_fd(intp->unmask);
 qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
 ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
 g_free(irq_set);
@@ -396,8 +401,8 @@ static void vfio_start_irqfd_injection(SysBusDevice *sbdev, 
qemu_irq irq)
 }
 assert(intp);
 
-if (kvm_irqchip_add_irqfd_notifier(kvm_state, &intp->interrupt,
-   &intp->unmask, irq) < 0) {
+if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt,
+   intp->unmask, irq) < 0) {
 goto fail_irqfd;
 }
 
@@ -411,11 +416,11 @@ static void vfio_start_irqfd_injection(SysBusDevice 
*sbdev, qemu_irq irq)
 intp->kvm_accel = true;
 
 trace_vfio_platform_start_irqfd_injection(intp->pin,
- event_notifier_get_fd(&intp->interrupt),
- event_notifier_get_fd(&intp->unmask));
+ event_notifier_get_fd(intp->interrupt),
+ event_notifier_get_fd(intp->unmask));
 return;
 fail_

[Qemu-devel] [PATCH v2 0/3] hw/vfio/platform: irqfd setup changes

2015-09-24 Thread Eric Auger

This series fixes a bug related to irqfd setup for edge sensitive IRQs
and proposes a new startup sequence for irqfd signaling.

The current startup sequence brings some issues with respect to the
upcoming ARM IRQ forwarding support. The new startup sequence starts
either irqfd signaling or eventfd signaling, and there is no risk that the
IRQ is active or masked when irqfd/IRQ forwarding is set up.

---

v1 -> v2:
- add "hw/vfio/platform: change interrupt/unmask fields into pointer"
- introduce vfio_irq_is_automasked helper function and for edge-sensitive
  IRQs, do not allocate/initialize unmask EventNotifier nor call
  vfio_set_resample_eventfd

Eric Auger (3):
  hw/vfio/platform: irqfd setup sequence update
  hw/vfio/platform: change interrupt/unmask fields into pointer
  hw/vfio/platform: do not set resamplefd for edge-sensitive IRQS

 hw/vfio/platform.c  | 116 
 include/hw/vfio/vfio-platform.h |   4 +-
 trace-events|   4 +-
 3 files changed, 75 insertions(+), 49 deletions(-)

-- 
1.8.3.2

Re: [Qemu-devel] [PATCH v3 18/25] tcg: Add TCG_MAX_INSNS

2015-09-24 Thread Aurelien Jarno

On 2015-09-22 13:25, Richard Henderson wrote:
> Adjust all translators to respect it.
> 
> Reviewed-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> ---
>  target-alpha/translate.c  |  3 +++
>  target-arm/translate-a64.c|  3 +++
>  target-arm/translate.c|  6 +-
>  target-cris/translate.c   |  3 +++
>  target-i386/translate.c   |  6 +-
>  target-lm32/translate.c   |  3 +++
>  target-m68k/translate.c   |  6 +-
>  target-microblaze/translate.c |  6 +-
>  target-mips/translate.c   |  7 ++-
>  target-moxie/translate.c  | 13 +++--
>  target-openrisc/translate.c   |  3 +++
>  target-ppc/translate.c|  6 +-
>  target-s390x/translate.c  |  3 +++
>  target-sh4/translate.c|  7 ++-
>  target-sparc/translate.c  |  7 ++-
>  target-tilegx/translate.c |  3 +++
>  target-tricore/translate.c| 20 +---
>  target-unicore32/translate.c  |  3 +++
>  target-xtensa/translate.c |  3 +++
>  tcg/tcg.h |  1 +
>  20 files changed, 95 insertions(+), 17 deletions(-)
> 
> diff --git a/target-alpha/translate.c b/target-alpha/translate.c
> index c10193e..538e202 100644
> --- a/target-alpha/translate.c
> +++ b/target-alpha/translate.c
> @@ -2903,6 +2903,9 @@ static inline void 
> gen_intermediate_code_internal(AlphaCPU *cpu,
>  if (max_insns == 0) {
>  max_insns = CF_COUNT_MASK;
>  }

I guess you can also change the value to TCG_MAX_INSNS, though I
guess the compiler will figure that out anyway.

> +if (max_insns > TCG_MAX_INSNS) {
> +max_insns = TCG_MAX_INSNS;
> +}
>  
>  if (in_superpage(&ctx, pc_start)) {
>  pc_mask = (1ULL << 41) - 1;

Given we have the same pattern in all targets, I do wonder if it
wouldn't be better to just setup (cflags & CF_COUNT_MASK) to
TCG_MAX_INSNS instead of 0 in translate-all.c when not using icount.

That said your code is correct, so:

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net

[Qemu-devel] [PULL v2] virtio,pc features, fixes

2015-09-24 Thread Michael S. Tsirkin

Same as v1 but 2 last patches dropped.
Not reposting since patches left are the same.

The following changes since commit fefa4b128de06cec6d513f00ee61e8208aed4a87:

  Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20150923.0' 
into staging (2015-09-23 21:39:46 +0100)

are available in the git repository at:

  git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to 2d0c513cb66b21d5a7c4692e2092af9e997fd251:

  MAINTAINERS: add more devices to the PCI section (2015-09-24 16:27:53 +0300)


virtio,pc features, fixes

New features:
vhost-user multiqueue support
virtio-ccw virtio 1 support
pci bridge support in iommu

Signed-off-by: Michael S. Tsirkin 


Changchun Ouyang (2):
  vhost-user: add multiple queue support
  vhost-user: add a new message to disable/enable a specific virt queue.

Cornelia Huck (4):
  virtio: ring sizes vs. reset
  virtio-ccw: support ring size changes
  virtio-ccw: feature bits > 31 handling
  virtio-ccw: enable virtio-1

Eduardo Habkost (3):
  q35: Move options common to all classes to pc_q35_machine_options()
  q35: Move options common to all classes to pc_i440fx_machine_options()
  pc: Introduce pc-*-2.5 machine classes

Jason Wang (1):
  virtio-net: unbreak self announcement and guest offloads after migration

Knut Omang (1):
  intel_iommu: Add support for translation for devices behind bridges

Michael S. Tsirkin (1):
  vhost-user: add protocol feature negotiation

Paolo Bonzini (2):
  MAINTAINERS: add more devices to the PC section
  MAINTAINERS: add more devices to the PCI section

Pierre Morel (1):
  virtio: right size for virtio_queue_get_avail_size

Yuanhan Liu (4):
  vhost-user: use VHOST_USER_XXX macro for switch statement
  vhost: rename VHOST_RESET_OWNER to VHOST_RESET_DEVICE
  vhost-user: add VHOST_USER_GET_QUEUE_NUM message
  vhost: introduce vhost_backend_get_vq_index method

 qapi-schema.json  |   6 +-
 hw/s390x/virtio-ccw.h |   6 +-
 include/hw/compat.h   |   3 +
 include/hw/i386/intel_iommu.h |  16 -
 include/hw/i386/pc.h  |   4 ++
 include/hw/virtio/vhost-backend.h |   4 ++
 include/hw/virtio/vhost.h |   2 +
 include/net/vhost_net.h   |   3 +
 linux-headers/linux/vhost.h   |   2 +-
 hw/i386/intel_iommu.c |  90 +++-
 hw/i386/pc_piix.c |  22 --
 hw/i386/pc_q35.c  |  22 --
 hw/net/vhost_net.c|  44 ++--
 hw/net/virtio-net.c   |  48 -
 hw/pci-host/q35.c |  25 +--
 hw/s390x/s390-virtio-ccw.c|  20 ++
 hw/s390x/virtio-ccw.c |  64 -
 hw/virtio/vhost-backend.c |  10 ++-
 hw/virtio/vhost-user.c| 139 +++--
 hw/virtio/vhost.c |  20 +++---
 hw/virtio/virtio.c|  66 +-
 net/vhost-user.c  | 141 +-
 tests/vhost-user-test.c   |   2 +-
 MAINTAINERS   |  21 ++
 docs/specs/vhost-user.txt |  77 -
 qemu-options.hx   |   5 +-
 26 files changed, 681 insertions(+), 181 deletions(-)

Re: [Qemu-devel] [PATCH v3 17/25] target-*: Drop cpu_gen_code define

2015-09-24 Thread Aurelien Jarno

On 2015-09-22 13:24, Richard Henderson wrote:
> This symbol no longer exists.
> 
> Reviewed-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> ---
>  target-alpha/cpu.h  | 1 -
>  target-arm/cpu.h| 1 -
>  target-cris/cpu.h   | 1 -
>  target-i386/cpu.h   | 1 -
>  target-lm32/cpu.h   | 1 -
>  target-m68k/cpu.h   | 1 -
>  target-microblaze/cpu.h | 1 -
>  target-mips/cpu.h   | 1 -
>  target-moxie/cpu.h  | 1 -
>  target-openrisc/cpu.h   | 1 -
>  target-ppc/cpu.h| 1 -
>  target-s390x/cpu.h  | 1 -
>  target-sh4/cpu.h| 1 -
>  target-sparc/cpu.h  | 1 -
>  target-tilegx/cpu.h | 1 -
>  target-xtensa/cpu.h | 1 -
>  16 files changed, 16 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net

Re: [Qemu-devel] [PATCH v3 16/25] tcg: Merge cpu_gen_code into tb_gen_code

2015-09-24 Thread Aurelien Jarno

On 2015-09-22 13:24, Richard Henderson wrote:
> As it's only caller, this tidies things a bit.
> 
> Reviewed-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> ---
>  include/exec/exec-all.h |   2 -
>  translate-all.c | 131 
> ++--
>  2 files changed, 59 insertions(+), 74 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net

[Qemu-devel] [PATCH] target-arm: Use common CPU cycle infrastructure

2015-09-24 Thread Christopher Covington

cpu_get_ticks() provides a common interface across targets for
calculating CPU cycles. Using this fixes PMCCNTR reads when -icount
is specified (previously a non-increasing value was returned).

Signed-off-by: Christopher Covington 
---
 target-arm/helper.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 7dc49cb..32923fb 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -729,8 +729,7 @@ void pmccntr_sync(CPUARMState *env)
 {
 uint64_t temp_ticks;
 
-temp_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL),
-  get_ticks_per_sec(), 100);
+temp_ticks = cpu_get_ticks();
 
 if (env->cp15.c9_pmcr & PMCRD) {
 /* Increment once every 64 processor clock cycles */
@@ -768,8 +767,7 @@ static uint64_t pmccntr_read(CPUARMState *env, const 
ARMCPRegInfo *ri)
 return env->cp15.c15_ccnt;
 }
 
-total_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL),
-   get_ticks_per_sec(), 100);
+total_ticks = cpu_get_ticks();
 
 if (env->cp15.c9_pmcr & PMCRD) {
 /* Increment once every 64 processor clock cycles */
@@ -789,8 +787,7 @@ static void pmccntr_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 return;
 }
 
-total_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL),
-   get_ticks_per_sec(), 100);
+total_ticks = cpu_get_ticks();
 
 if (env->cp15.c9_pmcr & PMCRD) {
 /* Increment once every 64 processor clock cycles */
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

Re: [Qemu-devel] [PATCH v3 15/25] target-sparc: Add npc state to insn_start

2015-09-24 Thread Aurelien Jarno

On 2015-09-22 13:24, Richard Henderson wrote:
> Reviewed-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> ---
>  target-sparc/cpu.h   | 1 +
>  target-sparc/translate.c | 7 ++-
>  2 files changed, 7 insertions(+), 1 deletion(-)

Reviewed-by: Aurelien Jarno  

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net

Re: [Qemu-devel] [PATCH v3 12/25] target-sparc: Tidy gen_branch_a interface

2015-09-24 Thread Aurelien Jarno

On 2015-09-22 13:24, Richard Henderson wrote:
> We always pass pc2 == dc->npc and r_cond == cpu_cond,
> and always set is_br afterward.  Infer all of that.
> 
> Reviewed-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> ---
>  target-sparc/translate.c | 21 ++---
>  1 file changed, 10 insertions(+), 11 deletions(-)
> 

Reviewed-by: Aurelien Jarno 
-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net

Re: [Qemu-devel] [PATCH v3 14/25] target-sparc: Remove gen_opc_jump_pc

2015-09-24 Thread Aurelien Jarno

On 2015-09-22 13:24, Richard Henderson wrote:
> Since jump_pc[1] is always npc + 4, we can infer after incrementing
> that jump_pc[1] == pc + 4.  Because of that, we can encode the branch
> destination into a single word, and store that in npc.
> 
> Reviewed-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> ---
>  target-sparc/translate.c | 19 ++-
>  1 file changed, 10 insertions(+), 9 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net

Re: [Qemu-devel] [PATCH v3 13/25] target-sparc: Split out gen_branch_n

2015-09-24 Thread Aurelien Jarno

On 2015-09-22 13:24, Richard Henderson wrote:
> Unify three copies of this code from different
> branch types.  Fix the case when npc == DYNAMIC_PC,
> i.e. a branch within a delay slot.
> 
> Reviewed-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> ---
>  target-sparc/translate.c | 55 
> 
>  1 file changed, 28 insertions(+), 27 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net

Re: [Qemu-devel] [PATCH v3 6/7] qdev: Protect device-list-properties against broken devices

2015-09-24 Thread Eduardo Habkost

On Thu, Sep 24, 2015 at 08:57:21PM +0200, Markus Armbruster wrote:
[...]
> diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
> index ff249af..7692090 100644
> --- a/hw/arm/allwinner-a10.c
> +++ b/hw/arm/allwinner-a10.c
> @@ -103,6 +103,8 @@ static void aw_a10_class_init(ObjectClass *oc, void *data)
>  DeviceClass *dc = DEVICE_CLASS(oc);
>  
>  dc->realize = aw_a10_realize;
> +/* Reason: creates a CPU, thus use after free(), see cpu_class_init() */
> +dc->cannot_even_create_with_object_new_yet = true;

The comments at aw_a10_class_init(), digic_class_init(),
fsl_imx25_class_init(), fsl_imx31_class_init(), and
xlnx_zynqmp_class_init() are now outdated, as cpu_class_init() doesn't
set cannot_even_create_with_object_new_yet anymore.

We could do this:
* Update the comments to "Reason: creates an ARM CPU, thus use after
  free(), see arm_cpu_class_init()"
* Add a note at arm_cpu_class_init() saying that we can probably
  unset cannot_even_create_with_object_new_yet in those functions
  once we fix TYPE_ARM_CPU

-- 
Eduardo

Re: [Qemu-devel] [PATCH v2 REPOST] oslib-win32: only provide localtime_r/gmtime_r if missing

2015-09-24 Thread Stefan Weil

Am 23.09.2015 um 10:33 schrieb Daniel P. Berrange:
> On Tue, Sep 22, 2015 at 07:49:40PM +0200, Stefan Weil wrote:
>> Hi,
>>
>> I suggest cleaning some comments, mostly using the "official"
>> spellings for MinGW and Mingw-w64.

[...]

>> Otherwise this patch looks good.
>>
>> If you agree, I'd clean the comments before I add
>> the patch to my patch queue for Windows
>> (git://qemu.weilnetz.de/qemu.git wxx).
> 
> Yes, I'm fine with you applying it to your queue and adding the fixes
> mentioned.
> 
> Regards,
> Daniel

Thanks. Your patch with the fixes is now applied to
git://qemu.weilnetz.de/qemu.git wxx.

Stefan

Re: [Qemu-devel] [PATCH RFC V4 4/4] Add virt-v3 machine that uses GIC-500

2015-09-24 Thread Christopher Covington

On 09/24/2015 02:03 PM, Christopher Covington wrote:
> Hi,
> 
> On 09/17/2015 01:38 PM, Shlomo Pongratz wrote:
>> From: Pavel Fedin 
>>
>> I would like to offer this, slightly improved implementation. The key thing 
>> is a new
>> kernel_irqchip_type member in Machine class. Currently it is used only by 
>> virt machine for
>> its internal purposes, however in future it is to be passed to KVM in
>> kvm_irqchip_create(). The variable is defined as int in order to be 
>> architecture agnostic,
>> for potential future users.
>>
>> Signed-off-by: Pavel Fedin 
>> ---
>>  hw/arm/virt.c | 72 
>> +--
>>  include/hw/arm/fdt.h  |  2 ++
>>  include/hw/arm/virt.h |  1 +
>>  target-arm/machine.c  |  7 -
>>  4 files changed, 73 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 4d15309..4c2ae7f 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
> 
>> @@ -445,6 +462,14 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq 
>> *pic, int type, bool secure)
>>  sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_CPU].base);
>>  }
>>  
>> +if (type == 3) {
>> +/* Connect GIC to CPU */
>> +for (i = 0; i < smp_cpus; i++) {
>> +CPUState *cpu = qemu_get_cpu(i);
>> +aatch64_registers_with_opaque_set(OBJECT(cpu), (void *)gicdev);
> 
> Typo--should be "aarch64".
> 
> With that, feel free to add the following if it's any use:
> 
> Tested-by: Christopher Covington 

I originally tested building only for aarch64-softmmu, but I've now noticed a
build issue with arm-softmmu.

Christopher Covington

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

[Qemu-devel] [Bug 1350435] Re: tcg.c:1693: tcg fatal error

2015-09-24 Thread Serge Hallyn

Ah, thanks for setting me straight.


** Changed in: qemu (Ubuntu)
   Status: Invalid => Confirmed

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1350435

Title:
  tcg.c:1693: tcg fatal error

Status in launchpad-buildd:
  Triaged
Status in QEMU:
  Confirmed
Status in qemu package in Ubuntu:
  Confirmed

Bug description:
  this started happening after the launchpad buildd trusty deploy
  
https://code.launchpad.net/~costamagnagianfranco/+archive/ubuntu/firefox/+build/6224439

  
  debconf-updatepo
  qemu: uncaught target signal 11 (Segmentation fault) - core dumped
  Segmentation fault (core dumped)
  qemu: uncaught target signal 11 (Segmentation fault) - core dumped
  Segmentation fault (core dumped)
  /build/buildd/qemu-2.0.0+dfsg/tcg/tcg.c:1693: tcg fatal error
  /build/buildd/qemu-2.0.0+dfsg/tcg/tcg.c:1693: tcg fatal error

  this seems to be the patch needed
  https://patches.linaro.org/32473/

To manage notifications about this bug go to:
https://bugs.launchpad.net/launchpad-buildd/+bug/1350435/+subscriptions

[Qemu-devel] [PATCH v3 7/7] Revert "qdev: Use qdev_get_device_class() for -device , help"

2015-09-24 Thread Markus Armbruster

This reverts commit 31bed5509dfcbdfc293154ce81086a4dbd7a80b6.

The reverted commit changed qdev_device_help() to reject abstract
devices and devices that have cannot_instantiate_with_device_add_yet
set, to fix crash bugs like -device x86_64-cpu,help.

Rejecting abstract devices makes sense: they're purely internal, and
the implementation of the help feature can't cope with them.

Rejecting non-pluggable devices makes less sense: even though you
can't use them with -device, the help may still be useful elsewhere,
for instance with -global.  This is a regression: -device FOO,help
used to help even for FOO that aren't pluggable.

The previous two commits fixed the crash bug at a lower layer, so
reverting this one is now safe.  Fixes the -device FOO,help
regression, except for the broken devices marked
cannot_even_create_with_object_new_yet.  For those, the error message
is improved.

Example of a device where the regression is fixed:

$ qemu-system-x86_64 -device PIIX4_PM,help
PIIX4_PM.command_serr_enable=bool (on/off)
PIIX4_PM.multifunction=bool (on/off)
PIIX4_PM.rombar=uint32
PIIX4_PM.romfile=str
PIIX4_PM.addr=int32 (Slot and optional function number, example: 06.0 or 06)
PIIX4_PM.memory-hotplug-support=bool
PIIX4_PM.acpi-pci-hotplug-with-bridge-support=bool
PIIX4_PM.s4_val=uint8
PIIX4_PM.disable_s4=uint8
PIIX4_PM.disable_s3=uint8
PIIX4_PM.smb_io_base=uint32

Example of a device where it isn't fixed:

$ qemu-system-x86_64 -device host-x86_64-cpu,help
Can't list properties of device 'host-x86_64-cpu'

Both failed with "Parameter 'driver' expects pluggable device type"
before.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
Reviewed-by: Eduardo Habkost 
---
 qdev-monitor.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/qdev-monitor.c b/qdev-monitor.c
index eb7aef2..1cadefb 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -237,9 +237,12 @@ int qdev_device_help(QemuOpts *opts)
 return 0;
 }
 
-qdev_get_device_class(&driver, &local_err);
-if (local_err) {
-goto error;
+if (!object_class_by_name(driver)) {
+const char *typename = find_typename_by_alias(driver);
+
+if (typename) {
+driver = typename;
+}
 }
 
 prop_list = qmp_device_list_properties(driver, &local_err);
-- 
2.4.3

Re: [Qemu-devel] Loading image/elf to cpu that has different not system memory address space

2015-09-24 Thread mar.krzeminski




W dniu 24.09.2015 o 20:38, Peter Crosthwaite pisze:

On Thu, Sep 24, 2015 at 10:14 AM, mar.krzeminski
 wrote:


W dniu 24.09.2015 o 05:07, Peter Crosthwaite pisze:

On Wed, Sep 23, 2015 at 8:06 PM, Peter Crosthwaite
 wrote:

On Wed, Sep 23, 2015 at 10:31 AM, mar.krzeminski
 wrote:

W dniu 23.09.2015 o 17:46, Peter Maydell pisze:

On 23 September 2015 at 08:17, Marcin Krzemiński
 wrote:

Hello,

I am trying to write a model of an embedded board that has cortex-m3 and
cortex-a9 processors.
Because M3 see different memory at address 0x0 than A9 (m3 has small rom,
a9
has whole ram) I created different address space for m3 (thanks Peter
Crosthwaite! for hints how to do this!).
Now I am stuck at loading "kernel" to start M3. If I use default address
space for M3 I can load and run my elf file (it can be image, but currently
it
is easiest for me with elf) all works fine.
The problem is when I switch to my new (root MR is not from
get_system_memory() call ) i got execution outside RAM exception.
That is happening because there are only zeroes in memory pointed by my
second address space.
The question is how can I load image to this memory (it might be elf, but
binary image also is fine)?
I can not even find the code that loads data to memory in fist place.
Could
you point me where the loading is done in the code?

This is going to be complicated. I suspect you will need to add
some infrastructure for specifying per-CPU image loading (maybe
via CPU properties?), which we don't have at all right now.

(Our current image loading code for arm lives in hw/arm/boot.c.)

thanks
-- PMM

I couldn't find the place were actual data are put int M-, I don't know why
I haven't seen
rom_add_blob() in boot.c.
At the machine init level I know all MRs, so I'll use
memory_region_get_ram_ptr(),
and put data there.
If you have idea how to add this into framework, and someone beside me needs
this,
maybe I can implement that?

We definitely need it. We need to be able to associate multiple
softwares with multiple CPUs.

This is known to work, and could be what you are looking for:

https://github.com/Xilinx/qemu/blob/pub/2015.2.plnx/hw/misc/blob-loader.c

You pass -device loader,cpu=#,...

then various other fields, all on the command line (depending if your
loading elfs or raw blobs). It is badly named, it is more than just a
blob loader now. It works best when you don't use -kernel (you may
need to hack your machine model to disable any checks that forces
-kernel).

The key feature of that device is it loads from the argument CPUs
perspective, so if your M3 CPU AR is correctly set it will load via it
when you use -device loader,cpu=1,foo.elf.

Other key feature, is the command line options is repeatable for
multiple blobs and multiple CPUs.

Regards,
Peter

The implementation is slightly bogus, it is using a global AS pointer
loader_as to pass the cpu AS to loader infrastructure. git grep that
tree (2015.2.plnx branch) for "loader_as" to see the needed changes to
core loader infrastructure and cherry pick the device and it should be
close to working.

HTH

Regards,
Peter

Thanks,
Marcin

Great functionality, I'll probably integrate it, but for fast checking if
all works I'll use also
global pointer ( generally it is used already in load.c).
As I looked into code it seem that it is possible to pass CPU state down to
loading functions,
so those can use AS connected with CPU. If someone is interested in that
patch I can try to prepare it...


I'll take a CC :).

Ok, so I'll try to implement this idea, hope I will work :)

Today I stacked on other interesting think - and I do not want to spam this
list - it is hack in cortex-m3
from armv7m.

 /* Hack to map an additional page of ram at the top of the address
space.  This stops qemu complaining about executing code outside RAM
when returning from an exception.  */
 memory_region_init_ram(hack, NULL, "armv7m.hack", 0x1000, &error_abort);
 vmstate_register_ram_global(hack);
 memory_region_add_subregion(system_memory, 0xf000, hack);

Why it is there, seem to be old...


I'm not sure. Alistair may know more.

But for your project, I would definitely avoid that ARMv7M code and
just take M3 as a CPU. Pull out any extra pieces you want from
armv7m.c as needed to build something from scratch. To support the
multi-as work that ARMv7M stuff would need an overhaul I think. It is
stylistically out of date and due for a rewrite.

Regards,
Peter
Generally I did that, I got from that file cpu init, nvic and added 
custom AS.
I needed to make small changes in nvic to stop it from using default 
system_memory ( it might be worth to send a patch I think...).
Then took me a while to understand why qemu crash while serving M3 
exception because I haven't took this hack :)
For now it seem that this all is working fine. Last not implemented 
think is this loading firmware to proper CPU.



Thanks,
Marcin

[Qemu-devel] [PATCH v3 1/7] tests: Fix how qom-test is run

2015-09-24 Thread Markus Armbruster

We want to run qom-test for every architecture, without having to
manually add it to every architecture's list of tests.  Commit 3687d53
accomplished this by adding it to every architecture's list
automatically.

However, some architectures inherit their tests from others, like this:

check-qtest-x86_64-y = $(check-qtest-i386-y)
check-qtest-microblazeel-y = $(check-qtest-microblaze-y)
check-qtest-xtensaeb-y = $(check-qtest-xtensa-y)

For such architectures, we ended up running the (slow!) test twice.
Commit 2b8419c attempted to avoid this by adding the test only when
it's not already present.  Works only as long as we consider adding
the test to the architectures on the left hand side *after* the ones
on the right hand side: x86_64 after i386, microblazeel after
microblaze, xtensaeb after xtensa.

Turns out we consider them in $(SYSEMU_TARGET_LIST) order.  Defined as

SYSEMU_TARGET_LIST := $(subst -softmmu.mak,,$(notdir \
   $(wildcard $(SRC_PATH)/default-configs/*-softmmu.mak)))

On my machine, this results in the order xtensa, x86_64, microblazeel,
microblaze, i386.  Consequently, qom-test runs twice for microblazeel
and x86_64.

Replace this complex and flawed machinery with a much simpler one: add
generic tests (currently just qom-test) to check-qtest-generic-y
instead of check-qtest-$(target)-y for every target, then run
$(check-qtest-generic-y) for every target.

Signed-off-by: Markus Armbruster 
---
 tests/Makefile | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/Makefile b/tests/Makefile
index 4063639..9380e14 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -86,6 +86,8 @@ check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 # All QTests for now are POSIX-only, but the dependencies are
 # really in libqtest, not in the testcases themselves.
 
+check-qtest-generic-y =
+
 gcov-files-ipack-y += hw/ipack/ipack.c
 check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF)
 gcov-files-ipack-y += hw/char/ipoctal232.c
@@ -216,10 +218,7 @@ gcov-files-ppc64-y += ppc64-softmmu/hw/ppc/spapr_pci.c
 check-qtest-microblazeel-y = $(check-qtest-microblaze-y)
 check-qtest-xtensaeb-y = $(check-qtest-xtensa-y)
 
-# qom-test works for all sysemu architectures:
-$(foreach target,$(SYSEMU_TARGET_LIST), \
-   $(if $(findstring tests/qom-test$(EXESUF), 
$(check-qtest-$(target)-y)),, \
-   $(eval check-qtest-$(target)-y += tests/qom-test$(EXESUF
+check-qtest-generic-y += tests/qom-test$(EXESUF)
 
 check-qapi-schema-y := $(addprefix tests/qapi-schema/, \
comments.json empty.json enum-empty.json enum-missing-data.json \
@@ -446,8 +445,11 @@ CFLAGS += $(TEST_CFLAGS)
 
 TARGETS=$(patsubst %-softmmu,%, $(filter %-softmmu,$(TARGET_DIRS)))
 ifeq ($(CONFIG_POSIX),y)
-QTEST_TARGETS=$(foreach TARGET,$(TARGETS), $(if $(check-qtest-$(TARGET)-y), 
$(TARGET),))
+QTEST_TARGETS = $(TARGETS)
 check-qtest-y=$(foreach TARGET,$(TARGETS), $(check-qtest-$(TARGET)-y))
+check-qtest-y += $(check-qtest-generic-y)
+else
+QTEST_TARGETS =
 endif
 
 qtest-obj-y = tests/libqtest.o $(test-util-obj-y)
@@ -485,7 +487,7 @@ $(patsubst %, check-qtest-%, $(QTEST_TARGETS)): 
check-qtest-%: $(check-qtest-y)
$(call quiet-command,QTEST_QEMU_BINARY=$*-softmmu/qemu-system-$* \
QTEST_QEMU_IMG=qemu-img$(EXESUF) \
MALLOC_PERTURB_=$${MALLOC_PERTURB_:-$$((RANDOM % 255 + 1))} \
-   gtester $(GTESTER_OPTIONS) -m=$(SPEED) 
$(check-qtest-$*-y),"GTESTER $@")
+   gtester $(GTESTER_OPTIONS) -m=$(SPEED) $(check-qtest-$*-y) 
$(check-qtest-generic-y),"GTESTER $@")
$(if $(CONFIG_GCOV),@for f in $(gcov-files-$*-y); do \
  echo Gcov report for $$f:;\
  $(GCOV) $(GCOV_OPTIONS) $$f -o `dirname $$f`; \
-- 
2.4.3

[Qemu-devel] [PATCH v3 6/7] qdev: Protect device-list-properties against broken devices

2015-09-24 Thread Markus Armbruster

Several devices don't survive object_unref(object_new(T)): they crash
or hang during cleanup, or they leave dangling pointers behind.

This breaks at least device-list-properties, because
qmp_device_list_properties() needs to create a device to find its
properties.  Broken in commit f4eb32b "qmp: show QOM properties in
device-list-properties", v2.1.  Example reproducer:

$ qemu-system-aarch64 -nodefaults -display none -machine none -S -qmp stdio
{"QMP": {"version": {"qemu": {"micro": 50, "minor": 4, "major": 2}, 
"package": ""}, "capabilities": []}}
{ "execute": "qmp_capabilities" }
{"return": {}}
{ "execute": "device-list-properties", "arguments": { "typename": 
"pxa2xx-pcmcia" } }
qemu-system-aarch64: /home/armbru/work/qemu/memory.c:1307: 
memory_region_finalize: Assertion `((&mr->subregions)->tqh_first == ((void 
*)0))' failed.
Aborted (core dumped)
[Exit 134 (SIGABRT)]

Unfortunately, I can't fix the problems in these devices right now.
Instead, add DeviceClass member cannot_even_create_with_object_new_yet
to mark them:

* Crash during init (didn't debug, so I can't say why): "spapr-rng"

* Crash or hang during cleanup (didn't debug, so I can't say why):
  "pxa2xx-pcmcia", "realview_pci", "versatile_pci",
  "s390-sclp-event-facility", "sclp"

* Dangling pointers: most CPUs, plus "allwinner-a10", "digic",
  "fsl,imx25", "fsl,imx31", "xlnx,zynqmp", because they create such
  CPUs

* Assert kvm_enabled(): "host-x86_64-cpu", "host-i386-cpu",
  "host-powerpc64-cpu", "host-embedded-powerpc-cpu",
  "host-powerpc-cpu" (the powerpc ones can't currently reach the
  assertion, because the CPUs are only registered when KVM is enabled,
  but the assertion is arguably in the wrong place all the same)

Make qmp_device_list_properties() fail cleanly when the device is so
marked.  This improves device-list-properties from "crashes or hangs"
to "fails".  Not a complete fix, just a better-than-nothing
work-around.  In the above reproducer, device-list-properties now
fails with "Can't list properties of device 'pxa2xx-pcmcia'".

This also protects -device FOO,help, which uses the same machinery
since commit ef52358 "qdev-monitor: include QOM properties in -device
FOO, help output", v2.2.  Example reproducer:

$ qemu-system-aarch64 -machine none -device pxa2xx-pcmcia,help

Before:

qemu-system-aarch64: .../memory.c:1307: memory_region_finalize: Assertion 
`((&mr->subregions)->tqh_first == ((void *)0))' failed.

After:

Can't list properties of device 'pxa2xx-pcmcia'

Cc: "Andreas Färber" 
Cc: Alexander Graf 
Cc: Alistair Francis 
Cc: Antony Pavlov 
Cc: Christian Borntraeger 
Cc: Cornelia Huck 
Cc: Eduardo Habkost 
Cc: Li Guang 
Cc: Paolo Bonzini 
Cc: Peter Crosthwaite 
Cc: Peter Maydell 
Cc: Richard Henderson 
Cc: qemu-...@nongnu.org
Cc: qemu-sta...@nongnu.org
Signed-off-by: Markus Armbruster 
---
 hw/arm/allwinner-a10.c |  2 ++
 hw/arm/digic.c |  2 ++
 hw/arm/fsl-imx25.c |  2 ++
 hw/arm/fsl-imx31.c |  2 ++
 hw/arm/xlnx-zynqmp.c   |  2 ++
 hw/pci-host/versatile.c| 11 +++
 hw/pcmcia/pxa2xx.c |  9 +
 hw/ppc/spapr_rng.c |  5 +
 hw/s390x/event-facility.c  |  3 +++
 hw/s390x/sclp.c|  3 +++
 include/hw/qdev-core.h | 13 +
 qmp.c  |  5 +
 target-alpha/cpu.c |  7 +++
 target-arm/cpu.c   |  7 +++
 target-cris/cpu.c  |  7 +++
 target-i386/cpu.c  |  8 
 target-lm32/cpu.c  |  7 +++
 target-m68k/cpu.c  |  7 +++
 target-microblaze/cpu.c|  6 ++
 target-mips/cpu.c  |  7 +++
 target-moxie/cpu.c |  7 +++
 target-openrisc/cpu.c  |  7 +++
 target-ppc/kvm.c   |  4 
 target-s390x/cpu.c |  7 +++
 target-sh4/cpu.c   |  7 +++
 target-sparc/cpu.c |  7 +++
 target-tilegx/cpu.c|  7 +++
 target-tricore/cpu.c   |  6 ++
 target-unicore32/cpu.c |  7 +++
 target-xtensa/cpu.c|  7 +++
 tests/device-introspect-test.c | 29 -
 31 files changed, 181 insertions(+), 29 deletions(-)

diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
index ff249af..7692090 100644
--- a/hw/arm/allwinner-a10.c
+++ b/hw/arm/allwinner-a10.c
@@ -103,6 +103,8 @@ static void aw_a10_class_init(ObjectClass *oc, void *data)
 DeviceClass *dc = DEVICE_CLASS(oc);
 
 dc->realize = aw_a10_realize;
+/* Reason: creates a CPU, thus use after free(), see cpu_class_init() */
+dc->cannot_even_create_with_object_new_yet = true;
 }
 
 static const TypeInfo aw_a10_type_info = {
diff --git a/hw/arm/digic.c b/hw/arm/digic.c
index ec8c330..3decef4 100644
--- a/hw/arm/digic.c
+++ b/hw/arm/digic.c
@@ -97,6 +97,8 @@ static void digic_class_init(ObjectClass *oc, void *data)

Re: [Qemu-devel] [PULL 20/22] virtio: introduce virtqueue_unmap_sg()

2015-09-24 Thread Michael S. Tsirkin

On Thu, Sep 24, 2015 at 04:21:02PM +0300, Michael S. Tsirkin wrote:
> From: Jason Wang 
> 
> Factor out sg unmapping logic. This will be reused by the patch that
> can discard descriptor.
> 
> Cc: Michael S. Tsirkin 
> Signed-off-by: Jason Wang 
> Reviewed-by: Michael S. Tsirkin 
> Signed-off-by: Michael S. Tsirkin 
> ---
>  hw/virtio/virtio.c | 15 ++-
>  1 file changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 7504f8b..d6a2bca 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -244,15 +244,12 @@ int virtio_queue_empty(VirtQueue *vq)
>  return vring_avail_idx(vq) == vq->last_avail_idx;
>  }
>  
> -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> -unsigned int len, unsigned int idx)
> +static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
> +   unsigned int len)
>  {
>  unsigned int offset;
>  int i;
>  
> -trace_virtqueue_fill(vq, elem, len, idx);
> -
> -offset = 0;
>  for (i = 0; i < elem->in_num; i++) {
>  size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
>  

This part causes an uninitialized variable warning to appear.
I dropped this from the pull request.
Jason, could you look into this please?


> @@ -267,6 +264,14 @@ void virtqueue_fill(VirtQueue *vq, const 
> VirtQueueElement *elem,
>  cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
>elem->out_sg[i].iov_len,
>0, elem->out_sg[i].iov_len);
> +}
> +
> +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> +unsigned int len, unsigned int idx)
> +{
> +trace_virtqueue_fill(vq, elem, len, idx);
> +
> +virtqueue_unmap_sg(vq, elem, len);
>  
>  idx = (idx + vring_used_idx(vq)) % vq->vring.num;
>  
> -- 
> MST
>

[Qemu-devel] [PATCH v3 0/7] Fix device introspection regressions

2015-09-24 Thread Markus Armbruster

QMP command device-list-properties regressed in 2.1: it can crash or
leave dangling pointers behind.

-device FOO,help regressed in 2.2: it no longer works for
non-pluggable devices.  I tried to fix that some time ago[*], but my
fix failed review.  This is my second, more comprehensive try.

PATCH 1,2 are preliminaries, PATCH 3 adds tests to demonstrate the
bugs, PATCH 4-6 fix them to a degree (see PATCH 5 for limitations),
and PATCH 7 cleans up.

[*] [PATCH] qdev: Make -device FOO,help help again when FOO is not
pluggable
https://lists.gnu.org/archive/html/qemu-devel/2015-03/msg03459.html
Message-Id: <1426527232-15044-1-git-send-email-arm...@redhat.com>

v3:
* PATCH 6: Mark "tilegx-cpu" [Eduardo] and new "spapr-rng", clean up
  whitespace.

v2:
* PATCH 1: New, made from old PATCH 7 and relevant Makefile parts of
  old PATCH 3, with a much improved commit message [Andreas]
* PATCH 3: Fix hmp() [Eric]
* PATCH 4: Tweak commit message and comments [Eric]
* PATCH 6: Mark only the CPUs that are actually broken [Eduardo]

Markus Armbruster (7):
  tests: Fix how qom-test is run
  libqtest: Clean up unused QTestState member sigact_old
  libqtest: New hmp() & friends
  device-introspect-test: New, covering device introspection
  qmp: Fix device-list-properties not to crash for abstract device
  qdev: Protect device-list-properties against broken devices
  Revert "qdev: Use qdev_get_device_class() for -device ,help"

 hw/arm/allwinner-a10.c |   2 +
 hw/arm/digic.c |   2 +
 hw/arm/fsl-imx25.c |   2 +
 hw/arm/fsl-imx31.c |   2 +
 hw/arm/xlnx-zynqmp.c   |   2 +
 hw/pci-host/versatile.c|  11 
 hw/pcmcia/pxa2xx.c |   9 
 hw/ppc/spapr_rng.c |   5 ++
 hw/s390x/event-facility.c  |   3 ++
 hw/s390x/sclp.c|   3 ++
 include/hw/qdev-core.h |  13 +
 qdev-monitor.c |   9 ++--
 qmp.c  |  11 
 target-alpha/cpu.c |   7 +++
 target-arm/cpu.c   |   7 +++
 target-cris/cpu.c  |   7 +++
 target-i386/cpu.c  |   8 +++
 target-lm32/cpu.c  |   7 +++
 target-m68k/cpu.c  |   7 +++
 target-microblaze/cpu.c|   6 +++
 target-mips/cpu.c  |   7 +++
 target-moxie/cpu.c |   7 +++
 target-openrisc/cpu.c  |   7 +++
 target-ppc/kvm.c   |   4 ++
 target-s390x/cpu.c |   7 +++
 target-sh4/cpu.c   |   7 +++
 target-sparc/cpu.c |   7 +++
 target-tilegx/cpu.c|   7 +++
 target-tricore/cpu.c   |   6 +++
 target-unicore32/cpu.c |   7 +++
 target-xtensa/cpu.c|   7 +++
 tests/Makefile |  20 ---
 tests/device-introspect-test.c | 117 +
 tests/drive_del-test.c |  22 +++-
 tests/ide-test.c   |   8 +--
 tests/libqtest.c   |  38 -
 tests/libqtest.h   |  33 
 37 files changed, 400 insertions(+), 34 deletions(-)
 create mode 100644 tests/device-introspect-test.c

-- 
2.4.3

[Qemu-devel] [PATCH v3 4/7] device-introspect-test: New, covering device introspection

2015-09-24 Thread Markus Armbruster

The test doesn't check that the output makes any sense, only that QEMU
survives.  Useful since we've had an astounding number of crash bugs
around there.

In fact, we have a bunch of them right now: several devices crash or
hang, and all CPUs leave a dangling pointer behind.  The test skips
testing the broken parts.  The next commits will fix them, and drop
the skipping.

Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 tests/Makefile |   8 ++-
 tests/device-introspect-test.c | 153 +
 2 files changed, 158 insertions(+), 3 deletions(-)
 create mode 100644 tests/device-introspect-test.c

diff --git a/tests/Makefile b/tests/Makefile
index 9380e14..2bf7ba1 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -86,7 +86,8 @@ check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 # All QTests for now are POSIX-only, but the dependencies are
 # really in libqtest, not in the testcases themselves.
 
-check-qtest-generic-y =
+check-qtest-generic-y = tests/device-introspect-test$(EXESUF)
+gcov-files-generic-y = qdev-monitor.c qmp.c
 
 gcov-files-ipack-y += hw/ipack/ipack.c
 check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF)
@@ -381,6 +382,7 @@ libqos-imx-obj-y = $(libqos-obj-y) tests/libqos/i2c-imx.o
 libqos-usb-obj-y = $(libqos-pc-obj-y) tests/libqos/usb.o
 libqos-virtio-obj-y = $(libqos-pc-obj-y) tests/libqos/virtio.o 
tests/libqos/virtio-pci.o tests/libqos/virtio-mmio.o 
tests/libqos/malloc-generic.o
 
+tests/device-introspect-test$(EXESUF): tests/device-introspect-test.o
 tests/rtc-test$(EXESUF): tests/rtc-test.o
 tests/m48t59-test$(EXESUF): tests/m48t59-test.o
 tests/endianness-test$(EXESUF): tests/endianness-test.o
@@ -488,7 +490,7 @@ $(patsubst %, check-qtest-%, $(QTEST_TARGETS)): 
check-qtest-%: $(check-qtest-y)
QTEST_QEMU_IMG=qemu-img$(EXESUF) \
MALLOC_PERTURB_=$${MALLOC_PERTURB_:-$$((RANDOM % 255 + 1))} \
gtester $(GTESTER_OPTIONS) -m=$(SPEED) $(check-qtest-$*-y) 
$(check-qtest-generic-y),"GTESTER $@")
-   $(if $(CONFIG_GCOV),@for f in $(gcov-files-$*-y); do \
+   $(if $(CONFIG_GCOV),@for f in $(gcov-files-$*-y) 
$(gcov-files-generic-y); do \
  echo Gcov report for $$f:;\
  $(GCOV) $(GCOV_OPTIONS) $$f -o `dirname $$f`; \
done,)
@@ -499,7 +501,7 @@ $(patsubst %, check-%, $(check-unit-y)): check-%: %
$(call quiet-command, \
MALLOC_PERTURB_=$${MALLOC_PERTURB_:-$$((RANDOM % 255 + 1))} \
gtester $(GTESTER_OPTIONS) -m=$(SPEED) $*,"GTESTER $*")
-   $(if $(CONFIG_GCOV),@for f in $(gcov-files-$(subst tests/,,$*)-y); do \
+   $(if $(CONFIG_GCOV),@for f in $(gcov-files-$(subst tests/,,$*)-y) 
$(gcov-files-generic-y); do \
  echo Gcov report for $$f:;\
  $(GCOV) $(GCOV_OPTIONS) $$f -o `dirname $$f`; \
done,)
diff --git a/tests/device-introspect-test.c b/tests/device-introspect-test.c
new file mode 100644
index 000..44da30e
--- /dev/null
+++ b/tests/device-introspect-test.c
@@ -0,0 +1,153 @@
+/*
+ * Device introspection test cases
+ *
+ * Copyright (c) 2015 Red Hat Inc.
+ *
+ * Authors:
+ *  Markus Armbruster ,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * Covers QMP device-list-properties and HMP device_add help.  We
+ * currently don't check that their output makes sense, only that QEMU
+ * survives.  Useful since we've had an astounding number of crash
+ * bugs around here.
+ */
+
+#include 
+#include 
+#include "qemu-common.h"
+#include "qapi/qmp/qstring.h"
+#include "libqtest.h"
+
+const char common_args[] = "-nodefaults -machine none";
+
+static QList *device_type_list(bool abstract)
+{
+QDict *resp;
+QList *ret;
+
+resp = qmp("{'execute': 'qom-list-types',"
+   " 'arguments': {'implements': 'device', 'abstract': %i}}",
+   abstract);
+g_assert(qdict_haskey(resp, "return"));
+ret = qdict_get_qlist(resp, "return");
+QINCREF(ret);
+QDECREF(resp);
+return ret;
+}
+
+static void test_one_device(const char *type)
+{
+QDict *resp;
+char *help;
+
+/*
+ * Skip this part for the abstract device test case, because
+ * device-list-properties crashes for such devices.
+ * FIXME fix it not to crash
+ */
+if (strcmp(type, "device")) {
+resp = qmp("{'execute': 'device-list-properties',"
+   " 'arguments': {'typename': %s}}",
+   type);
+QDECREF(resp);
+}
+
+help = hmp("device_add \"%s,help\"", type);
+g_free(help);
+}
+
+static void test_device_intro_list(void)
+{
+QList *types;
+char *help;
+
+qtest_start(common_args);
+
+types = device_type_list(true);
+QDECREF(types);
+
+help = hmp("device_add help");
+g_free(help);
+
+qtest_end();
+}
+
+static void test_device_intro_none(void)
+{
+qtest_start(common_args);
+t

[Qemu-devel] [PATCH v3 5/7] qmp: Fix device-list-properties not to crash for abstract device

2015-09-24 Thread Markus Armbruster

Broken in commit f4eb32b "qmp: show QOM properties in
device-list-properties", v2.1.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 qmp.c  |  6 ++
 tests/device-introspect-test.c | 15 ---
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/qmp.c b/qmp.c
index 057a7cb..1413de4 100644
--- a/qmp.c
+++ b/qmp.c
@@ -515,6 +515,12 @@ DevicePropertyInfoList *qmp_device_list_properties(const 
char *typename,
 return NULL;
 }
 
+if (object_class_is_abstract(klass)) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "name",
+   "non-abstract device type");
+return NULL;
+}
+
 obj = object_new(typename);
 
 QTAILQ_FOREACH(prop, &obj->properties, node) {
diff --git a/tests/device-introspect-test.c b/tests/device-introspect-test.c
index 44da30e..6c7366f 100644
--- a/tests/device-introspect-test.c
+++ b/tests/device-introspect-test.c
@@ -45,17 +45,10 @@ static void test_one_device(const char *type)
 QDict *resp;
 char *help;
 
-/*
- * Skip this part for the abstract device test case, because
- * device-list-properties crashes for such devices.
- * FIXME fix it not to crash
- */
-if (strcmp(type, "device")) {
-resp = qmp("{'execute': 'device-list-properties',"
-   " 'arguments': {'typename': %s}}",
-   type);
-QDECREF(resp);
-}
+resp = qmp("{'execute': 'device-list-properties',"
+   " 'arguments': {'typename': %s}}",
+   type);
+QDECREF(resp);
 
 help = hmp("device_add \"%s,help\"", type);
 g_free(help);
-- 
2.4.3

[Qemu-devel] [PATCH v3 2/7] libqtest: Clean up unused QTestState member sigact_old

2015-09-24 Thread Markus Armbruster

Unused since commit d766825.

Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 tests/libqtest.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/libqtest.c b/tests/libqtest.c
index e5188e0..8dede56 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -46,7 +46,6 @@ struct QTestState
 bool irq_level[MAX_IRQ];
 GString *rx;
 pid_t qemu_pid;  /* our child QEMU process */
-struct sigaction sigact_old; /* restored on exit */
 };
 
 static GList *qtest_instances;
-- 
2.4.3

Re: [Qemu-devel] [PULL 00/22] virtio,pc features, fixes

2015-09-24 Thread Michael S. Tsirkin

On Thu, Sep 24, 2015 at 11:36:35AM -0700, Peter Maydell wrote:
> On 24 September 2015 at 06:20, Michael S. Tsirkin  wrote:
> > The following changes since commit fefa4b128de06cec6d513f00ee61e8208aed4a87:
> >
> >   Merge remote-tracking branch 
> > 'remotes/awilliam/tags/vfio-update-20150923.0' into staging (2015-09-23 
> > 21:39:46 +0100)
> >
> > are available in the git repository at:
> >
> >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> >
> > for you to fetch changes up to 27fa11c68072398beb31af8828e25da5c2295814:
> >
> >   virtio-net: correctly drop truncated packets (2015-09-24 16:16:24 +0300)
> >
> > 
> > virtio,pc features, fixes
> >
> > New features:
> > vhost-user multiqueue support
> > virtio-ccw virtio 1 support
> > pci bridge support in iommu
> >
> > Signed-off-by: Michael S. Tsirkin 
> 
> Fails to build:
> 
> /home/petmay01/linaro/qemu-for-merges/hw/virtio/virtio.c: In function
> ‘virtqueue_unmap_sg.isra.21’:
> /home/petmay01/linaro/qemu-for-merges/hw/virtio/virtio.c:260:16:
> error: ‘offset’ may be used uninitialized in this function
> [-Werror=maybe-uninitialized]
>  offset += size;
> ^
> 
> (both gcc and clang complain about this).
> 
> thanks
> -- PMM

Sorry about that, I will investigate.

I dropped this patch for now, since these are the last
two patches, it was easy with no need to rebase.

Could you please fetch from same tag?

-- 
MST

[Qemu-devel] [PATCH v3 3/7] libqtest: New hmp() & friends

2015-09-24 Thread Markus Armbruster

New convenience function hmp() to facilitate use of
human-monitor-command in tests.  Use it to simplify its existing uses.

To blend into existing libqtest code, also add qtest_hmpv() and
qtest_hmp().  That, and the egregiously verbose GTK-Doc comment format
make this patch look bigger than it is.

Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 tests/drive_del-test.c | 22 ++
 tests/ide-test.c   |  8 ++--
 tests/libqtest.c   | 37 +
 tests/libqtest.h   | 33 +
 4 files changed, 78 insertions(+), 22 deletions(-)

diff --git a/tests/drive_del-test.c b/tests/drive_del-test.c
index 8951f6f..3390946 100644
--- a/tests/drive_del-test.c
+++ b/tests/drive_del-test.c
@@ -16,28 +16,18 @@
 
 static void drive_add(void)
 {
-QDict *response;
+char *resp = hmp("drive_add 0 if=none,id=drive0");
 
-response = qmp("{'execute': 'human-monitor-command',"
-   " 'arguments': {"
-   "   'command-line': 'drive_add 0 if=none,id=drive0'"
-   "}}");
-g_assert(response);
-g_assert_cmpstr(qdict_get_try_str(response, "return"), ==, "OK\r\n");
-QDECREF(response);
+g_assert_cmpstr(resp, ==, "OK\r\n");
+g_free(resp);
 }
 
 static void drive_del(void)
 {
-QDict *response;
+char *resp = hmp("drive_del drive0");
 
-response = qmp("{'execute': 'human-monitor-command',"
-   " 'arguments': {"
-   "   'command-line': 'drive_del drive0'"
-   "}}");
-g_assert(response);
-g_assert_cmpstr(qdict_get_try_str(response, "return"), ==, "");
-QDECREF(response);
+g_assert_cmpstr(resp, ==, "");
+g_free(resp);
 }
 
 static void device_del(void)
diff --git a/tests/ide-test.c b/tests/ide-test.c
index 5594738..6173dfa 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -510,9 +510,7 @@ static void test_flush(void)
 tmp_path);
 
 /* Delay the completion of the flush request until we explicitly do it */
-qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {"
- " 'command-line':"
- " 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }");
+g_free(hmp("qemu-io ide0-hd0 \"break flush_to_os A\""));
 
 /* FLUSH CACHE command on device 0*/
 outb(IDE_BASE + reg_device, 0);
@@ -524,9 +522,7 @@ static void test_flush(void)
 assert_bit_clear(data, DF | ERR | DRQ);
 
 /* Complete the command */
-qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {"
- " 'command-line':"
- " 'qemu-io ide0-hd0 \"resume A\"'} }");
+g_free(hmp("qemu-io ide0-hd0 \"resume A\""));
 
 /* Check registers */
 data = inb(IDE_BASE + reg_device);
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 8dede56..2a396ba 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -483,6 +483,33 @@ void qtest_qmp_eventwait(QTestState *s, const char *event)
 }
 }
 
+char *qtest_hmpv(QTestState *s, const char *fmt, va_list ap)
+{
+char *cmd;
+QDict *resp;
+char *ret;
+
+cmd = g_strdup_vprintf(fmt, ap);
+resp = qtest_qmp(s, "{'execute': 'human-monitor-command',"
+ " 'arguments': {'command-line': %s}}",
+ cmd);
+ret = g_strdup(qdict_get_try_str(resp, "return"));
+g_assert(ret);
+QDECREF(resp);
+g_free(cmd);
+return ret;
+}
+
+char *qtest_hmp(QTestState *s, const char *fmt, ...)
+{
+va_list ap;
+char *ret;
+
+va_start(ap, fmt);
+ret = qtest_hmpv(s, fmt, ap);
+va_end(ap);
+return ret;
+}
 
 const char *qtest_get_arch(void)
 {
@@ -774,6 +801,16 @@ void qmp_discard_response(const char *fmt, ...)
 qtest_qmpv_discard_response(global_qtest, fmt, ap);
 va_end(ap);
 }
+char *hmp(const char *fmt, ...)
+{
+va_list ap;
+char *ret;
+
+va_start(ap, fmt);
+ret = qtest_hmpv(global_qtest, fmt, ap);
+va_end(ap);
+return ret;
+}
 
 bool qtest_big_endian(void)
 {
diff --git a/tests/libqtest.h b/tests/libqtest.h
index ec42031..b270f7b 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -120,6 +120,29 @@ QDict *qtest_qmp_receive(QTestState *s);
 void qtest_qmp_eventwait(QTestState *s, const char *event);
 
 /**
+ * qtest_hmp:
+ * @s: #QTestState instance to operate on.
+ * @fmt...: HMP command to send to QEMU
+ *
+ * Send HMP command to QEMU via QMP's human-monitor-command.
+ *
+ * Returns: the command's output.
+ */
+char *qtest_hmp(QTestState *s, const char *fmt, ...);
+
+/**
+ * qtest_hmpv:
+ * @s: #QTestState instance to operate on.
+ * @fmt: HMP command to send to QEMU
+ * @ap: HMP command arguments
+ *
+ * Send HMP command to QEMU via QMP's human-monitor-command.
+ *
+ * Returns: the command's output.
+ */
+char *qtest_hmpv(QTestState *s, const char *fmt, va_list ap);
+
+/**
  * qtest_get_irq:
  * @s: #QTestState instance to operate on.

[Qemu-devel] [Bug 1498144] Re: Failure booting hurd with qemu-system-i386 on ARM

2015-09-24 Thread PeteVine

I've just tried again with the latest commits - hurd boots,  hooray!

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1498144

Title:
   Failure booting hurd with qemu-system-i386 on ARM

Status in QEMU:
  New

Bug description:
  Trying to boot debian-hurd-20150320.img ends with:

  qemu-system-i386: qemu-coroutine-lock.c:91: qemu_co_queue_restart_all:
  Assertion `qemu_in_coroutine()' failed.

  Program received signal SIGABRT, Aborted.
  __libc_do_syscall ()
  at ../ports/sysdeps/unix/sysv/linux/arm/libc-do-syscall.S:44
  44  ../ports/sysdeps/unix/sysv/linux/arm/libc-do-syscall.S: No such file 
or directory.
  (gdb) bt
  #0  __libc_do_syscall ()
  at ../ports/sysdeps/unix/sysv/linux/arm/libc-do-syscall.S:44
  #1  0xb6ef8f0e in __GI_raise (sig=sig@entry=6)
  at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
  #2  0xb6efb766 in __GI_abort () at abort.c:89
  #3  0xb6ef4150 in __assert_fail_base (
  fmt=0x1 ,
  assertion=0x7f89a234 "qemu_in_coroutine()", assertion@entry=0x0,
  file=0x7f89da58 "qemu-coroutine-lock.c", file@entry=0xb566 "\001",
  line=91, line@entry=3069931692,
  function=function@entry=0x7f89ab78 "qemu_co_queue_restart_all")
  at assert.c:92
  #4  0xb6ef41e6 in __GI___assert_fail (assertion=0x0, file=0xb566 "\001",
  line=3069931692, function=0x7f89ab78 "qemu_co_queue_restart_all")
  at assert.c:101
  #5  0x7f59a6b4 in ?? ()

  I was using the same setup as in Bug 893208 (i.e git checkout from
  2015-09-15, armv7 Odroid C1)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1498144/+subscriptions

Re: [Qemu-devel] [PATCH] rocker: Use g_new() & friends where that makes obvious sense

2015-09-24 Thread Jiri Pirko

Thu, Sep 24, 2015 at 06:18:43PM CEST, arm...@redhat.com wrote:
>Michael, could you take this one through trivial?  Assuming Scott and
>Jiri don't mind, and with s/patchas/patch as/ in the commit message.

I don't mind :)

[Qemu-devel] [Bug 1350435] Re: tcg.c:1693: tcg fatal error

2015-09-24 Thread Michael Tokarev

Well.  This is definitely wrong.  It is a valid bug, but it needs quite
serious work to fix, which requires major refactoring of the tcg code.
Upstream is working on it, see http://wiki.qemu.org/Features/tcg-
multithread

** Changed in: qemu
   Status: Invalid => Confirmed

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1350435

Title:
  tcg.c:1693: tcg fatal error

Status in launchpad-buildd:
  Triaged
Status in QEMU:
  Confirmed
Status in qemu package in Ubuntu:
  Invalid

Bug description:
  this started happening after the launchpad buildd trusty deploy
  
https://code.launchpad.net/~costamagnagianfranco/+archive/ubuntu/firefox/+build/6224439

  
  debconf-updatepo
  qemu: uncaught target signal 11 (Segmentation fault) - core dumped
  Segmentation fault (core dumped)
  qemu: uncaught target signal 11 (Segmentation fault) - core dumped
  Segmentation fault (core dumped)
  /build/buildd/qemu-2.0.0+dfsg/tcg/tcg.c:1693: tcg fatal error
  /build/buildd/qemu-2.0.0+dfsg/tcg/tcg.c:1693: tcg fatal error

  this seems to be the patch needed
  https://patches.linaro.org/32473/

To manage notifications about this bug go to:
https://bugs.launchpad.net/launchpad-buildd/+bug/1350435/+subscriptions

Re: [Qemu-devel] Loading image/elf to cpu that has different not system memory address space

2015-09-24 Thread Peter Crosthwaite

On Thu, Sep 24, 2015 at 10:14 AM, mar.krzeminski
 wrote:
>
>
> W dniu 24.09.2015 o 05:07, Peter Crosthwaite pisze:
>
> On Wed, Sep 23, 2015 at 8:06 PM, Peter Crosthwaite
>  wrote:
>
> On Wed, Sep 23, 2015 at 10:31 AM, mar.krzeminski
>  wrote:
>
> W dniu 23.09.2015 o 17:46, Peter Maydell pisze:
>
> On 23 September 2015 at 08:17, Marcin Krzemiński
>  wrote:
>
> Hello,
>
> I am trying to write a model of an embedded board that has cortex-m3 and
> cortex a9 processors.
> Because M3 see different memory at address 0x0 than A9 (m3 has small rom,
> a9
> has whole ram) I created different address space for m3 (thanks Peter
> Crosthwaite! for hints how to do this!).
> Now I am stuck at loading the "kernel" to start the M3. If I use the default address
> space for the M3 I can load and run my elf file (it can be an image, but currently
> it
> is easiest for me with elf) and all works fine.
> The problem is when I switch to my new (root MR is not from
> get_system_memory() call ) i got execution outside RAM exception.
> That is happening because there are only zeroes in memory pointed by my
> second address space.
> The question is how can I load image to this memory (it might be elf, but
> binary image also is fine)?
> I can not even find the code that loads data to memory in the first place.
> Could
> you point me where the loading is done in the code?
>
> This is going to be complicated. I suspect you will need to add
> some infrastructure for specifying per-CPU image loading (maybe
> via CPU properties?), which we don't have at all right now.
>
> (Our current image loading code for arm lives in hw/arm/boot.c.)
>
> thanks
> -- PMM
>
> I couldn't find the place were actual data are put int M-, I don't know why
> I haven't seen
> rom_add_blob() in boot.c.
> At the machine init level I know all MRs, so I'll use
> memory_region_get_ram_ptr(),
> and put data there.
> If you have idea how to add this into framework, and someone beside me needs
> this,
> maybe I can implement that?
>
> We definitely need it. We need to be able to associate multiple
> softwares with multiple CPUs.
>
> This is known to work, and could be what you are looking for:
>
> https://github.com/Xilinx/qemu/blob/pub/2015.2.plnx/hw/misc/blob-loader.c
>
> You pass -device loader,cpu=#,...
>
> then various other fields, all on the command line (depending if your
> loading elfs or raw blobs). It is badly named, it is more than just a
> blob loader now. It works best when you don't use -kernel (you may
> need to hack your machine model to disable any checks that forces
> -kernel).
>
> The key feature of that device is it loads from the argument CPUs
> perspective, so if your M3 CPU AR is correctly set it will load via it
> when you use -device loader,cpu=1,foo.elf.
>
> Other key feature, is the command line options is repeatable for
> multiple blobs and multiple CPUs.
>
> Regards,
> Peter
>
> The implementation is slightly bogus, it is using a global AS pointer
> loader_as to pass the cpu AS to loader infrastructure. git grep that
> tree (2015.2.plnx branch) for "loader_as" to see the needed changes to
> core loader infrastructure and cherry pick the device and it should be
> close to working.
>
> HTH
>
> Regards,
> Peter
>
> Thanks,
> Marcin
>
> Great functionality, I'll probably integrate it, but for fast checking if
> all works I'll use also
> global pointer ( generally it is used already in load.c).
> As I looked into code it seem that it is possible to pass CPU state down to
> loading functions,
> so those can use AS connected with CPU. If someone is interested in that
> patch I can try to prepare it...
>

I'll take a CC :).

> Today I got stuck on another interesting thing - and I do not want to spam this
> list - it is hack in cortex-m3
> from armv7m.
>
> /* Hack to map an additional page of ram at the top of the address
>space.  This stops qemu complaining about executing code outside RAM
>when returning from an exception.  */
> memory_region_init_ram(hack, NULL, "armv7m.hack", 0x1000, &error_abort);
> vmstate_register_ram_global(hack);
> memory_region_add_subregion(system_memory, 0xf000, hack);
>
> Why it is there, seem to be old...
>

I'm not sure. Alistair may know more.

But for your project, I would definitely avoid that ARMv7M code and
just take M3 as a CPU. Pull out any extra pieces you want from
armv7m.c as needed to build something from scratch. To support the
multi-as work that ARMv7M stuff would need an overhaul I think. It is
stylistically out of date and due for a rewrite.

Regards,
Peter

> Thanks,
> Marcin

Re: [Qemu-devel] [PULL 00/22] virtio,pc features, fixes

2015-09-24 Thread Peter Maydell

On 24 September 2015 at 06:20, Michael S. Tsirkin  wrote:
> The following changes since commit fefa4b128de06cec6d513f00ee61e8208aed4a87:
>
>   Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20150923.0' 
> into staging (2015-09-23 21:39:46 +0100)
>
> are available in the git repository at:
>
>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
>
> for you to fetch changes up to 27fa11c68072398beb31af8828e25da5c2295814:
>
>   virtio-net: correctly drop truncated packets (2015-09-24 16:16:24 +0300)
>
> 
> virtio,pc features, fixes
>
> New features:
> vhost-user multiqueue support
> virtio-ccw virtio 1 support
> pci bridge support in iommu
>
> Signed-off-by: Michael S. Tsirkin 

Fails to build:

/home/petmay01/linaro/qemu-for-merges/hw/virtio/virtio.c: In function
‘virtqueue_unmap_sg.isra.21’:
/home/petmay01/linaro/qemu-for-merges/hw/virtio/virtio.c:260:16:
error: ‘offset’ may be used uninitialized in this function
[-Werror=maybe-uninitialized]
 offset += size;
^

(both gcc and clang complain about this).

thanks
-- PMM

Re: [Qemu-devel] [PATCH v4] docs: describe the QEMU build system structure / design

2015-09-24 Thread Laszlo Ersek

On 09/24/15 15:41, Daniel P. Berrange wrote:
> Developers who are new to QEMU, or have a background familiarity
> with GNU autotools, can have trouble getting their head around the
> home-grown QEMU build system. This document attempts to explain
> the structure / design of the configure script and the various
> Makefile pieces that live across the source tree.
> 
> Signed-off-by: Daniel P. Berrange 
> ---
> Changed in v4:
> 
>  - One speling eror fix
>  - Listed new file in MAINTAINERS

Acked-by: Laszlo Ersek 

Thank you,
Laszlo

> 
> Changed in v3:
> 
>  - More speling eror fixes
>  - Rephrased more paragraphs as suggested
> 
> Changed in v2:
> 
>  - Misc speling eror fixes
>  - Rephrased some paragraphs as suggested
>  - Added note about config-host.h file generation & use
> 
>  MAINTAINERS   |   8 +
>  docs/build-system.txt | 507 
> ++
>  2 files changed, 515 insertions(+)
>  create mode 100644 docs/build-system.txt
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 71c652b..654fb3c 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1372,3 +1372,11 @@ M: Stefan Hajnoczi 
>  L: qemu-bl...@nongnu.org
>  S: Supported
>  F: tests/image-fuzzer/
> +
> +
> +Documentation
> +-
> +Build system architecture
> +M: Daniel P. Berrange 
> +S: Odd Fixes
> +F: docs/build-system.txt
> diff --git a/docs/build-system.txt b/docs/build-system.txt
> new file mode 100644
> index 000..5ea
> --- /dev/null
> +++ b/docs/build-system.txt
> @@ -0,0 +1,507 @@
> +The QEMU build system architecture
> +==
> +
> +This document aims to help developers understand the architecture of the
> +QEMU build system. As with projects using GNU autotools, the QEMU build
> +system has two stages, first the developer runs the "configure" script
> +to determine the local build environment characteristics, then they run
> +"make" to build the project. This is about where the similarities with
> +GNU autotools end, so try to forget what you know about them.
> +
> +
> +Stage 1: configure
> +==
> +
> +The QEMU configure script is written directly in shell, and should be
> +compatible with any POSIX shell, hence it uses #!/bin/sh. An important
> +implication of this is that it is important to avoid using bash-isms on
> +development platforms where bash is the primary host.
> +
> +In contrast to autoconf scripts, QEMU's configure is expected to be
> +silent while it is checking for features. It will only display output
> +when an error occurs, or to show the final feature enablement summary
> +on completion.
> +
> +Adding new checks to the configure script usually comprises the
> +following tasks:
> +
> + - Initialize one or more variables with the default feature state.
> +
> +   Ideally features should auto-detect whether they are present,
> +   so try to avoid hardcoding the initial state to either enabled
> +   or disabled, as that forces the user to pass a --enable-XXX
> +   / --disable-XXX flag on every invocation of configure.
> +
> + - Add support to the command line arg parser to handle any new
> +   --enable-XXX / --disable-XXX flags required by the feature XXX.
> +
> + - Add information to the help output message to report on the new
> +   feature flag.
> +
> + - Add code to perform the actual feature check. As noted above, try to
> +   be fully dynamic in checking enablement/disablement.
> +
> + - Add code to print out the feature status in the configure summary
> +   upon completion.
> +
> + - Add any new makefile variables to $config_host_mak on completion.
> +
> +
> +Taking (a simplified version of) the probe for gnutls from configure,
> +we have the following pieces:
> +
> +  # Initial variable state
> +  gnutls=""
> +
> +  ..snip..
> +
> +  # Configure flag processing
> +  --disable-gnutls) gnutls="no"
> +  ;;
> +  --enable-gnutls) gnutls="yes"
> +  ;;
> +
> +  ..snip..
> +
> +  # Help output feature message
> +  gnutls  GNUTLS cryptography support
> +
> +  ..snip..
> +
> +  # Test for gnutls
> +  if test "$gnutls" != "no"; then
> + if ! $pkg_config --exists "gnutls"; then
> +gnutls_cflags=`$pkg_config --cflags gnutls`
> +gnutls_libs=`$pkg_config --libs gnutls`
> +libs_softmmu="$gnutls_libs $libs_softmmu"
> +libs_tools="$gnutls_libs $libs_tools"
> +QEMU_CFLAGS="$QEMU_CFLAGS $gnutls_cflags"
> +gnutls="yes"
> + elif test "$gnutls" = "yes"; then
> +feature_not_found "gnutls" "Install gnutls devel"
> + else
> +gnutls="no"
> + fi
> +  fi
> +
> +  ..snip..
> +
> +  # Completion feature summary
> +  echo "GNUTLS support$gnutls"
> +
> +  ..snip..
> +
> +  # Define make variables
> +  if test "$gnutls" = "yes" ; then
> + echo "CONFIG_GNUTLS=y" >> $config_host_mak
> +  fi
> +
> +
> +Helper functions
> +
> +
> +The configure script provides a variety of helper functions to assist
> +developers in checking f

Re: [Qemu-devel] [PATCH 00/16] block: Get rid of bdrv_swap()

2015-09-24 Thread Alberto Garcia

On Thu 17 Sep 2015 03:48:04 PM CEST, Kevin Wolf wrote:

> bdrv_swap() has always been an ugly hack that we would rather have
> avoided.  When it was introduced, we simply didn't have the
> infrastructure to update pointers instead of transplanting the
> contents of BDS object, so we grudgingly added bdrv_swap() as a quick
> solution.  Meanwhile, most of the infrastructure exists and this
> series implements the final step necessary to implement the required
> functionality in a less adventurous way.

I managed to crash QEMU with this series. I don't think it's a problem
in the series itself, but rather some older bug that is triggered by
your patches.

I think I found the cause and I have a preliminary patch, I'll try to
send it tomorrow.

Berto

Re: [Qemu-devel] [PULL] remove libcacard

2015-09-24 Thread Peter Maydell

On 23 September 2015 at 14:38,   wrote:
> From: Marc-André Lureau 
>
> The following changes since commit 684bb5770ec5d72a66620f64fc5d9672bf8d3509:
>
>   Merge remote-tracking branch 'remotes/dgibson/tags/spapr-next-20150923' 
> into staging (2015-09-23 16:52:54 +0100)
>
> are available in the git repository at:
>
>   https://github.com/elmarco/qemu tags/rm-libcacard
>
> for you to fetch changes up to 7b02f5447c64d1854468f758398c9f6fe9e5721f:
>
>   libcacard: use the standalone project (2015-09-23 23:34:17 +0200)
>
> 
> Remove libcacard
>
> 
> Marc-André Lureau (1):
>   libcacard: use the standalone project

Applied, thanks.

-- PMM

Re: [Qemu-devel] [PATCH RFC V4 4/4] Add virt-v3 machine that uses GIC-500

2015-09-24 Thread Christopher Covington

Hi,

On 09/17/2015 01:38 PM, Shlomo Pongratz wrote:
> From: Pavel Fedin 
> 
> I would like to offer this, slightly improved implementation. The key thing 
> is a new
> kernel_irqchip_type member in Machine class. Currently it is used only by 
> virt machine for
> its internal purposes, however in future it is to be passed to KVM in
> kvm_irqchip_create(). The variable is defined as int in order to be 
> architecture agnostic,
> for potential future users.
> 
> Signed-off-by: Pavel Fedin 
> ---
>  hw/arm/virt.c | 72 
> +--
>  include/hw/arm/fdt.h  |  2 ++
>  include/hw/arm/virt.h |  1 +
>  target-arm/machine.c  |  7 -
>  4 files changed, 73 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 4d15309..4c2ae7f 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c

> @@ -445,6 +462,14 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq 
> *pic, int type, bool secure)
>  sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_CPU].base);
>  }
>  
> +if (type == 3) {
> +/* Connect GIC to CPU */
> +for (i = 0; i < smp_cpus; i++) {
> +CPUState *cpu = qemu_get_cpu(i);
> +aatch64_registers_with_opaque_set(OBJECT(cpu), (void *)gicdev);

Typo--should be "aarch64".

With that, feel free to add the following if it's any use:

Tested-by: Christopher Covington 

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

[Qemu-devel] [PATCH] MAINTAINERS: Small IDE/FDC touchup

2015-09-24 Thread John Snow

libqos/ahci and tests/fdc-test are under my purview also,
include them in the appropriate stanzas.

Signed-off-by: John Snow 
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index cf02890..7d97bd8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -634,6 +634,7 @@ F: hw/block/cdrom.c
 F: hw/block/hd-geometry.c
 F: tests/ide-test.c
 F: tests/ahci-test.c
+F: tests/libqos/ahci*
 T: git git://github.com/jnsnow/qemu.git ide
 
 Floppy
@@ -642,6 +643,7 @@ L: qemu-bl...@nongnu.org
 S: Supported
 F: hw/block/fdc.c
 F: include/hw/block/fdc.h
+F: tests/fdc-test.c
 T: git git://github.com/jnsnow/qemu.git ide
 
 OMAP
-- 
2.4.3

Re: [Qemu-devel] [PATCH 2/2] hw/vfio/platform: do not set resamplefd for edge-sensitive IRQS

2015-09-24 Thread Eric Auger

On 09/23/2015 09:28 PM, Alex Williamson wrote:
> On Wed, 2015-09-23 at 00:00 +0100, Eric Auger wrote:
>> In irqfd mode, current code attempts to set a resamplefd whatever
>> the type of the IRQ. For an edge-sensitive IRQ this attempt fails
>> and as a consequence the whole irqfd setup fails and we fall back
>> to the slow mode. This patch bypasses the resamplefd setting for
>> non level-sensitive IRQs.
>>
>> Signed-off-by: Eric Auger 
>> ---
>>  hw/vfio/platform.c | 4 
>>  1 file changed, 4 insertions(+)
>>
>> diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
>> index 38eaccf..2c91650 100644
>> --- a/hw/vfio/platform.c
>> +++ b/hw/vfio/platform.c
>> @@ -352,6 +352,10 @@ static int vfio_set_resample_eventfd(VFIOINTp *intp)
>>  int argsz, ret;
>>  int32_t *pfd;
>>  
>> +if (!(intp->flags & VFIO_IRQ_INFO_AUTOMASKED)) {
>> +return 0;
>> +}
>> +
>>  argsz = sizeof(*irq_set) + sizeof(*pfd);
>>  irq_set = g_malloc0(argsz);
>>  irq_set->argsz = argsz;
> 
> Aren't we also wasting a file descriptor for intp->unmask on edge IRQs
> as well?  I'd be tempted to make a helper function that does this test
> and use it both to avoid calling this in vfio_start_irqfd_injection()
> and also to avoid creating the unused eventfd in vfio_init_intp().
> Thanks,

Makes sense

Thanks

Eric
> 
> Alex
>

Re: [Qemu-devel] [PATCH 5/7] memory: Allow replay of IOMMU mapping notifications

2015-09-24 Thread Alex Williamson

On Thu, 2015-09-24 at 14:33 +1000, David Gibson wrote:
> When we have guest visible IOMMUs, we allow notifiers to be registered
> which will be informed of all changes to IOMMU mappings.  This is used by
> vfio to keep the host IOMMU mappings in sync with guest IOMMU mappings.
> 
> However, unlike with a memory region listener, an iommu notifier won't be
> told about any mappings which already exist in the (guest) IOMMU at the
> time it is registered.  This can cause problems if hotplugging a VFIO
> device onto a guest bus which had existing guest IOMMU mappings, but didn't
> previously have any VFIO devices (and hence no host IOMMU mappings).
> 
> This adds a memory_region_register_iommu_notifier_replay() function to
> handle this case.  As well as registering the new notifier it replays
> existing mappings.  Because the IOMMU memory region doesn't internally
> remember the granularity of the guest IOMMU it has a small hack where the
> caller must specify a granularity at which to replay mappings.
> 
> If there are finer mappings in the guest IOMMU these will be reported in
> the iotlb structures passed to the notifier which it must handle (probably
> causing it to flag an error).  This isn't new - the VFIO iommu notifier
> must already handle notifications about guest IOMMU mappings too short
> for it to represent in the host IOMMU.
> 
> Signed-off-by: David Gibson 
> ---
>  include/exec/memory.h | 17 +
>  memory.c  | 18 ++
>  2 files changed, 35 insertions(+)
> 
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 5baaf48..304f985 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -583,6 +583,23 @@ void memory_region_notify_iommu(MemoryRegion *mr,
>  void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n);
>  
>  /**
> + * memory_region_register_iommu_notifier_replay: register a notifier
> + * for changes to IOMMU translation entries, and replay existing IOMMU
> + * translations to the new notifier.
> + *
> + * @mr: the memory region to observe
> + * @n: the notifier to be added; the notifier receives a pointer to an
> + * #IOMMUTLBEntry as the opaque value; the pointer ceases to be
> + * valid on exit from the notifier.
> + * @granularity: Minimum page granularity to replay notifications for
> + * @is_write: Whether to treat the replay as a translate "write"
> + * through the iommu
> + */
> +void memory_region_register_iommu_notifier_replay(MemoryRegion *mr, Notifier 
> *n,
> +  hwaddr granularity,
> +  bool is_write);
> +
> +/**
>   * memory_region_unregister_iommu_notifier: unregister a notifier for
>   * changes to IOMMU translation entries.
>   *
> diff --git a/memory.c b/memory.c
> index ef87363..b4b6861 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -1403,6 +1403,24 @@ void 
> memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n)
>  notifier_list_add(&mr->iommu_notify, n);
>  }
>  
> +void memory_region_register_iommu_notifier_replay(MemoryRegion *mr, Notifier 
> *n,
> +  hwaddr granularity,
> +  bool is_write)
> +{
> +hwaddr addr;
> +IOMMUTLBEntry iotlb;
> +
> +memory_region_register_iommu_notifier(mr, n);
> +
> +for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
> +
> +iotlb = mr->iommu_ops->translate(mr, addr, is_write);
> +if (iotlb.perm != IOMMU_NONE) {
> +n->notify(n, &iotlb);
> +}
> +}
> +}
> +


When memory_listener_register() replays mappings, it does so on an rcu
copy of the flatview for each AddressSpace.  Here we don't seem to have
anything protecting against concurrency... do we need to worry about
that?

>  void memory_region_unregister_iommu_notifier(Notifier *n)
>  {
>  notifier_remove(n);

Re: [Qemu-devel] [PATCH 3/7] vfio: Check guest IOVA ranges against host IOMMU capabilities

2015-09-24 Thread Alex Williamson

On Thu, 2015-09-24 at 14:33 +1000, David Gibson wrote:
> The current vfio core code assumes that the host IOMMU is capable of
> mapping any IOVA the guest wants to use to where we need.  However, real
> IOMMUs generally only support translating a certain range of IOVAs (the
> "DMA window") not a full 64-bit address space.
> 
> The common x86 IOMMUs support a wide enough range that guests are very
> unlikely to go beyond it in practice, however the IOMMU used on IBM Power
> machines - in the default configuration - supports only a much more limited
> IOVA range, usually 0..2GiB.
> 
> If the guest attempts to set up an IOVA range that the host IOMMU can't
> map, qemu won't report an error until it actually attempts to map a bad
> IOVA.  If guest RAM is being mapped directly into the IOMMU (i.e. no guest
> visible IOMMU) then this will show up very quickly.  If there is a guest
> visible IOMMU, however, the problem might not show up until much later when
> the guest actually attempts to DMA with an IOVA the host can't handle.
> 
> This patch adds a test so that we will detect earlier if the guest is
> attempting to use IOVA ranges that the host IOMMU won't be able to deal
> with.
> 
> For now, we assume that "Type1" (x86) IOMMUs can support any IOVA, this is
> incorrect, but no worse than what we have already.  We can't do better for
> now because the Type1 kernel interface doesn't tell us what IOVA range the
> IOMMU actually supports.
> 
> For the Power "sPAPR TCE" IOMMU, however, we can retrieve the supported
> IOVA range and validate guest IOVA ranges against it, and this patch does
> so.
> 
> Signed-off-by: David Gibson 
> Reviewed-by: Laurent Vivier 
> ---
>  hw/vfio/common.c  | 40 +---
>  include/hw/vfio/vfio-common.h |  6 ++
>  2 files changed, 43 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 95a4850..f90cc75 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -343,14 +343,22 @@ static void vfio_listener_region_add(MemoryListener 
> *listener,
>  if (int128_ge(int128_make64(iova), llend)) {
>  return;
>  }
> +end = int128_get64(llend);
> +
> +if ((iova < container->min_iova) || ((end - 1) > container->max_iova)) {
> +error_report("vfio: IOMMU container %p can't map guest IOVA region"
> + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx,
> + container, iova, end - 1);
> +ret = -EFAULT; /* FIXME: better choice here? */

"Bad address" makes sense to me.  This looks like an RFC comment, can we
remove it?

> +goto fail;
> +}
>  
>  memory_region_ref(section->mr);
>  
>  if (memory_region_is_iommu(section->mr)) {
>  VFIOGuestIOMMU *giommu;
>  
> -trace_vfio_listener_region_add_iommu(iova,
> -int128_get64(int128_sub(llend, int128_one(;
> +trace_vfio_listener_region_add_iommu(iova, end - 1);
>  /*
>   * FIXME: We should do some checking to see if the
>   * capabilities of the host VFIO IOMMU are adequate to model
> @@ -387,7 +395,6 @@ static void vfio_listener_region_add(MemoryListener 
> *listener,
>  
>  /* Here we assume that memory_region_is_ram(section->mr)==true */
>  
> -end = int128_get64(llend);
>  vaddr = memory_region_get_ram_ptr(section->mr) +
>  section->offset_within_region +
>  (iova - section->offset_within_address_space);
> @@ -685,7 +692,19 @@ static int vfio_connect_container(VFIOGroup *group, 
> AddressSpace *as)
>  ret = -errno;
>  goto free_container_exit;
>  }
> +
> +/*
> + * FIXME: This assumes that a Type1 IOMMU can map any 64-bit
> + * IOVA whatsoever.  That's not actually true, but the current
> + * kernel interface doesn't tell us what it can map, and the
> + * existing Type1 IOMMUs generally support any IOVA we're
> + * going to actually try in practice.
> + */
> +container->min_iova = 0;
> +container->max_iova = (hwaddr)-1;
>  } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
> +struct vfio_iommu_spapr_tce_info info;
> +
>  ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
>  if (ret) {
>  error_report("vfio: failed to set group container: %m");
> @@ -710,6 +729,21 @@ static int vfio_connect_container(VFIOGroup *group, 
> AddressSpace *as)
>  ret = -errno;
>  goto free_container_exit;
>  }
> +
> +/*
> + * FIXME: This only considers the host IOMMU' 32-bit window.

IOMMU's?

> + * At some point we need to add support for the optional
> + * 64-bit window and dynamic windows
> + */
> +info.argsz = sizeof(info);
> +ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
> +if (ret) {
> +error_report("vfio: VFIO_IOMMU_SPA

1 2 3 4 >

1 - 100 of 368 matches

Mail list logo