date:20170703

[Qemu-devel] [PATCH v3 16/20] xilinx_axienet: Convert to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Signed-off-by: Fam Zheng 
---
 hw/net/xilinx_axienet.c | 31 +--
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/hw/net/xilinx_axienet.c b/hw/net/xilinx_axienet.c
index b670184..e92762b 100644
--- a/hw/net/xilinx_axienet.c
+++ b/hw/net/xilinx_axienet.c
@@ -341,8 +341,10 @@ struct XilinxAXIEnet {
 SysBusDevice busdev;
 MemoryRegion iomem;
 qemu_irq irq;
-StreamSlave *tx_data_dev;
-StreamSlave *tx_control_dev;
+/* StreamSlave pointers for data and control tx devices, to be filled by
+ * link property. */
+Object *tx_data_dev;
+Object *tx_control_dev;
 XilinxAXIEnetStreamSlave rx_data_dev;
 XilinxAXIEnetStreamSlave rx_control_dev;
 NICState *nic;
@@ -688,17 +690,18 @@ static void axienet_eth_rx_notify(void *opaque)
 {
 XilinxAXIEnet *s = XILINX_AXI_ENET(opaque);
 
-while (s->rxappsize && stream_can_push(s->tx_control_dev,
+while (s->rxappsize && stream_can_push(STREAM_SLAVE(s->tx_control_dev),
axienet_eth_rx_notify, s)) {
-size_t ret = stream_push(s->tx_control_dev,
+size_t ret = stream_push(STREAM_SLAVE(s->tx_control_dev),
  (void *)s->rxapp + CONTROL_PAYLOAD_SIZE
  - s->rxappsize, s->rxappsize);
 s->rxappsize -= ret;
 }
 
-while (s->rxsize && stream_can_push(s->tx_data_dev,
+while (s->rxsize && stream_can_push(STREAM_SLAVE(s->tx_data_dev),
 axienet_eth_rx_notify, s)) {
-size_t ret = stream_push(s->tx_data_dev, (void *)s->rxmem + s->rxpos,
+size_t ret = stream_push(STREAM_SLAVE(s->tx_data_dev),
+ (void *)s->rxmem + s->rxpos,
  s->rxsize);
 s->rxsize -= ret;
 s->rxpos += ret;
@@ -991,18 +994,6 @@ static void xilinx_enet_init(Object *obj)
 XilinxAXIEnet *s = XILINX_AXI_ENET(obj);
 SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 
-object_property_add_link(obj, "axistream-connected", TYPE_STREAM_SLAVE,
- (Object **) &s->tx_data_dev,
- qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
- &error_abort);
-object_property_add_link(obj, "axistream-control-connected",
- TYPE_STREAM_SLAVE,
- (Object **) &s->tx_control_dev,
- qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
- &error_abort);
-
 object_initialize(&s->rx_data_dev, sizeof(s->rx_data_dev),
   TYPE_XILINX_AXI_ENET_DATA_STREAM);
 object_initialize(&s->rx_control_dev, sizeof(s->rx_control_dev),
@@ -1023,6 +1014,10 @@ static Property xilinx_enet_properties[] = {
 DEFINE_PROP_UINT32("rxmem", XilinxAXIEnet, c_rxmem, 0x1000),
 DEFINE_PROP_UINT32("txmem", XilinxAXIEnet, c_txmem, 0x1000),
 DEFINE_NIC_PROPERTIES(XilinxAXIEnet, conf),
+DEFINE_PROP_LINK("axistream-connected", XilinxAXIEnet,
+ tx_data_dev, TYPE_STREAM_SLAVE),
+DEFINE_PROP_LINK("axistream-control-connected", XilinxAXIEnet,
+ tx_control_dev, TYPE_STREAM_SLAVE),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.9.4

Re: [Qemu-devel] [RFC v2 2/3] qemu-error: Implement a more generic error reporting

2017-07-03 Thread Markus Armbruster

"Daniel P. Berrange"  writes:

> On Mon, Jul 03, 2017 at 04:07:21PM +0200, Markus Armbruster wrote:
>> "Daniel P. Berrange"  writes:
>> 
>> > On Thu, Jun 29, 2017 at 12:42:38PM -0700, Alistair Francis wrote:
>> >> This patch removes the existing error_vreport() function and replaces it
>> >> with a more generic vreport() function that takes an enum describing the
>> >> information to be reported.
>> 
>> Why remove error_vreport()?
>> 
>> >> As part of this change a report() function is added as well with the
>> >> same capability.
>> >> 
>> >> To maintain full compatibility the original error_report() function is
>> >> maintained and no changes to the way errors are printed have been made.
>> >> 
>> >> Signed-off-by: Alistair Francis 
>> >> ---
>> >> 
>> >>  hw/virtio/virtio.c  |  2 +-
>> >>  include/qemu/error-report.h | 10 +-
>> >>  scripts/checkpatch.pl   |  3 ++-
>> >>  util/qemu-error.c   | 33 ++---
>> >>  4 files changed, 42 insertions(+), 6 deletions(-)
>> >> 
>> >> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
>> >> index 464947f76d..bd3d26abb7 100644
>> >> --- a/hw/virtio/virtio.c
>> >> +++ b/hw/virtio/virtio.c
>> >> @@ -2448,7 +2448,7 @@ void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice 
>> >> *vdev, const char *fmt, ...)
>> >>  va_list ap;
>> >>  
>> >>  va_start(ap, fmt);
>> >> -error_vreport(fmt, ap);
>> >> +vreport(ERROR, fmt, ap);
>> >>  va_end(ap);
>> >>  
>> >>  if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
>> >> diff --git a/include/qemu/error-report.h b/include/qemu/error-report.h
>> >> index 3001865896..39b554c3b9 100644
>> >> --- a/include/qemu/error-report.h
>> >> +++ b/include/qemu/error-report.h
>> >> @@ -21,6 +21,12 @@ typedef struct Location {
>> >>  struct Location *prev;
>> >>  } Location;
>> >>  
>> >> +typedef enum {
>> >> +ERROR,
>> >> +WARN,
>> >> +INFO,
>> >> +} report_types;
>> >
>> > Woah, those are faaar too generic names to be used. There is way too much
>> > chance of those clashing with definitions from headers we pull in - 
>> > particularly
>> > windows which pollutes its system headers with loads of generic names.
>> >
>> > I'd suggest  QMSG_ERROR, QMSG_WARN, QMSG_INFO
>> >
>> >> +
>> >>  Location *loc_push_restore(Location *loc);
>> >>  Location *loc_push_none(Location *loc);
>> >>  Location *loc_pop(Location *loc);
>> >> @@ -30,12 +36,14 @@ void loc_set_none(void);
>> >>  void loc_set_cmdline(char **argv, int idx, int cnt);
>> >>  void loc_set_file(const char *fname, int lno);
>> >>  
>> >> +void vreport(report_types type, const char *fmt, va_list ap) 
>> >> GCC_FMT_ATTR(2, 0);
>> >> +void report(report_types type, const char *fmt, ...)  GCC_FMT_ATTR(2, 3);
>> >
>> > Those names are too generic too IMHO.  I'd suggest  qmsg_report, 
>> > qmsg_vreport
>> >
>> > As mentioned in the previous review, there should be wrappers which
>> > call these with suitable enum to make usage less verbose. eg
>> >
>> >   qmsg_info(fmt, ...)  should call qmsg_report(QMSG_INFO, fmt, ...)
>> >   qmsg_vinfo(fmt, ...)  should call qmsg_vreport(QMSG_INFO, fmt, ...)
>> >
>> > likewise, for other message levels
>> 
>> We then have qmsg_warning() for warnings, and error_report() for errors.
>> Ugh!
>> 
>> If I had known back then what I know now, I wouldn't have used the
>> error_ prefix.
>> 
>> Naming things is hard.
>> 
>> Ideas anyone?
>
> I guess implicit in my suggestion would be to switch to qmsg_error()
> over some (long) period of time, but that would be a massive amount
> of churn that would harm backporting.
>
> So perhaps, just have error_report warning_report, info_report, and
> accept that the naming convention is slightly reversed from "normality"

That's not half bad.

The matching enum would be

typedef enum {
REPORT_TYPE_ERROR,
REPORT_TYPE_WARNING,
REPORT_TYPE_INFO,
} report_type;

Note the typedef name is *singular*.  Compare

report_type rtype;

to

report_types rtype;

@rtype is *one* report type, not multiple.

Let's stick report_type, report() and vreport() into qemu-error.c
(static linkage) until we have a genuine need for them elsewhere.

[Qemu-devel] [PATCH v3 17/20] xilinx_axidma: Convert to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Signed-off-by: Fam Zheng 
---
 hw/dma/xilinx_axidma.c | 26 +++---
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/hw/dma/xilinx_axidma.c b/hw/dma/xilinx_axidma.c
index 6065689..df293d3 100644
--- a/hw/dma/xilinx_axidma.c
+++ b/hw/dma/xilinx_axidma.c
@@ -124,8 +124,11 @@ struct XilinxAXIDMA {
 SysBusDevice busdev;
 MemoryRegion iomem;
 uint32_t freqhz;
-StreamSlave *tx_data_dev;
-StreamSlave *tx_control_dev;
+
+/* StreamSlave pointers to be filled by link property */
+Object *tx_data_dev;
+Object *tx_control_dev;
+
 XilinxAXIDMAStreamSlave rx_data_dev;
 XilinxAXIDMAStreamSlave rx_control_dev;
 
@@ -491,7 +494,8 @@ static void axidma_write(void *opaque, hwaddr addr,
 s->regs[addr] = value;
 s->regs[R_DMASR] &= ~DMASR_IDLE; /* Not idle.  */
 if (!sid) {
-stream_process_mem2s(s, d->tx_data_dev, d->tx_control_dev);
+stream_process_mem2s(s, STREAM_SLAVE(d->tx_data_dev),
+ STREAM_SLAVE(d->tx_control_dev));
 }
 break;
 default:
@@ -564,18 +568,6 @@ static void xilinx_axidma_init(Object *obj)
 XilinxAXIDMA *s = XILINX_AXI_DMA(obj);
 SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 
-object_property_add_link(obj, "axistream-connected", TYPE_STREAM_SLAVE,
- (Object **)&s->tx_data_dev,
- qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
- &error_abort);
-object_property_add_link(obj, "axistream-control-connected",
- TYPE_STREAM_SLAVE,
- (Object **)&s->tx_control_dev,
- qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
- &error_abort);
-
 object_initialize(&s->rx_data_dev, sizeof(s->rx_data_dev),
   TYPE_XILINX_AXI_DMA_DATA_STREAM);
 object_initialize(&s->rx_control_dev, sizeof(s->rx_control_dev),
@@ -595,6 +587,10 @@ static void xilinx_axidma_init(Object *obj)
 
 static Property axidma_properties[] = {
 DEFINE_PROP_UINT32("freqhz", XilinxAXIDMA, freqhz, 5000),
+DEFINE_PROP_LINK("axistream-connected", XilinxAXIDMA,
+ tx_data_dev, TYPE_STREAM_SLAVE),
+DEFINE_PROP_LINK("axistream-control-connected", XilinxAXIDMA,
+ tx_control_dev, TYPE_STREAM_SLAVE),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.9.4

[Qemu-devel] [PATCH v3 15/20] xlnx_zynqmp: Convert to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Signed-off-by: Fam Zheng 
---
 hw/arm/xlnx-zynqmp.c | 12 
 include/hw/arm/xlnx-zynqmp.h |  3 ++-
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 64f52f8..2ce9cad 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -140,11 +140,6 @@ static void xlnx_zynqmp_init(Object *obj)
   &error_abort);
 }
 
-object_property_add_link(obj, "ddr-ram", TYPE_MEMORY_REGION,
- (Object **)&s->ddr_ram,
- qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort);
-
 object_initialize(&s->gic, sizeof(s->gic), gic_class_name());
 qdev_set_parent_bus(DEVICE(&s->gic), sysbus_get_default());
 
@@ -192,7 +187,7 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error 
**errp)
 qemu_irq gic_spi[GIC_NUM_SPI_INTR];
 Error *err = NULL;
 
-ram_size = memory_region_size(s->ddr_ram);
+ram_size = memory_region_size(MEMORY_REGION(s->ddr_ram));
 
 /* Create the DDR Memory Regions. User friendly checks should happen at
  * the board level
@@ -206,7 +201,7 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error 
**errp)
 ddr_high_size = ram_size - XLNX_ZYNQMP_MAX_LOW_RAM_SIZE;
 
 memory_region_init_alias(&s->ddr_ram_high, NULL,
- "ddr-ram-high", s->ddr_ram,
+ "ddr-ram-high", MEMORY_REGION(s->ddr_ram),
   ddr_low_size, ddr_high_size);
 memory_region_add_subregion(get_system_memory(),
 XLNX_ZYNQMP_HIGH_RAM_START,
@@ -218,7 +213,7 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error 
**errp)
 }
 
 memory_region_init_alias(&s->ddr_ram_low, NULL,
- "ddr-ram-low", s->ddr_ram,
+ "ddr-ram-low", MEMORY_REGION(s->ddr_ram),
   0, ddr_low_size);
 memory_region_add_subregion(get_system_memory(), 0, &s->ddr_ram_low);
 
@@ -434,6 +429,7 @@ static Property xlnx_zynqmp_props[] = {
 DEFINE_PROP_STRING("boot-cpu", XlnxZynqMPState, boot_cpu),
 DEFINE_PROP_BOOL("secure", XlnxZynqMPState, secure, false),
 DEFINE_PROP_BOOL("has_rpu", XlnxZynqMPState, has_rpu, false),
+DEFINE_PROP_LINK("ddr-ram", XlnxZynqMPState, ddr_ram, TYPE_MEMORY_REGION),
 DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index c2931bf..efd4c1a 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h
@@ -75,7 +75,8 @@ typedef struct XlnxZynqMPState {
 
 MemoryRegion ocm_ram[XLNX_ZYNQMP_NUM_OCM_BANKS];
 
-MemoryRegion *ddr_ram;
+/* A MemoryRegion pointer to be filled by link property */
+Object *ddr_ram;
 MemoryRegion ddr_ram_low, ddr_ram_high;
 
 CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS];
-- 
2.9.4

[Qemu-devel] [PATCH v3 10/20] dimm: Convert to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Unlike the usual object_property_add_link() invocations in other
devices, dimm checks the "is mapped" state of the backend in addition to
qdev_prop_allow_set_link_before_realize. To convert it without
specializing DEFINE_PROP_LINK which always uses the qdev general check
callback, move the extra check to device realize time.

Signed-off-by: Fam Zheng 
---
 hw/mem/nvdimm.c  | 11 +++
 hw/mem/pc-dimm.c | 42 ++
 include/hw/mem/pc-dimm.h |  3 ++-
 3 files changed, 23 insertions(+), 33 deletions(-)

diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index db896b0..b22f271 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -80,7 +80,8 @@ static MemoryRegion *nvdimm_get_memory_region(PCDIMMDevice 
*dimm)
 
 static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
 {
-MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem, errp);
+MemoryRegion *mr = 
host_memory_backend_get_memory(MEMORY_BACKEND(dimm->hostmem),
+  errp);
 NVDIMMDevice *nvdimm = NVDIMM(dimm);
 uint64_t align, pmem_size, size = memory_region_size(mr);
 
@@ -91,7 +92,7 @@ static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
 pmem_size = QEMU_ALIGN_DOWN(pmem_size, align);
 
 if (size <= nvdimm->label_size || !pmem_size) {
-HostMemoryBackend *hostmem = dimm->hostmem;
+HostMemoryBackend *hostmem = MEMORY_BACKEND(dimm->hostmem);
 char *path = object_get_canonical_path_component(OBJECT(hostmem));
 
 error_setg(errp, "the size of memdev %s (0x%" PRIx64 ") is too "
@@ -136,14 +137,16 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, 
const void *buf,
 
 memcpy(nvdimm->label_data + offset, buf, size);
 
-mr = host_memory_backend_get_memory(dimm->hostmem, &error_abort);
+mr = host_memory_backend_get_memory(MEMORY_BACKEND(dimm->hostmem),
+&error_abort);
 backend_offset = memory_region_size(mr) - nvdimm->label_size + offset;
 memory_region_set_dirty(mr, backend_offset, size);
 }
 
 static MemoryRegion *nvdimm_get_vmstate_memory_region(PCDIMMDevice *dimm)
 {
-return host_memory_backend_get_memory(dimm->hostmem, &error_abort);
+return host_memory_backend_get_memory(MEMORY_BACKEND(dimm->hostmem),
+  &error_abort);
 }
 
 static void nvdimm_class_init(ObjectClass *oc, void *data)
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 5e23495..eb8deca 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -184,7 +184,7 @@ int qmp_pc_dimm_device_list(Object *obj, void *opaque)
 di->node = dimm->node;
 di->size = object_property_get_uint(OBJECT(dimm), 
PC_DIMM_SIZE_PROP,
 NULL);
-di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem));
+di->memdev = object_get_canonical_path(dimm->hostmem);
 
 info->u.dimm.data = di;
 elem->value = info;
@@ -350,6 +350,8 @@ static Property pc_dimm_properties[] = {
 DEFINE_PROP_UINT32(PC_DIMM_NODE_PROP, PCDIMMDevice, node, 0),
 DEFINE_PROP_INT32(PC_DIMM_SLOT_PROP, PCDIMMDevice, slot,
   PC_DIMM_UNASSIGNED_SLOT),
+DEFINE_PROP_LINK(PC_DIMM_MEMDEV_PROP, PCDIMMDevice, hostmem,
+ TYPE_MEMORY_BACKEND),
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -367,33 +369,10 @@ static void pc_dimm_get_size(Object *obj, Visitor *v, 
const char *name,
 visit_type_uint64(v, name, &value, errp);
 }
 
-static void pc_dimm_check_memdev_is_busy(const Object *obj, const char *name,
-  Object *val, Error **errp)
-{
-Error *local_err = NULL;
-
-if (host_memory_backend_is_mapped(MEMORY_BACKEND(val))) {
-char *path = object_get_canonical_path_component(val);
-error_setg(&local_err, "can't use already busy memdev: %s", path);
-g_free(path);
-} else {
-qdev_prop_allow_set_link_before_realize(obj, name, val, &local_err);
-}
-
-error_propagate(errp, local_err);
-}
-
 static void pc_dimm_init(Object *obj)
 {
-PCDIMMDevice *dimm = PC_DIMM(obj);
-
 object_property_add(obj, PC_DIMM_SIZE_PROP, "uint64", pc_dimm_get_size,
 NULL, NULL, NULL, &error_abort);
-object_property_add_link(obj, PC_DIMM_MEMDEV_PROP, TYPE_MEMORY_BACKEND,
- (Object **)&dimm->hostmem,
- pc_dimm_check_memdev_is_busy,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
- &error_abort);
 }
 
 static void pc_dimm_realize(DeviceState *dev, Error **errp)
@@ -404,6 +383,11 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp)
 if (!dimm->hostmem) {
 error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
 return;
+} else if (host_memory_backend_is_mapped(MEMORY_BACKEND(dimm->h

[Qemu-devel] [PATCH V7 04/12] net/filter-mirror.c: Make filter mirror support vnet support.

2017-07-03 Thread Zhang Chen

We add the vnet_hdr_support option for filter-mirror; it is disabled by default.
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
You can use it for example:
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support

If the vnet_hdr_support flag is set, we change the sent packet format from
struct {int size; const uint8_t buf[];} to {int size; int vnet_hdr_len; const 
uint8_t buf[];}.
This lets other modules (like colo-compare) know how to parse net packets correctly.

Signed-off-by: Zhang Chen 
---
 net/filter-mirror.c | 42 +-
 qemu-options.hx |  5 ++---
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/net/filter-mirror.c b/net/filter-mirror.c
index 69cf4ca..4a849d4 100644
--- a/net/filter-mirror.c
+++ b/net/filter-mirror.c
@@ -41,12 +41,14 @@ typedef struct MirrorState {
 CharBackend chr_in;
 CharBackend chr_out;
 SocketReadState rs;
+bool vnet_hdr;
 } MirrorState;
 
 static int filter_send(MirrorState *s,
const struct iovec *iov,
int iovcnt)
 {
+NetFilterState *nf = NETFILTER(s);
 int ret = 0;
 ssize_t size = 0;
 uint32_t len = 0;
@@ -63,6 +65,23 @@ static int filter_send(MirrorState *s,
 goto err;
 }
 
+if (s->vnet_hdr) {
+/*
+ * If vnet_hdr = on, we send vnet header len to make other
+ * module(like colo-compare) know how to parse net
+ * packet correctly.
+ */
+ssize_t vnet_hdr_len;
+
+vnet_hdr_len = nf->netdev->vnet_hdr_len;
+
+len = htonl(vnet_hdr_len);
+ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
+if (ret != sizeof(len)) {
+goto err;
+}
+}
+
 buf = g_malloc(size);
 iov_to_buf(iov, iovcnt, 0, buf, size);
 ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
@@ -229,7 +248,7 @@ static void filter_redirector_setup(NetFilterState *nf, 
Error **errp)
 }
 }
 
-net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
+net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr);
 
 if (s->indev) {
 chr = qemu_chr_find(s->indev);
@@ -318,6 +337,20 @@ static void filter_mirror_set_outdev(Object *obj,
 }
 }
 
+static bool filter_mirror_get_vnet_hdr(Object *obj, Error **errp)
+{
+MirrorState *s = FILTER_MIRROR(obj);
+
+return s->vnet_hdr;
+}
+
+static void filter_mirror_set_vnet_hdr(Object *obj, bool value, Error **errp)
+{
+MirrorState *s = FILTER_MIRROR(obj);
+
+s->vnet_hdr = value;
+}
+
 static char *filter_redirector_get_outdev(Object *obj, Error **errp)
 {
 MirrorState *s = FILTER_REDIRECTOR(obj);
@@ -337,8 +370,15 @@ static void filter_redirector_set_outdev(Object *obj,
 
 static void filter_mirror_init(Object *obj)
 {
+MirrorState *s = FILTER_MIRROR(obj);
+
 object_property_add_str(obj, "outdev", filter_mirror_get_outdev,
 filter_mirror_set_outdev, NULL);
+
+s->vnet_hdr = false;
+object_property_add_bool(obj, "vnet_hdr_support",
+ filter_mirror_get_vnet_hdr,
+ filter_mirror_set_vnet_hdr, NULL);
 }
 
 static void filter_redirector_init(Object *obj)
diff --git a/qemu-options.hx b/qemu-options.hx
index 297bd8a..f0add6f 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4238,10 +4238,9 @@ queue @var{all|rx|tx} is an option that can be applied 
to any netfilter.
 @option{tx}: the filter is attached to the transmit queue of the netdev,
  where it will receive packets sent by the netdev.
 
-@item -object 
filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
+@item -object 
filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
 
-filter-mirror on netdev @var{netdevid},mirror net packet to chardev
-@var{chardevid}
+filter-mirror on netdev @var{netdevid},mirror net packet to 
chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will 
mirror packet with vnet_hdr_len.
 
 @item -object 
filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
 outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
-- 
2.7.4

[Qemu-devel] [PATCH v3 14/20] gicv3: Convert to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Signed-off-by: Fam Zheng 
---
 hw/intc/arm_gicv3_its_kvm.c| 18 ++
 include/hw/intc/arm_gicv3_its_common.h |  3 ++-
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c
index 1f8991b..5520c28 100644
--- a/hw/intc/arm_gicv3_its_kvm.c
+++ b/hw/intc/arm_gicv3_its_kvm.c
@@ -120,17 +120,6 @@ static void kvm_arm_its_realize(DeviceState *dev, Error 
**errp)
 qemu_add_vm_change_state_handler(vm_change_state_handler, s);
 }
 
-static void kvm_arm_its_init(Object *obj)
-{
-GICv3ITSState *s = KVM_ARM_ITS(obj);
-
-object_property_add_link(obj, "parent-gicv3",
- "kvm-arm-gicv3", (Object **)&s->gicv3,
- object_property_allow_set_link,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
- &error_abort);
-}
-
 /**
  * kvm_arm_its_pre_save - handles the saving of ITS registers.
  * ITS tables are flushed into guest RAM separately and earlier,
@@ -205,12 +194,18 @@ static void kvm_arm_its_post_load(GICv3ITSState *s)
   GITS_CTLR, &s->ctlr, true, &error_abort);
 }
 
+static Property kvm_arm_its_props[] = {
+DEFINE_PROP_LINK("parent-gicv3", GICv3ITSState, gicv3, "kvm-arm-gicv3"),
+DEFINE_PROP_END_OF_LIST(),
+};
+
 static void kvm_arm_its_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
 GICv3ITSCommonClass *icc = ARM_GICV3_ITS_COMMON_CLASS(klass);
 
 dc->realize = kvm_arm_its_realize;
+dc->props   = kvm_arm_its_props;
 icc->send_msi = kvm_its_send_msi;
 icc->pre_save = kvm_arm_its_pre_save;
 icc->post_load = kvm_arm_its_post_load;
@@ -220,7 +215,6 @@ static const TypeInfo kvm_arm_its_info = {
 .name = TYPE_KVM_ARM_ITS,
 .parent = TYPE_ARM_GICV3_ITS_COMMON,
 .instance_size = sizeof(GICv3ITSState),
-.instance_init = kvm_arm_its_init,
 .class_init = kvm_arm_its_class_init,
 };
 
diff --git a/include/hw/intc/arm_gicv3_its_common.h 
b/include/hw/intc/arm_gicv3_its_common.h
index fd1fe64..29ec114 100644
--- a/include/hw/intc/arm_gicv3_its_common.h
+++ b/include/hw/intc/arm_gicv3_its_common.h
@@ -42,7 +42,8 @@ struct GICv3ITSState {
 MemoryRegion iomem_its_cntrl;
 MemoryRegion iomem_its_translation;
 
-GICv3State *gicv3;
+/* GICv3State pointer to be filled by link property */
+Object *gicv3;
 
 int dev_fd; /* kvm device fd if backed by kvm vgic support */
 uint64_t gits_translater_gpa;
-- 
2.9.4

[Qemu-devel] [PATCH V7 12/12] docs/colo-proxy.txt: Update colo-proxy usage of net driver with vnet_header

2017-07-03 Thread Zhang Chen

Signed-off-by: Zhang Chen 
---
 docs/colo-proxy.txt | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt
index c4941de..f6a624f 100644
--- a/docs/colo-proxy.txt
+++ b/docs/colo-proxy.txt
@@ -182,6 +182,32 @@ Secondary(ip:3.3.3.8):
 -chardev socket,id=red1,host=3.3.3.3,port=9004
 -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
 -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
+-object filter-rewriter,id=f3,netdev=hn0,queue=all
+
+If you want to use virtio-net-pci or other driver with vnet_header:
+
+Primary(ip:3.3.3.3):
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
+-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
+-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
+-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
+-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
+-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
+-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
+-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
+-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
+-object 
filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out,vnet_hdr_support
+-object 
filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0,vnet_hdr_support
+-object 
colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
+
+Secondary(ip:3.3.3.8):
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
+-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
+-chardev socket,id=red0,host=3.3.3.3,port=9003
+-chardev socket,id=red1,host=3.3.3.3,port=9004
+-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0,vnet_hdr_support
+-object 
filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1,vnet_hdr_support
+-object filter-rewriter,id=f3,netdev=hn0,queue=all,vnet_hdr_support
 
 Note:
   a.COLO-proxy must work with COLO-frame and Block-replication.
-- 
2.7.4

[Qemu-devel] [PATCH v3 09/20] virtio-crypto: Convert to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Unlike other object_property_add_link() occurrences in virtio devices,
virtio-crypto checks the "in use" state of the linked backend object in
addition to qdev_prop_allow_set_link_before_realize. To convert it
without needing to specialize DEFINE_PROP_LINK which always uses the
qdev callback, move the "in use" check to device realize time.

Signed-off-by: Fam Zheng 
---
 hw/s390x/virtio-ccw.c |  3 ---
 hw/virtio/virtio-crypto-pci.c |  2 --
 hw/virtio/virtio-crypto.c | 56 +--
 include/hw/virtio/virtio-crypto.h |  3 ++-
 4 files changed, 21 insertions(+), 43 deletions(-)

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index bd4a9ea..dfc55ba 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1592,9 +1592,6 @@ static void virtio_ccw_crypto_instance_init(Object *obj)
 ccw_dev->force_revision_1 = true;
 virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 TYPE_VIRTIO_CRYPTO);
-
-object_property_add_alias(obj, "cryptodev", OBJECT(&dev->vdev),
-  "cryptodev", &error_abort);
 }
 
 static void virtio_ccw_crypto_class_init(ObjectClass *klass, void *data)
diff --git a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c
index 422aca3..bf64996 100644
--- a/hw/virtio/virtio-crypto-pci.c
+++ b/hw/virtio/virtio-crypto-pci.c
@@ -62,8 +62,6 @@ static void virtio_crypto_initfn(Object *obj)
 
 virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 TYPE_VIRTIO_CRYPTO);
-object_property_add_alias(obj, "cryptodev", OBJECT(&dev->vdev),
-  "cryptodev", &error_abort);
 }
 
 static const TypeInfo virtio_crypto_pci_info = {
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 2590893..0c44fe1 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -753,22 +753,18 @@ static void virtio_crypto_reset(VirtIODevice *vdev)
 static void virtio_crypto_init_config(VirtIODevice *vdev)
 {
 VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+CryptoDevBackend *cryptodev = CRYPTODEV_BACKEND(vcrypto->conf.cryptodev);
 
-vcrypto->conf.crypto_services =
- vcrypto->conf.cryptodev->conf.crypto_services;
-vcrypto->conf.cipher_algo_l =
- vcrypto->conf.cryptodev->conf.cipher_algo_l;
-vcrypto->conf.cipher_algo_h =
- vcrypto->conf.cryptodev->conf.cipher_algo_h;
-vcrypto->conf.hash_algo = vcrypto->conf.cryptodev->conf.hash_algo;
-vcrypto->conf.mac_algo_l = vcrypto->conf.cryptodev->conf.mac_algo_l;
-vcrypto->conf.mac_algo_h = vcrypto->conf.cryptodev->conf.mac_algo_h;
-vcrypto->conf.aead_algo = vcrypto->conf.cryptodev->conf.aead_algo;
-vcrypto->conf.max_cipher_key_len =
-  vcrypto->conf.cryptodev->conf.max_cipher_key_len;
-vcrypto->conf.max_auth_key_len =
-  vcrypto->conf.cryptodev->conf.max_auth_key_len;
-vcrypto->conf.max_size = vcrypto->conf.cryptodev->conf.max_size;
+vcrypto->conf.crypto_services = cryptodev->conf.crypto_services;
+vcrypto->conf.cipher_algo_l = cryptodev->conf.cipher_algo_l;
+vcrypto->conf.cipher_algo_h = cryptodev->conf.cipher_algo_h;
+vcrypto->conf.hash_algo = cryptodev->conf.hash_algo;
+vcrypto->conf.mac_algo_l = cryptodev->conf.mac_algo_l;
+vcrypto->conf.mac_algo_h = cryptodev->conf.mac_algo_h;
+vcrypto->conf.aead_algo = cryptodev->conf.aead_algo;
+vcrypto->conf.max_cipher_key_len = cryptodev->conf.max_cipher_key_len;
+vcrypto->conf.max_auth_key_len = cryptodev->conf.max_auth_key_len;
+vcrypto->conf.max_size = cryptodev->conf.max_size;
 }
 
 static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
@@ -776,11 +772,15 @@ static void virtio_crypto_device_realize(DeviceState 
*dev, Error **errp)
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(dev);
 int i;
-
-vcrypto->cryptodev = vcrypto->conf.cryptodev;
+vcrypto->cryptodev = CRYPTODEV_BACKEND(vcrypto->conf.cryptodev);
 if (vcrypto->cryptodev == NULL) {
 error_setg(errp, "'cryptodev' parameter expects a valid object");
 return;
+} else if (cryptodev_backend_is_used(vcrypto->cryptodev)) {
+char *path = 
object_get_canonical_path_component(vcrypto->conf.cryptodev);
+error_setg(errp, "can't use already used cryptodev backend: %s", path);
+g_free(path);
+return;
 }
 
 vcrypto->max_queues = MAX(vcrypto->cryptodev->conf.peers.queues, 1);
@@ -845,6 +845,8 @@ static const VMStateDescription vmstate_virtio_crypto = {
 };
 
 static Property virtio_crypto_properties[] = {
+DEFINE_PROP_LINK("cryptodev", VirtIOCrypto, conf.cryptodev,
+ TYPE_CRYPTODEV_BACKEND),
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -888,20 +890,6 @@ static void virtio_crypto_class_init(ObjectClass *klass, 
void *data)

[Qemu-devel] [PATCH V7 06/12] net/colo.c: Make vnet_hdr_len as packet property

2017-07-03 Thread Zhang Chen

We can use this property to flush and send packets with vnet_hdr_len.

Signed-off-by: Zhang Chen 
---
 net/colo-compare.c| 8 ++--
 net/colo.c| 3 ++-
 net/colo.h| 4 +++-
 net/filter-rewriter.c | 2 +-
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 45b1584..3f914f6 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -121,9 +121,13 @@ static int packet_enqueue(CompareState *s, int mode)
 Connection *conn;
 
 if (mode == PRIMARY_IN) {
-pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len);
+pkt = packet_new(s->pri_rs.buf,
+ s->pri_rs.packet_len,
+ s->pri_rs.vnet_hdr_len);
 } else {
-pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len);
+pkt = packet_new(s->sec_rs.buf,
+ s->sec_rs.packet_len,
+ s->sec_rs.vnet_hdr_len);
 }
 
 if (parse_packet_early(pkt)) {
diff --git a/net/colo.c b/net/colo.c
index 8cc166b..180eaed 100644
--- a/net/colo.c
+++ b/net/colo.c
@@ -153,13 +153,14 @@ void connection_destroy(void *opaque)
 g_slice_free(Connection, conn);
 }
 
-Packet *packet_new(const void *data, int size)
+Packet *packet_new(const void *data, int size, int vnet_hdr_len)
 {
 Packet *pkt = g_slice_new(Packet);
 
 pkt->data = g_memdup(data, size);
 pkt->size = size;
 pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+pkt->vnet_hdr_len = vnet_hdr_len;
 
 return pkt;
 }
diff --git a/net/colo.h b/net/colo.h
index 7c524f3..caedb0d 100644
--- a/net/colo.h
+++ b/net/colo.h
@@ -43,6 +43,8 @@ typedef struct Packet {
 int size;
 /* Time of packet creation, in wall clock ms */
 int64_t creation_ms;
+/* Get vnet_hdr_len from filter */
+uint32_t vnet_hdr_len;
 } Packet;
 
 typedef struct ConnectionKey {
@@ -82,7 +84,7 @@ Connection *connection_get(GHashTable *connection_track_table,
ConnectionKey *key,
GQueue *conn_list);
 void connection_hashtable_reset(GHashTable *connection_track_table);
-Packet *packet_new(const void *data, int size);
+Packet *packet_new(const void *data, int size, int vnet_hdr_len);
 void packet_destroy(void *opaque, void *user_data);
 
 #endif /* QEMU_COLO_PROXY_H */
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
index afa06e8..63256c7 100644
--- a/net/filter-rewriter.c
+++ b/net/filter-rewriter.c
@@ -158,7 +158,7 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
 char *buf = g_malloc0(size);
 
 iov_to_buf(iov, iovcnt, 0, buf, size);
-pkt = packet_new(buf, size);
+pkt = packet_new(buf, size, 0);
 g_free(buf);
 
 /*
-- 
2.7.4

[Qemu-devel] [PATCH v3 08/20] virtio-rng: Convert to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Signed-off-by: Fam Zheng 
---
 hw/s390x/virtio-ccw.c  |  2 --
 hw/virtio/virtio-pci.c |  2 --
 hw/virtio/virtio-rng.c | 14 ++
 include/hw/virtio/virtio-rng.h |  3 ++-
 4 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 9386893..bd4a9ea 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1546,8 +1546,6 @@ static void virtio_ccw_rng_instance_init(Object *obj)
 
 virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 TYPE_VIRTIO_RNG);
-object_property_add_alias(obj, "rng", OBJECT(&dev->vdev),
-  "rng", &error_abort);
 }
 
 static Property virtio_ccw_rng_properties[] = {
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 1eb61be..c76f3e5 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2459,8 +2459,6 @@ static void virtio_rng_initfn(Object *obj)
 
 virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 TYPE_VIRTIO_RNG);
-object_property_add_alias(obj, "rng", OBJECT(&dev->vdev), "rng",
-  &error_abort);
 }
 
 static const TypeInfo virtio_rng_pci_info = {
diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
index a6ee501..4bfe9c5 100644
--- a/hw/virtio/virtio-rng.c
+++ b/hw/virtio/virtio-rng.c
@@ -199,7 +199,7 @@ static void virtio_rng_device_realize(DeviceState *dev, 
Error **errp)
  "rng", NULL);
 }
 
-vrng->rng = vrng->conf.rng;
+vrng->rng = RNG_BACKEND(vrng->conf.rng);
 if (vrng->rng == NULL) {
 error_setg(errp, "'rng' parameter expects a valid object");
 return;
@@ -246,6 +246,7 @@ static Property virtio_rng_properties[] = {
  */
 DEFINE_PROP_UINT64("max-bytes", VirtIORNG, conf.max_bytes, INT64_MAX),
 DEFINE_PROP_UINT32("period", VirtIORNG, conf.period_ms, 1 << 16),
+DEFINE_PROP_LINK("rng", VirtIORNG, conf.rng, TYPE_RNG_BACKEND),
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -262,21 +263,10 @@ static void virtio_rng_class_init(ObjectClass *klass, 
void *data)
 vdc->get_features = get_features;
 }
 
-static void virtio_rng_initfn(Object *obj)
-{
-VirtIORNG *vrng = VIRTIO_RNG(obj);
-
-object_property_add_link(obj, "rng", TYPE_RNG_BACKEND,
- (Object **)&vrng->conf.rng,
- qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL);
-}
-
 static const TypeInfo virtio_rng_info = {
 .name = TYPE_VIRTIO_RNG,
 .parent = TYPE_VIRTIO_DEVICE,
 .instance_size = sizeof(VirtIORNG),
-.instance_init = virtio_rng_initfn,
 .class_init = virtio_rng_class_init,
 };
 
diff --git a/include/hw/virtio/virtio-rng.h b/include/hw/virtio/virtio-rng.h
index 922dce7..f2f106b 100644
--- a/include/hw/virtio/virtio-rng.h
+++ b/include/hw/virtio/virtio-rng.h
@@ -23,7 +23,8 @@
 OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_RNG)
 
 struct VirtIORNGConf {
-RngBackend *rng;
+/* RngBackend pointer to be filled by link property */
+Object *rng;
 uint64_t max_bytes;
 uint32_t period_ms;
 RngRandom *default_backend;
-- 
2.9.4

[Qemu-devel] [PATCH v3 13/20] armv7m: Convert armv7m.memory to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Signed-off-by: Fam Zheng 
---
 hw/arm/armv7m.c | 10 +++---
 include/hw/arm/armv7m.h |  2 +-
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index 3a13d82..3b0ada0 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -132,12 +132,6 @@ static void armv7m_instance_init(Object *obj)
 
 /* Can't init the cpu here, we don't yet know which model to use */
 
-object_property_add_link(obj, "memory",
- TYPE_MEMORY_REGION,
- (Object **)&s->board_memory,
- qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
- &error_abort);
 memory_region_init(&s->container, obj, "armv7m-container", UINT64_MAX);
 
 object_initialize(&s->nvic, sizeof(s->nvic), "armv7m_nvic");
@@ -167,7 +161,8 @@ static void armv7m_realize(DeviceState *dev, Error **errp)
 return;
 }
 
-memory_region_add_subregion_overlap(&s->container, 0, s->board_memory, -1);
+memory_region_add_subregion_overlap(&s->container, 0,
+MEMORY_REGION(s->board_memory), -1);
 
 cpustr = g_strsplit(s->cpu_model, ",", 2);
 
@@ -248,6 +243,7 @@ static void armv7m_realize(DeviceState *dev, Error **errp)
 
 static Property armv7m_properties[] = {
 DEFINE_PROP_STRING("cpu-model", ARMv7MState, cpu_model),
+DEFINE_PROP_LINK("memory", ARMv7MState, board_memory, TYPE_MEMORY_REGION),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h
index 1b4ce5b..1640199 100644
--- a/include/hw/arm/armv7m.h
+++ b/include/hw/arm/armv7m.h
@@ -58,7 +58,7 @@ typedef struct ARMv7MState {
 /* Properties */
 char *cpu_model;
 /* MemoryRegion the board provides to us (with its devices, RAM, etc) */
-MemoryRegion *board_memory;
+Object *board_memory;
 } ARMv7MState;
 
 #endif
-- 
2.9.4

[Qemu-devel] [PATCH v3 03/20] qdev: Introduce DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

This property can be used to replace the object_property_add_link in
device code, to add a link to other objects, which is a common pattern.

Signed-off-by: Fam Zheng 
---
 hw/core/qdev-properties.c| 18 ++
 include/hw/qdev-core.h   |  1 +
 include/hw/qdev-properties.h |  9 +
 3 files changed, 28 insertions(+)

diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 113ce7d..5429c63 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -1214,3 +1214,21 @@ PropertyInfo qdev_prop_size = {
 .set = set_size,
 .set_default_value = set_default_value_uint,
 };
+
+/* --- object link property --- */
+
+static void create_link_property(Object *obj, Property *prop, Error **errp)
+{
+Object **child = qdev_get_prop_ptr(DEVICE(obj), prop);
+
+object_property_add_link(obj, prop->name, prop->link_type,
+ child,
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_UNREF_ON_RELEASE,
+ errp);
+}
+
+PropertyInfo qdev_prop_link = {
+.name = "link",
+.create = create_link_property,
+};
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 33518ee..08d1d2c 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -233,6 +233,7 @@ struct Property {
 int  arrayoffset;
 PropertyInfo *arrayinfo;
 int  arrayfieldsize;
+const char   *link_type;
 };
 
 struct PropertyInfo {
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 9edded2..b150378 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -30,6 +30,7 @@ extern PropertyInfo qdev_prop_pci_devfn;
 extern PropertyInfo qdev_prop_blocksize;
 extern PropertyInfo qdev_prop_pci_host_devaddr;
 extern PropertyInfo qdev_prop_arraylen;
+extern PropertyInfo qdev_prop_link;
 
 #define DEFINE_PROP(_name, _state, _field, _prop, _type) { \
 .name  = (_name),\
@@ -117,6 +118,14 @@ extern PropertyInfo qdev_prop_arraylen;
 .arrayoffset = offsetof(_state, _arrayfield),   \
 }
 
+#define DEFINE_PROP_LINK(_name, _state, _field, _type) {\
+.name = (_name),\
+.info = &(qdev_prop_link),  \
+.offset = offsetof(_state, _field)  \
++ type_check(Object *, typeof_field(_state, _field)),   \
+.link_type  = _type,\
+}
+
 #define DEFINE_PROP_UINT8(_n, _s, _f, _d)   \
 DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_uint8, uint8_t)
 #define DEFINE_PROP_UINT16(_n, _s, _f, _d)  \
-- 
2.9.4

[Qemu-devel] [PATCH v3 06/20] virtio-blk: Convert to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Signed-off-by: Fam Zheng 
---
 hw/block/dataplane/virtio-blk.c | 2 +-
 hw/block/virtio-blk.c   | 5 +
 hw/s390x/virtio-ccw.c   | 2 --
 hw/virtio/virtio-pci.c  | 2 --
 include/hw/virtio/virtio-blk.h  | 3 ++-
 5 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 5556f0e..6fdc6f6 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -116,7 +116,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, 
VirtIOBlkConf *conf,
 s->conf = conf;
 
 if (conf->iothread) {
-s->iothread = conf->iothread;
+s->iothread = IOTHREAD(conf->iothread);
 object_ref(OBJECT(s->iothread));
 s->ctx = iothread_get_aio_context(s->iothread);
 } else {
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index c0bd247..8146306 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -983,10 +983,6 @@ static void virtio_blk_instance_init(Object *obj)
 {
 VirtIOBlock *s = VIRTIO_BLK(obj);
 
-object_property_add_link(obj, "iothread", TYPE_IOTHREAD,
- (Object **)&s->conf.iothread,
- qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL);
 device_add_bootindex_property(obj, &s->conf.conf.bootindex,
   "bootindex", "/disk@0,0",
   DEVICE(obj), NULL);
@@ -1014,6 +1010,7 @@ static Property virtio_blk_properties[] = {
 DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
 true),
 DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1),
+DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 90d37cb..a17ce84 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -789,8 +789,6 @@ static void virtio_ccw_blk_instance_init(Object *obj)
 
 virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 TYPE_VIRTIO_BLK);
-object_property_add_alias(obj, "iothread", OBJECT(&dev->vdev),"iothread",
-  &error_abort);
 object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
   "bootindex", &error_abort);
 }
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 301920e..984baf8 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1996,8 +1996,6 @@ static void virtio_blk_pci_instance_init(Object *obj)
 
 virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 TYPE_VIRTIO_BLK);
-object_property_add_alias(obj, "iothread", OBJECT(&dev->vdev),"iothread",
-  &error_abort);
 object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
   "bootindex", &error_abort);
 }
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index d3c8a6f..2452074 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -33,7 +33,8 @@ struct virtio_blk_inhdr
 struct VirtIOBlkConf
 {
 BlockConf conf;
-IOThread *iothread;
+/* IOThread pointer to be filled by link property */
+Object *iothread;
 char *serial;
 uint32_t scsi;
 uint32_t config_wce;
-- 
2.9.4

[Qemu-devel] [PATCH v3 07/20] virtio-scsi: Convert to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Signed-off-by: Fam Zheng 
---
 hw/s390x/virtio-ccw.c   |  2 --
 hw/scsi/virtio-scsi-dataplane.c |  2 +-
 hw/scsi/virtio-scsi.c   | 13 ++---
 hw/virtio/virtio-pci.c  |  2 --
 include/hw/virtio/virtio-scsi.h |  3 ++-
 5 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index a17ce84..9386893 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -872,8 +872,6 @@ static void virtio_ccw_scsi_instance_init(Object *obj)
 
 virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 TYPE_VIRTIO_SCSI);
-object_property_add_alias(obj, "iothread", OBJECT(&dev->vdev), "iothread",
-  &error_abort);
 }
 
 #ifdef CONFIG_VHOST_SCSI
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 944ea4e..887c100 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -40,7 +40,7 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
 error_setg(errp, "ioeventfd is required for iothread");
 return;
 }
-s->ctx = iothread_get_aio_context(vs->conf.iothread);
+s->ctx = iothread_get_aio_context(IOTHREAD(vs->conf.iothread));
 } else {
 if (!virtio_device_ioeventfd_enabled(vdev)) {
 return;
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index f46f06d..d48bcce 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -897,16 +897,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, 
Error **errp)
 virtio_scsi_dataplane_setup(s, errp);
 }
 
-static void virtio_scsi_instance_init(Object *obj)
-{
-VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(obj);
-
-object_property_add_link(obj, "iothread", TYPE_IOTHREAD,
- (Object **)&vs->conf.iothread,
- qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort);
-}
-
 void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -934,6 +924,8 @@ static Property virtio_scsi_properties[] = {
VIRTIO_SCSI_F_HOTPLUG, true),
 DEFINE_PROP_BIT("param_change", VirtIOSCSI, host_features,
 VIRTIO_SCSI_F_CHANGE, true),
+DEFINE_PROP_LINK("iothread", VirtIOSCSI, parent_obj.conf.iothread,
+ TYPE_IOTHREAD),
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -988,7 +980,6 @@ static const TypeInfo virtio_scsi_info = {
 .name = TYPE_VIRTIO_SCSI,
 .parent = TYPE_VIRTIO_SCSI_COMMON,
 .instance_size = sizeof(VirtIOSCSI),
-.instance_init = virtio_scsi_instance_init,
 .class_init = virtio_scsi_class_init,
 .interfaces = (InterfaceInfo[]) {
 { TYPE_HOTPLUG_HANDLER },
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 984baf8..1eb61be 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2065,8 +2065,6 @@ static void virtio_scsi_pci_instance_init(Object *obj)
 
 virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 TYPE_VIRTIO_SCSI);
-object_property_add_alias(obj, "iothread", OBJECT(&dev->vdev), "iothread",
-  &error_abort);
 }
 
 static const TypeInfo virtio_scsi_pci_info = {
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index de6ae5a..9d9bb56 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -56,7 +56,8 @@ struct VirtIOSCSIConf {
 #endif
 CharBackend chardev;
 uint32_t boot_tpgt;
-IOThread *iothread;
+/* IOThread pointer to be filled by link property */
+Object *iothread;
 };
 
 struct VirtIOSCSI;
-- 
2.9.4

[Qemu-devel] [PATCH v3 02/20] qdev: Introduce PropertyInfo.create

2017-07-03 Thread Fam Zheng

This allows a property implementation to provide a specialized property
creation method.

Update conditions guarding property types accordingly.

Signed-off-by: Fam Zheng 
---
 hw/core/qdev.c | 31 +++
 include/hw/qdev-core.h |  1 +
 qmp.c  |  2 +-
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 849952a..ec63fe0 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -744,6 +744,10 @@ static void qdev_property_add_legacy(DeviceState *dev, 
Property *prop,
 return;
 }
 
+if (prop->info->create) {
+return;
+}
+
 name = g_strdup_printf("legacy-%s", prop->name);
 object_property_add(OBJECT(dev), name, "str",
 prop->info->print ? qdev_get_legacy_property : 
prop->info->get,
@@ -770,20 +774,23 @@ void qdev_property_add_static(DeviceState *dev, Property 
*prop,
 Error *local_err = NULL;
 Object *obj = OBJECT(dev);
 
-/*
- * TODO qdev_prop_ptr does not have getters or setters.  It must
- * go now that it can be replaced with links.  The test should be
- * removed along with it: all static properties are read/write.
- */
-if (!prop->info->get && !prop->info->set) {
-return;
+if (prop->info->create) {
+prop->info->create(obj, prop, &local_err);
+} else {
+/*
+ * TODO qdev_prop_ptr does not have getters or setters.  It must
+ * go now that it can be replaced with links.  The test should be
+ * removed along with it: all static properties are read/write.
+ */
+if (!prop->info->get && !prop->info->set) {
+return;
+}
+object_property_add(obj, prop->name, prop->info->name,
+prop->info->get, prop->info->set,
+prop->info->release,
+prop, &local_err);
 }
 
-object_property_add(obj, prop->name, prop->info->name,
-prop->info->get, prop->info->set,
-prop->info->release,
-prop, &local_err);
-
 if (local_err) {
 error_propagate(errp, local_err);
 return;
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 9d7c1c0..33518ee 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -241,6 +241,7 @@ struct PropertyInfo {
 const char * const *enum_table;
 int (*print)(DeviceState *dev, Property *prop, char *dest, size_t len);
 void (*set_default_value)(Object *obj, const Property *prop);
+void (*create)(Object *obj, Property *prop, Error **errp);
 ObjectPropertyAccessor *get;
 ObjectPropertyAccessor *set;
 ObjectPropertyRelease *release;
diff --git a/qmp.c b/qmp.c
index 7ee9bcf..133fdfc 100644
--- a/qmp.c
+++ b/qmp.c
@@ -490,7 +490,7 @@ static DevicePropertyInfo 
*make_device_property_info(ObjectClass *klass,
  * for removal.  This conditional should be removed along with
  * it.
  */
-if (!prop->info->set) {
+if (!prop->info->set && !prop->info->create) {
 return NULL;   /* no way to set it, don't show */
 }
 
-- 
2.9.4

[Qemu-devel] [PATCH v3 11/20] ivshmem: Convert to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Unlike the usual object_property_add_link() invocations in other
devices, ivshmem checks the "is mapped" state of the backend in addition
to qdev_prop_allow_set_link_before_realize. To convert it without
specializing DEFINE_PROP_LINK which always uses the qdev callback, move
the extra check to device realize time.

Signed-off-by: Fam Zheng 
---
 hw/misc/ivshmem.c | 34 --
 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 97beaf6..e318427 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -87,7 +87,8 @@ typedef struct IVShmemState {
 uint32_t features;
 
 /* exactly one of these two may be set */
-HostMemoryBackend *hostmem; /* with interrupts */
+Object *hostmem; /* HostMemoryBackend pointer to be filled by link
+property, used with interrupts */
 CharBackend server_chr; /* without interrupts */
 
 /* registers */
@@ -864,7 +865,7 @@ static void ivshmem_common_realize(PCIDevice *dev, Error 
**errp)
 if (s->hostmem != NULL) {
 IVSHMEM_DPRINTF("using hostmem\n");
 
-s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem,
+s->ivshmem_bar2 = 
host_memory_backend_get_memory(MEMORY_BACKEND(s->hostmem),
  &error_abort);
 } else {
 Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
@@ -1009,18 +1010,6 @@ static const TypeInfo ivshmem_common_info = {
 .class_init= ivshmem_common_class_init,
 };
 
-static void ivshmem_check_memdev_is_busy(const Object *obj, const char *name,
- Object *val, Error **errp)
-{
-if (host_memory_backend_is_mapped(MEMORY_BACKEND(val))) {
-char *path = object_get_canonical_path_component(val);
-error_setg(errp, "can't use already busy memdev: %s", path);
-g_free(path);
-} else {
-qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
-}
-}
-
 static const VMStateDescription ivshmem_plain_vmsd = {
 .name = TYPE_IVSHMEM_PLAIN,
 .version_id = 0,
@@ -1037,6 +1026,7 @@ static const VMStateDescription ivshmem_plain_vmsd = {
 
 static Property ivshmem_plain_properties[] = {
 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
+DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND),
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1044,11 +1034,6 @@ static void ivshmem_plain_init(Object *obj)
 {
 IVShmemState *s = IVSHMEM_PLAIN(obj);
 
-object_property_add_link(obj, "memdev", TYPE_MEMORY_BACKEND,
- (Object **)&s->hostmem,
- ivshmem_check_memdev_is_busy,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
- &error_abort);
 s->not_legacy_32bit = 1;
 }
 
@@ -1059,17 +1044,22 @@ static void ivshmem_plain_realize(PCIDevice *dev, Error 
**errp)
 if (!s->hostmem) {
 error_setg(errp, "You must specify a 'memdev'");
 return;
+} else if (host_memory_backend_is_mapped(MEMORY_BACKEND(s->hostmem))) {
+char *path = object_get_canonical_path_component(s->hostmem);
+error_setg(errp, "can't use already busy memdev: %s", path);
+g_free(path);
+return;
 }
 
 ivshmem_common_realize(dev, errp);
-host_memory_backend_set_mapped(s->hostmem, true);
+host_memory_backend_set_mapped(MEMORY_BACKEND(s->hostmem), true);
 }
 
 static void ivshmem_plain_exit(PCIDevice *pci_dev)
 {
 IVShmemState *s = IVSHMEM_COMMON(pci_dev);
 
-host_memory_backend_set_mapped(s->hostmem, false);
+host_memory_backend_set_mapped(MEMORY_BACKEND(s->hostmem), false);
 }
 
 static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
@@ -1245,7 +1235,7 @@ static void desugar_shm(IVShmemState *s)
 object_property_add_child(OBJECT(s), "internal-shm-backend", obj,
   &error_abort);
 user_creatable_complete(obj, &error_abort);
-s->hostmem = MEMORY_BACKEND(obj);
+s->hostmem = obj;
 }
 
 static void ivshmem_realize(PCIDevice *dev, Error **errp)
-- 
2.9.4

[Qemu-devel] [PATCH v3 00/20] qdev: Introduce DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

v3: Include Igor's patch, and fix virtio-crypto too.
Always use qdev_prop_allow_set_link_before_realize and
OBJ_PROP_LINK_UNREF_ON_RELEASE.
Include as many applicable devices as possible and some more clean-ups.
There are still more left, but they use uncommon check and flags parameters;
it's better to take care of them in following batches.

v2: Create a new header for link properties. [Paolo]
Don't wrap, use PropertyInfo.create() (much better diffstat, yay!).
[Paolo]

Link properties of devices created with object_property_add_link() are not
reflected in HMP "info qtree". For example, whether a virtio-blk device has an
iothread (i.e. has enabled data plane) can not be introspected easily.

Introduce a new type of qdev property macro to fix that.

Fam Zheng (19):
  qdev: Introduce PropertyInfo.create
  qdev: Introduce DEFINE_PROP_LINK
  qmp: Use ObjectProperty.type if present
  qdev: Add const qualifier to PropertyInfo definitions
  virtio-blk: Convert to DEFINE_PROP_LINK
  virtio-scsi: Convert to DEFINE_PROP_LINK
  virtio-rng: Convert to DEFINE_PROP_LINK
  virtio-crypto: Convert to DEFINE_PROP_LINK
  dimm: Convert to DEFINE_PROP_LINK
  ivshmem: Convert to DEFINE_PROP_LINK
  armv7m: Convert bitband.source-memory to DEFINE_PROP_LINK
  armv7m: Convert armv7m.memory to DEFINE_PROP_LINK
  gicv3: Convert to DEFINE_PROP_LINK
  xlnx_zynqmp: Convert to DEFINE_PROP_LINK
  xilinx_axienet: Convert to DEFINE_PROP_LINK
  xilinx_axidma: Convert to DEFINE_PROP_LINK
  mips_cmgcr: Convert to DEFINE_PROP_LINK
  cpu: Convert to DEFINE_PROP_LINK
  spapr_rng: Convert to DEFINE_PROP_LINK

Igor Mammedov (1):
  qom: enforce readonly nature of link's check callback

 cpus.c |  5 +--
 exec.c | 30 
 hw/arm/armv7m.c| 20 ---
 hw/arm/xlnx-zynqmp.c   | 12 +++
 hw/block/dataplane/virtio-blk.c|  2 +-
 hw/block/virtio-blk.c  |  5 +--
 hw/core/qdev-properties-system.c   |  8 ++---
 hw/core/qdev-properties.c  | 63 ++
 hw/core/qdev.c | 31 ++---
 hw/display/xlnx_dp.c   |  2 +-
 hw/dma/xilinx_axidma.c | 26 ++
 hw/intc/arm_gicv3_its_kvm.c| 18 --
 hw/ipmi/ipmi.c |  2 +-
 hw/mem/nvdimm.c| 11 +++---
 hw/mem/pc-dimm.c   | 42 ---
 hw/misc/ivshmem.c  | 34 +++---
 hw/misc/mips_cmgcr.c   | 22 
 hw/net/xilinx_axienet.c| 31 +++--
 hw/ppc/spapr_rng.c | 13 +++
 hw/s390x/css.c |  4 +--
 hw/s390x/s390-pci-bus.c|  2 +-
 hw/s390x/virtio-ccw.c  |  9 -
 hw/scsi/virtio-scsi-dataplane.c|  2 +-
 hw/scsi/virtio-scsi.c  | 13 ++-
 hw/virtio/virtio-crypto-pci.c  |  2 --
 hw/virtio/virtio-crypto.c  | 56 ++
 hw/virtio/virtio-pci.c |  6 
 hw/virtio/virtio-rng.c | 14 ++--
 include/hw/arm/armv7m.h|  5 +--
 include/hw/arm/xlnx-zynqmp.h   |  3 +-
 include/hw/intc/arm_gicv3_its_common.h |  3 +-
 include/hw/mem/pc-dimm.h   |  3 +-
 include/hw/misc/mips_cmgcr.h   |  5 +--
 include/hw/qdev-core.h |  6 ++--
 include/hw/qdev-properties.h   | 62 +++--
 include/hw/s390x/css.h |  4 +--
 include/hw/virtio/virtio-blk.h |  3 +-
 include/hw/virtio/virtio-crypto.h  |  3 +-
 include/hw/virtio/virtio-rng.h |  3 +-
 include/hw/virtio/virtio-scsi.h|  3 +-
 include/qom/cpu.h  |  4 ++-
 include/qom/object.h   |  6 ++--
 qmp.c  |  5 +--
 qom/cpu.c  |  1 +
 qom/object.c   |  8 ++---
 target/arm/cpu.c   |  6 ++--
 target/i386/cpu.c  |  7 ++--
 target/ppc/translate_init.c|  2 +-
 48 files changed, 282 insertions(+), 345 deletions(-)

-- 
2.9.4

[Qemu-devel] [PATCH v3 05/20] qdev: Add const qualifier to PropertyInfo definitions

2017-07-03 Thread Fam Zheng

The remaining non-const ones are in e1000e, which modifies the description at
runtime. They can be addressed separately.

Signed-off-by: Fam Zheng 
---
 hw/core/qdev-properties-system.c |  8 +++
 hw/core/qdev-properties.c| 44 +-
 hw/s390x/css.c   |  4 ++--
 hw/s390x/s390-pci-bus.c  |  2 +-
 include/hw/qdev-core.h   |  4 ++--
 include/hw/qdev-properties.h | 52 
 include/hw/s390x/css.h   |  4 ++--
 target/i386/cpu.c|  2 +-
 target/ppc/translate_init.c  |  2 +-
 9 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 3bef419..ec10da7 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -159,7 +159,7 @@ static void set_drive(Object *obj, Visitor *v, const char 
*name, void *opaque,
 set_pointer(obj, v, opaque, parse_drive, name, errp);
 }
 
-PropertyInfo qdev_prop_drive = {
+const PropertyInfo qdev_prop_drive = {
 .name  = "str",
 .description = "Node name or ID of a block device to use as a backend",
 .get   = get_drive,
@@ -228,7 +228,7 @@ static void release_chr(Object *obj, const char *name, void 
*opaque)
 qemu_chr_fe_deinit(be, false);
 }
 
-PropertyInfo qdev_prop_chr = {
+const PropertyInfo qdev_prop_chr = {
 .name  = "str",
 .description = "ID of a chardev to use as a backend",
 .get   = get_chr,
@@ -313,7 +313,7 @@ out:
 g_free(str);
 }
 
-PropertyInfo qdev_prop_netdev = {
+const PropertyInfo qdev_prop_netdev = {
 .name  = "str",
 .description = "ID of a netdev to use as a backend",
 .get   = get_netdev,
@@ -393,7 +393,7 @@ static void set_vlan(Object *obj, Visitor *v, const char 
*name, void *opaque,
 *ptr = hubport;
 }
 
-PropertyInfo qdev_prop_vlan = {
+const PropertyInfo qdev_prop_vlan = {
 .name  = "int32",
 .description = "Integer VLAN id to connect to",
 .print = print_vlan,
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 5429c63..f22bd71 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -132,7 +132,7 @@ static void set_default_value_bool(Object *obj, const 
Property *prop)
 object_property_set_bool(obj, prop->defval.u, prop->name, &error_abort);
 }
 
-PropertyInfo qdev_prop_bit = {
+const PropertyInfo qdev_prop_bit = {
 .name  = "bool",
 .description = "on/off",
 .get   = prop_get_bit,
@@ -191,7 +191,7 @@ static void prop_set_bit64(Object *obj, Visitor *v, const 
char *name,
 bit64_prop_set(dev, prop, value);
 }
 
-PropertyInfo qdev_prop_bit64 = {
+const PropertyInfo qdev_prop_bit64 = {
 .name  = "bool",
 .description = "on/off",
 .get   = prop_get_bit64,
@@ -226,7 +226,7 @@ static void set_bool(Object *obj, Visitor *v, const char 
*name, void *opaque,
 visit_type_bool(v, name, ptr, errp);
 }
 
-PropertyInfo qdev_prop_bool = {
+const PropertyInfo qdev_prop_bool = {
 .name  = "bool",
 .get   = get_bool,
 .set   = set_bool,
@@ -270,7 +270,7 @@ static void set_default_value_uint(Object *obj, const 
Property *prop)
 object_property_set_uint(obj, prop->defval.u, prop->name, &error_abort);
 }
 
-PropertyInfo qdev_prop_uint8 = {
+const PropertyInfo qdev_prop_uint8 = {
 .name  = "uint8",
 .get   = get_uint8,
 .set   = set_uint8,
@@ -304,7 +304,7 @@ static void set_uint16(Object *obj, Visitor *v, const char 
*name,
 visit_type_uint16(v, name, ptr, errp);
 }
 
-PropertyInfo qdev_prop_uint16 = {
+const PropertyInfo qdev_prop_uint16 = {
 .name  = "uint16",
 .get   = get_uint16,
 .set   = set_uint16,
@@ -363,14 +363,14 @@ static void set_int32(Object *obj, Visitor *v, const char 
*name, void *opaque,
 visit_type_int32(v, name, ptr, errp);
 }
 
-PropertyInfo qdev_prop_uint32 = {
+const PropertyInfo qdev_prop_uint32 = {
 .name  = "uint32",
 .get   = get_uint32,
 .set   = set_uint32,
 .set_default_value = set_default_value_uint,
 };
 
-PropertyInfo qdev_prop_int32 = {
+const PropertyInfo qdev_prop_int32 = {
 .name  = "int32",
 .get   = get_int32,
 .set   = set_int32,
@@ -404,7 +404,7 @@ static void set_uint64(Object *obj, Visitor *v, const char 
*name,
 visit_type_uint64(v, name, ptr, errp);
 }
 
-PropertyInfo qdev_prop_uint64 = {
+const PropertyInfo qdev_prop_uint64 = {
 .name  = "uint64",
 .get   = get_uint64,
 .set   = set_uint64,
@@ -457,7 +457,7 @@ static void set_string(Object *obj, Visitor *v, const char 
*name,
 *ptr = str;
 }
 
-PropertyInfo qdev_prop_string = {
+const PropertyInfo qdev_prop_string = {
 .name  = "str",
 .release = release_string,
 .get   = get_string,
@@ -467,7 +467,7 @@ PropertyInfo qdev_prop_string = {
 /* --- pointer --- */
 
 /* Not a proper property, just for dirty hacks.  TODO Remove it!  */
-PropertyInfo qdev_prop_ptr = {
+const PropertyInfo qdev_prop_ptr = {
 .na

[Qemu-devel] [PATCH v3 12/20] armv7m: Convert bitband.source-memory to DEFINE_PROP_LINK

2017-07-03 Thread Fam Zheng

Signed-off-by: Fam Zheng 
---
 hw/arm/armv7m.c | 10 +++---
 include/hw/arm/armv7m.h |  3 ++-
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index c8a11f2..3a13d82 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -97,12 +97,6 @@ static void bitband_init(Object *obj)
 BitBandState *s = BITBAND(obj);
 SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
-object_property_add_link(obj, "source-memory",
- TYPE_MEMORY_REGION,
- (Object **)&s->source_memory,
- qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
- &error_abort);
 memory_region_init_io(&s->iomem, obj, &bitband_ops, s,
   "bitband", 0x0200);
 sysbus_init_mmio(dev, &s->iomem);
@@ -117,7 +111,7 @@ static void bitband_realize(DeviceState *dev, Error **errp)
 return;
 }
 
-s->source_as = address_space_init_shareable(s->source_memory,
+s->source_as = 
address_space_init_shareable(MEMORY_REGION(s->source_memory),
 "bitband-source");
 }
 
@@ -349,6 +343,8 @@ void armv7m_load_kernel(ARMCPU *cpu, const char 
*kernel_filename, int mem_size)
 
 static Property bitband_properties[] = {
 DEFINE_PROP_UINT32("base", BitBandState, base, 0),
+DEFINE_PROP_LINK("source-memory", BitBandState, source_memory,
+ TYPE_MEMORY_REGION),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h
index a9b3f2a..1b4ce5b 100644
--- a/include/hw/arm/armv7m.h
+++ b/include/hw/arm/armv7m.h
@@ -24,7 +24,8 @@ typedef struct {
 AddressSpace *source_as;
 MemoryRegion iomem;
 uint32_t base;
-MemoryRegion *source_memory;
+/* MemoryRegion pointer to be filled by link property */
+Object *source_memory;
 } BitBandState;
 
 #define TYPE_ARMV7M "armv7m"
-- 
2.9.4

[Qemu-devel] [PATCH v3 01/20] qom: enforce readonly nature of link's check callback

2017-07-03 Thread Fam Zheng

From: Igor Mammedov 

A link's check callback is supposed to verify/permit setting the link;
however, currently nothing restricts the callback from misusing it
and modifying the target object from within.
Make sure that read-only semantics are checked by the compiler
to prevent the callback's misuse.

Signed-off-by: Igor Mammedov 
Signed-off-by: Fam Zheng 
---
 hw/core/qdev-properties.c| 3 ++-
 hw/display/xlnx_dp.c | 2 +-
 hw/ipmi/ipmi.c   | 2 +-
 hw/mem/pc-dimm.c | 2 +-
 hw/misc/ivshmem.c| 2 +-
 hw/virtio/virtio-crypto.c| 2 +-
 include/hw/qdev-properties.h | 3 ++-
 include/qom/object.h | 6 +++---
 qom/object.c | 8 
 9 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index f11d578..113ce7d 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -25,7 +25,8 @@ void qdev_prop_set_after_realize(DeviceState *dev, const char 
*name,
 }
 }
 
-void qdev_prop_allow_set_link_before_realize(Object *obj, const char *name,
+void qdev_prop_allow_set_link_before_realize(const Object *obj,
+ const char *name,
  Object *val, Error **errp)
 {
 DeviceState *dev = DEVICE(obj);
diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c
index f7b7b80..e99bba4 100644
--- a/hw/display/xlnx_dp.c
+++ b/hw/display/xlnx_dp.c
@@ -515,7 +515,7 @@ static void xlnx_dp_aux_set_command(XlnxDPState *s, 
uint32_t value)
 s->core_registers[DP_INTERRUPT_SIGNAL_STATE] |= 0x04;
 }
 
-static void xlnx_dp_set_dpdma(Object *obj, const char *name, Object *val,
+static void xlnx_dp_set_dpdma(const Object *obj, const char *name, Object *val,
   Error **errp)
 {
 XlnxDPState *s = XLNX_DP(obj);
diff --git a/hw/ipmi/ipmi.c b/hw/ipmi/ipmi.c
index afafe14..b27babd 100644
--- a/hw/ipmi/ipmi.c
+++ b/hw/ipmi/ipmi.c
@@ -90,7 +90,7 @@ static TypeInfo ipmi_interface_type_info = {
 .class_init = ipmi_interface_class_init,
 };
 
-static void isa_ipmi_bmc_check(Object *obj, const char *name,
+static void isa_ipmi_bmc_check(const Object *obj, const char *name,
Object *val, Error **errp)
 {
 IPMIBmc *bmc = IPMI_BMC(val);
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index b72258e..5e23495 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -367,7 +367,7 @@ static void pc_dimm_get_size(Object *obj, Visitor *v, const 
char *name,
 visit_type_uint64(v, name, &value, errp);
 }
 
-static void pc_dimm_check_memdev_is_busy(Object *obj, const char *name,
+static void pc_dimm_check_memdev_is_busy(const Object *obj, const char *name,
   Object *val, Error **errp)
 {
 Error *local_err = NULL;
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 2f0819d..97beaf6 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -1009,7 +1009,7 @@ static const TypeInfo ivshmem_common_info = {
 .class_init= ivshmem_common_class_init,
 };
 
-static void ivshmem_check_memdev_is_busy(Object *obj, const char *name,
+static void ivshmem_check_memdev_is_busy(const Object *obj, const char *name,
  Object *val, Error **errp)
 {
 if (host_memory_backend_is_mapped(MEMORY_BACKEND(val))) {
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 0353eb6..2590893 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -889,7 +889,7 @@ static void virtio_crypto_class_init(ObjectClass *klass, 
void *data)
 }
 
 static void
-virtio_crypto_check_cryptodev_is_used(Object *obj, const char *name,
+virtio_crypto_check_cryptodev_is_used(const Object *obj, const char *name,
   Object *val, Error **errp)
 {
 if (cryptodev_backend_is_used(CRYPTODEV_BACKEND(val))) {
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 0604c33..9edded2 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -272,7 +272,8 @@ void qdev_prop_set_after_realize(DeviceState *dev, const 
char *name,
  * This function should be used as the check() argument to
  * object_property_add_link().
  */
-void qdev_prop_allow_set_link_before_realize(Object *obj, const char *name,
+void qdev_prop_allow_set_link_before_realize(const Object *obj,
+ const char *name,
  Object *val, Error **errp);
 
 #endif
diff --git a/include/qom/object.h b/include/qom/object.h
index abaeb8c..1b82899 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -764,7 +764,7 @@ ObjectClass *object_get_class(Object *obj);
  *
  * Returns: The QOM typename of @obj.
  */
-const char *object_get_typename(Object *obj);
+const char *object_get_typename(const Object *obj);
 
 /**
  * type_register_static:
@@ -1319,7 +1319,7 @@ typedef enum {
  * callback fun

[Qemu-devel] [PATCH v3 04/20] qmp: Use ObjectProperty.type if present

2017-07-03 Thread Fam Zheng

The dynamic value is more informative in the case of link property,
otherwise it is the same.

Signed-off-by: Fam Zheng 
---
 qmp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/qmp.c b/qmp.c
index 133fdfc..9f8a167 100644
--- a/qmp.c
+++ b/qmp.c
@@ -496,7 +496,8 @@ static DevicePropertyInfo 
*make_device_property_info(ObjectClass *klass,
 
 info = g_malloc0(sizeof(*info));
 info->name = g_strdup(prop->name);
-info->type = g_strdup(prop->info->name);
+info->type = default_type ? g_strdup(default_type)
+  : g_strdup(prop->info->name);
 info->has_description = !!prop->info->description;
 info->description = g_strdup(prop->info->description);
 return info;
-- 
2.9.4

Re: [Qemu-devel] [RFC PATCH 1/3] vmstate: error hint for failed equal checks

2017-07-03 Thread Markus Armbruster

Halil Pasic  writes:

> On 07/03/2017 03:52 PM, Markus Armbruster wrote:
>> Halil Pasic  writes:
>> 
>>> On 06/30/2017 04:54 PM, Eric Blake wrote:
 On 06/30/2017 09:41 AM, Halil Pasic wrote:
>>> 'This' basically boils down to the question and
>>> 'Why aren't hints reported in QMP context?'
>>
>> QMP is supposed to be machine-parseable.  Hints are supposed to be
>> human-readable. If you have a machine managing the monitor, the hint
>> adds nothing but bandwidth consumption, because machine should not be
>> parsing the human portion of the error message in the first place (as it
>> is, libvirt already just logs the human-readable portion of a message,
>> and bases its actions solely on the machine-stable portions of an error
>> reply: namely, whether an error was sent at all, and occasionally, what
>> error class was used for that error - there's no guarantee a human will
>> be reading the log, though).
[...]
> From prior experiences I'm more used to think about error messages as
> something meant for human consumption, and expressing things expected to
> be relevant for some kind of client code in a different way (optimized
> for machine consumption).
>
> If however the error message ain't part of the machine relevant portion,
> then the same argument applies as to the 'hint', and I don't see the
> reason for handling hints differently. Do you agree with my
> argumentation?

 Indeed, it may not hurt to start passing the hints over the wire (errors
 would then consume more bandwidth, but errors are not the hot path).
 And I'm not necessarily opposed to that change, so much as trying to
 document why it is not currently the case.  At the same time, I probably
 won't be the one writing a path to populate the hint information into
 the QMP error, as I don't have any reason to use the hint when
 controlling libvirt (except maybe for logging, but there, the hint is
 not going to help the end user, because it's not the end-user's fault
 that libvirt used the API wrong to get a hint in the first place).
>>>
>>> For me both human readable things make sense only for error reporting
>>> (effectively logging). Error.msg should IMHO be different, than Error.hint.
>>> The existence of an error should be indicated by the Error object.
>> 
>> Consider this one from qemu-option.c:
>> 
>> error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name,
>>"a non-negative number below 2^64");
>> error_append_hint(errp, "Optional suffix k, M, G, T, P or E means"
>>   " kilo-, mega-, giga-, tera-, peta-\n"
>>   "and exabytes, respectively.\n");
>> 
>> The hint is helpful for a human command line or HMP user.  It's actively
>> misleading in QMP.
>
> I agree.
>
>> Totally fine, it's how the "hint" feature is meant
>> to be used.
>> 
>
> Was not aware.
>
>> If we have errors that can't be adequately explained in a single error
>> message, we may need a way to add more explanation.  error_append_hint()
>> isn't.
>> 
>
> Was not aware. Using hint in this very situation was suggested by Connie,
> and I assumed she is long enough with the project to know...
>
> In fact looking at  include/qapi/error.h:
> """
> /*
>  * Error reporting system loosely patterned after Glib's GError.
>  *
>  * Create an error:
>  * error_setg(&err, "situation normal, all fouled up");
>  *
>  * Create an error and add additional explanation:
>  * error_setg(&err, "invalid quark");
>  * error_append_hint(&err, "Valid quarks are up, down, strange, "
>  *   "charm, top, bottom.\n");
>  *
>  * Do *not* contract this to
>  * error_setg(&err, "invalid quark\n"
>  *"Valid quarks are up, down, strange, charm, top, bottom.");
> """
>
> my understanding was and is still the exact opposite of what you say:
> error_append_hint is for adding more explanation.
>
> Furthermore 
> """
> /*
>  * Append a printf-style human-readable explanation to an existing error.
>  * @errp may be NULL, but not &error_fatal or &error_abort.
>  * Trivially the case if you call it only after error_setg() or
>  * error_propagate().
>  * May be called multiple times.  The resulting hint should end with a
>  * newline.
>  */
> void error_append_hint(Error **errp, const char *fmt, ...)
> """
>
> Assuming that error_append_hint() isn't for adding more explanation,
> IMHO the doc does not adequately explain what it is for.

You're right, it doesn't.

> I have also failed to find any hint in qapi/error.h (which, AFAIU,
> documents the error API) that this human-readable explanation
> appended to an existing error by error_append_hint() is to be discarded
> if the error is reported in QMP context.
>
> Am I reading the api doc incorrectly, or did the documentation and
> de-facto api diverge (behavior)?

I added documentation after I inherited this su

Re: [Qemu-devel] [PATCH v4 11/13] virtio-console: chardev hotswap support

2017-07-03 Thread Amit Shah

On (Mon) 03 Jul 2017 [14:50:07], Anton Nefedov wrote:
> On 06/29/2017 01:02 PM, Marc-André Lureau wrote:
> > Hi
> > 
> > Looks good, but please write something in the commit message about what 
> > needs to be done for be-change (what this patch does).
> > 
> > thanks
> > 
> 
> Hi,
> 
> thank you! I guess the description should look like
> 
>   virtio-console: chardev hotswap support
> 
>   In case of a backend change, the handler functions and the watch have
>   to be reset.
>   Also, avoid unsafe qemu_chr_fe_get_driver() usage even though the pointer
>   is not really stored.
> 
> 
> 
> Amit, have you had a chance to kindly look at this?

If it gets a reviewed-by by someone who's looked at the series,
Michael could pull it all in.

I could look at it if you CC'ed me on the series (hint ;)

Amit
-- 
http://log.amitshah.net/

Re: [Qemu-devel] [PATCH] include/hw/ptimer.h: Add documentation comments

2017-07-03 Thread Philippe Mathieu-Daudé

On Mon, Jul 3, 2017 at 12:13 PM, Peter Maydell  wrote:
> Add documentation comments describing the public API of the
> ptimer countdown timer.
>
> Signed-off-by: Peter Maydell 

Reviewed-by: Philippe Mathieu-Daudé 

> ---
> I was trying to write a timer device and discovered that the ptimer
> API wasn't actually documented, so I wrote some basic notes for it...

I used to believe this file had no comments on purpose, feeling hazed
after reading "only the source code tells the full story" from the
GettingStartedDevelopers wiki entry.

/me gives Peter a big hug!

>
>  include/hw/ptimer.h | 120 
> 
>  1 file changed, 120 insertions(+)
>
> diff --git a/include/hw/ptimer.h b/include/hw/ptimer.h
> index eafc3f0..fc4ef5c 100644
> --- a/include/hw/ptimer.h
> +++ b/include/hw/ptimer.h
> @@ -12,6 +12,20 @@
>  #include "qemu/timer.h"
>  #include "migration/vmstate.h"
>
> +/* The ptimer API implements a simple periodic countdown timer.
> + * The countdown timer has a value (which can be read and written via
> + * ptimer_get_count() and ptimer_set_count()). When it is enabled
> + * using ptimer_run(), the value will count downwards at the frequency
> + * which has been configured using ptimer_set_period() or ptimer_set_freq().
> + * When it reaches zero it will trigger a QEMU bottom half handler, and
> + * can be set to either reload itself from a specified limit value
> + * and keep counting down, or to stop (as a one-shot timer).
> + *
> + * Forgetting to set the period/frequency (or setting it to zero) is a
> + * bug in the QEMU device and will cause warning messages to be printed
> + * to stderr when the guest attempts to enable the timer.
> + */
> +
>  /* The default ptimer policy retains backward compatibility with the legacy
>   * timers. Custom policies are adjusting the default one. Consider providing
>   * a correct policy for your timer.
> @@ -59,15 +73,121 @@
>  typedef struct ptimer_state ptimer_state;
>  typedef void (*ptimer_cb)(void *opaque);
>
> +/**
> + * ptimer_init - Allocate and return a new ptimer
> + * @bh: QEMU bottom half which is run on timer expiry
> + * @policy: PTIMER_POLICY_* bits specifying behaviour
> + *
> + * The ptimer returned must be freed using ptimer_free().
> + * The ptimer takes ownership of @bh and will delete it
> + * when the ptimer is eventually freed.
> + */
>  ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask);
> +
> +/**
> + * ptimer_free - Free a ptimer
> + * @s: timer to free
> + *
> + * Free a ptimer created using ptimer_init() (including
> + * deleting the bottom half which it is using).
> + */
>  void ptimer_free(ptimer_state *s);
> +
> +/**
> + * ptimer_set_period - Set counter increment interval in nanoseconds
> + * @s: ptimer to configure
> + * @period: period of the counter in nanoseconds
> + *
> + * Note that if your counter behaviour is specified as having a
> + * particular frequency rather than a period then ptimer_set_freq()
> + * may be more appropriate.
> + */
>  void ptimer_set_period(ptimer_state *s, int64_t period);

I like to use explicit unit in variable name, i.e. period_ns.

> +
> +/**
> + * ptimer_set_freq - Set counter frequency in Hz
> + * @s: ptimer to configure
> + * @freq: counter frequency in Hz
> + *
> + * This does the same thing as ptimer_set_period(), so you only
> + * need to call one of them. If the counter behaviour is specified
> + * as setting the frequency then this function is more appropriate,
> + * because it allows specifying an effective period which is
> + * precise to fractions of a nanosecond, avoiding rounding errors.
> + */
>  void ptimer_set_freq(ptimer_state *s, uint32_t freq);
> +
> +/**
> + * ptimer_get_limit - Get the configured limit of the ptimer
> + * @s: ptimer to query
> + *
> + * This function returns the current limit (reload) value
> + * of the down-counter; that is, the value which it will be
> + * reset to when it hits zero.
> + *
> + * Generally timer devices using ptimers should be able to keep
> + * their reload register state inside the ptimer using the get
> + * and set limit functions rather than needing to also track it
> + * in their own state structure.
> + */
>  uint64_t ptimer_get_limit(ptimer_state *s);
> +
> +/**
> + * ptimer_set_limit - Set the limit of the ptimer
> + * @s: ptimer
> + * @limit: initial countdown value
> + * @reload: if nonzero, then reset the counter to the new limit
> + *
> + * Set the limit value of the down-counter. The @reload flag can
> + * be used to emulate the behaviour of timers which immediately
> + * reload the counter when their reload register is written to.
> + */
>  void ptimer_set_limit(ptimer_state *s, uint64_t limit, int reload);
> +
> +/**
> + * ptimer_get_count - Get the current value of the ptimer
> + * @s: ptimer
> + *
> + * Return the current value of the down-counter. This will
> + * return the correct value whether the counter is enabled or
> + * disabled.
> + */
>  uint64_t ptimer_get_count(pt

Re: [Qemu-devel] [PATCH] spapr: fix memory hotplug error path

2017-07-03 Thread Bharata B Rao

On Tue, Jul 04, 2017 at 09:01:43AM +0530, Bharata B Rao wrote:
> On Mon, Jul 03, 2017 at 02:21:31PM +0200, Greg Kurz wrote:
> > QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails.
> > Let's propagate the error instead, like it is done everywhere else
> > where spapr_drc_attach() is called.
> > 
> > Signed-off-by: Greg Kurz 
> > ---
> >  hw/ppc/spapr.c |   10 --
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 70b3fd374e2b..e103be500189 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t 
> > addr_start, uint64_t size,
> >  int i, fdt_offset, fdt_size;
> >  void *fdt;
> >  uint64_t addr = addr_start;
> > +Error *local_err = NULL;
> > 
> >  for (i = 0; i < nr_lmbs; i++) {
> >  drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
> > @@ -2611,7 +2612,12 @@ static void spapr_add_lmbs(DeviceState *dev, 
> > uint64_t addr_start, uint64_t size,
> >  fdt_offset = spapr_populate_memory_node(fdt, node, addr,
> >  SPAPR_MEMORY_BLOCK_SIZE);
> > 
> > -spapr_drc_attach(drc, dev, fdt, fdt_offset, errp);
> > +spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
> > +if (local_err) {
> > +g_free(fdt);
> > +error_propagate(errp, local_err);
> > +return;
> > +}
> 
> There is some history to this. I was doing error recovery and propagation
> here similarly during memory hotplug development phase until Igor
> > suggested that we shouldn't try to recover after we have done guest
> visible changes.
> 
> Refer to "changes in v6" section in this post:
> https://lists.gnu.org/archive/html/qemu-ppc/2015-06/msg00296.html
> 
> However at that time we were doing memory add by DRC index method
> and hence would attach and online one LMB at a time.
> In that method, if an intermediate attach fails we would end up with a few
> LMBs being onlined by the guest already. However subsequently
> we have switched (optionally, based on dedicated_hp_event_source) to
> count-indexed method of hotplug where we do attach of all LMBs one by one
> and then request the guest to hotplug all of them at once using count-indexed
> method.
> 
> So it will be a bit tricky to abort for index based case and recover
> correctly for count-indexed case.

Looked at the code again and realized that though we started with
index based LMB addition, we later switched to count based addition. Then
we added support for count-indexed type subject to the presence
of dedicated hotplug event source while still retaining the support
for count based addition.

So presently we do attach of all LMBs one by one and then do onlining
(count based or count-indexed based) once. Hence error recovery
for both cases would be similar now. So I guess you should take care of
undoing pc_dimm_memory_plug() like Igor mentioned and also undo the
effects of partial successful attaches.

> 
> Regards,
> Bharata.

Re: [Qemu-devel] [PATCH 1/1] virtio-scsi-ccw: use ioeventfd even when KVM is disabled

2017-07-03 Thread QingFeng Hao




在 2017/7/3 19:48, Cornelia Huck 写道:

On Mon,  3 Jul 2017 09:38:36 +0200
QingFeng Hao  wrote:


Do not check kvm_eventfds_enabled() when KVM is disabled since it
always returns 0.  Since commit
8c56c1a592b5092d91da8d8943c1d6462a6f ("memory: emulate
ioeventfd") it has been possible to use ioeventfds in qtest or TCG
mode.

This patch makes -device virtio-scsi-ccw,iothread=iothread0 work even
when KVM is disabled.

I have tested that virtio-scsi-ccw works under tcg both with and
without iothread.

This patch fixes qemu-iotests 068, which was accidentally merged early
despite the dependency on ioeventfd.

Signed-off-by: QingFeng Hao 
Signed-off-by: Stefan Hajnoczi 
---
  hw/s390x/virtio-ccw.c | 2 +-
  target/s390x/kvm.c| 3 +++
  2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 90d37cb9ff..35896eb007 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -711,7 +711,7 @@ static void
virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp)
sch->cssid, sch->ssid, sch->schid, sch->devno, ccw_dev->devno.valid ?
"user-configured" : "auto-configured");
-if (!kvm_eventfds_enabled()) {
+if (kvm_enabled() && !kvm_eventfds_enabled()) {
  dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD;
  }
  
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c

index a3d00196f4..c37f9c3b9e 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2220,6 +2220,9 @@ int
kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t
sch, .addr = sch, .len = 8,
  };
+if (!kvm_enabled()) {
+return 0;
+}

I'd prefer if you moved the kvm_enabled() check into
s390_assign_subch_ioeventfd().

Thanks and I'll change it just as Christian's comment.



  if (!kvm_check_extension(kvm_state, KVM_CAP_IOEVENTFD)) {
  return -ENOSYS;
  }


--
Regards
QingFeng Hao

Re: [Qemu-devel] [PATCH v2 1/1] virtio-scsi-ccw: use ioeventfd even when KVM is disabled

2017-07-03 Thread QingFeng Hao




在 2017/7/3 18:20, Christian Borntraeger 写道:

On 07/03/2017 10:51 AM, QingFeng Hao wrote:

This patch is based on a similar patch from Stefan Hajnoczi -
commit c324fd0a39c ("virtio-pci: use ioeventfd even when KVM is disabled")

Do not check kvm_eventfds_enabled() when KVM is disabled since it
always returns 0.  Since commit 8c56c1a592b5092d91da8d8943c1d6462a6f
("memory: emulate ioeventfd") it has been possible to use ioeventfds in
qtest or TCG mode.

This patch makes -device virtio-scsi-ccw,iothread=iothread0 work even
when KVM is disabled.

I have tested that virtio-scsi-ccw works under tcg both with and without
iothread.

This patch fixes qemu-iotests 068, which was accidentally merged early
despite the dependency on ioeventfd.

Signed-off-by: QingFeng Hao 
---
  hw/s390x/virtio-ccw.c | 2 +-
  target/s390x/kvm.c| 3 +++
  2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 90d37cb9ff..35896eb007 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -711,7 +711,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, 
Error **errp)
  sch->cssid, sch->ssid, sch->schid, sch->devno,
  ccw_dev->devno.valid ? "user-configured" : "auto-configured");

-if (!kvm_eventfds_enabled()) {
+if (kvm_enabled() && !kvm_eventfds_enabled()) {
  dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD;
  }

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index a3d00196f4..c37f9c3b9e 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2220,6 +2220,9 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier 
*notifier, uint32_t sch,
  .addr = sch,
  .len = 8,
  };
+if (!kvm_enabled()) {
+return 0;
+}

Thinking more about it, wouldn't it be better to do something like this instead?

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 058ddad..cc47831 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -1240,7 +1240,11 @@ static inline int 
s390_assign_subch_ioeventfd(EventNotifier *notifier,
uint32_t sch_id, int vq,
bool assign)
  {
-return kvm_s390_assign_subch_ioeventfd(notifier, sch_id, vq, assign);
+if (kvm_enabled()) {
+return kvm_s390_assign_subch_ioeventfd(notifier, sch_id, vq, assign);
+} else {
+return 0;
+}
  }

Thanks. It makes sense. I'll change it.

  static inline void s390_crypto_reset(void)


FWIW, it seems that we (s390) do not have a functional equivalent of commit
8c56c1a592b5092d91da8d8943c1d6462a6f ("memory: emulate ioeventfd"), so we
will not use the iothreads.
Ok, but might s390 have skipped the iothread arguments and passed the 
test, so we could

still keep this test case?



  if (!kvm_check_extension(kvm_state, KVM_CAP_IOEVENTFD)) {
  return -ENOSYS;
  }



--
Regards
QingFeng Hao

Re: [Qemu-devel] [PATCH] spapr: fix memory hotplug error path

2017-07-03 Thread Bharata B Rao

On Mon, Jul 03, 2017 at 02:21:31PM +0200, Greg Kurz wrote:
> QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails.
> Let's propagate the error instead, like it is done everywhere else
> where spapr_drc_attach() is called.
> 
> Signed-off-by: Greg Kurz 
> ---
>  hw/ppc/spapr.c |   10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 70b3fd374e2b..e103be500189 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t 
> addr_start, uint64_t size,
>  int i, fdt_offset, fdt_size;
>  void *fdt;
>  uint64_t addr = addr_start;
> +Error *local_err = NULL;
> 
>  for (i = 0; i < nr_lmbs; i++) {
>  drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
> @@ -2611,7 +2612,12 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t 
> addr_start, uint64_t size,
>  fdt_offset = spapr_populate_memory_node(fdt, node, addr,
>  SPAPR_MEMORY_BLOCK_SIZE);
> 
> -spapr_drc_attach(drc, dev, fdt, fdt_offset, errp);
> +spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
> +if (local_err) {
> +g_free(fdt);
> +error_propagate(errp, local_err);
> +return;
> +}

There is some history to this. I was doing error recovery and propagation
here similarly during memory hotplug development phase until Igor
suggested that we shouldn't try to recover after we have done guest
visible changes.

Refer to "changes in v6" section in this post:
https://lists.gnu.org/archive/html/qemu-ppc/2015-06/msg00296.html

However at that time we were doing memory add by DRC index method
and hence would attach and online one LMB at a time.
In that method, if an intermediate attach fails we would end up with a few
LMBs being onlined by the guest already. However subsequently
we have switched (optionally, based on dedicated_hp_event_source) to
count-indexed method of hotplug where we do attach of all LMBs one by one
and then request the guest to hotplug all of them at once using count-indexed
method.

So it will be a bit tricky to abort for index based case and recover
correctly for count-indexed case.

Regards,
Bharata.

Re: [Qemu-devel] [PATCH v2 0/7] KVM: MMU: fast write protect

2017-07-03 Thread Xiao Guangrong




On 07/03/2017 11:47 PM, Paolo Bonzini wrote:



On 03/07/2017 16:39, Xiao Guangrong wrote:



On 06/20/2017 05:15 PM, guangrong.x...@gmail.com wrote:

From: Xiao Guangrong 

Changelog in v2:
thanks to Paolo's review, this version disables write-protect-all if
PML is supported


Hi Paolo,

Do you have time to have a look at this new version? ;)
Or I should wait until the patchset of dirty ring-buffer is merged?


I will look at it soon, but I still plan to merge dirty ring buffer first.

Thanks for your understanding,


Sure, I fully understand; thank you for bearing with my push. :)

Re: [Qemu-devel] [PATCH v2 06/15] kvmvapic: remove tcg related code

2017-07-03 Thread Zhong Yang

On Mon, Jul 03, 2017 at 04:28:00PM +0200, Paolo Bonzini wrote:
> 
> 
> On 03/07/2017 12:12, Yang Zhong wrote:
> > Since Paolo's below patch has fixed A20 issue
> > commit bbfa326fc8028e275eddf8c9965c2a1b59405b2e
> > target/i386: enable A20 automatically in system management mode
> 
> How is this patch related to kvmvapic?  It fixes a bug with vapic
> disabled, but it's not a reason to disable kvmvapic.
> 
> Paolo
> 
  Hello Paolo,

  At that time, in order to verify that the tcg code in the kvmvapic.c
  file is not useful, we disabled the kvmvapic rom in seabios and used the
  tcg accelerator to boot the guest image, and we found this A20 bug.

  Now that this A20 bug has been fixed, we can use tcg to boot up the guest
  image without the kvmvapic rom, which shows the tcg code in kvmvapic is not
  useful. This is the reason why I removed the tcg code instead of using
  tcg_enabled() to check the tcg code in kvmvapic.c. Thanks!

  Regards,

  Yang  

> > The tcg code in kvmvapic.c is NOT useful; that code needs to be removed.
> > 
> > Signed-off-by: Yang Zhong 
> > ---
> >  hw/i386/kvmvapic.c | 24 
> >  1 file changed, 24 deletions(-)

Re: [Qemu-devel] [PATCH 1/3] i386/msi: Correct mask of destination ID in MSI address

2017-07-03 Thread Peter Xu

On Thu, Jun 29, 2017 at 01:49:52AM -0400, Lan Tianyu wrote:
> From: Chao Gao 
> 
> According to SDM 10.11.1, only [19:12] bits of MSI address are
> Destination ID, change the mask to avoid ambiguity for VT-d spec
> has used the bit 4 to indicate a remappable interrupt request.
> 
> Signed-off-by: Chao Gao 
> Signed-off-by: Lan Tianyu 

Reviewed-by: Peter Xu 

> ---
>  include/hw/i386/apic-msidef.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/include/hw/i386/apic-msidef.h b/include/hw/i386/apic-msidef.h
> index 8b4d4cc..420b411 100644
> --- a/include/hw/i386/apic-msidef.h
> +++ b/include/hw/i386/apic-msidef.h
> @@ -26,6 +26,6 @@
>  
>  #define MSI_ADDR_DEST_ID_SHIFT  12
>  #define MSI_ADDR_DEST_IDX_SHIFT 4
> -#define  MSI_ADDR_DEST_ID_MASK  0x000
> +#define  MSI_ADDR_DEST_ID_MASK  0x000ff000
>  
>  #endif /* HW_APIC_MSIDEF_H */
> -- 
> 1.8.3.1
> 
> 

-- 
Peter Xu

Re: [Qemu-devel] [PATCH] i386/kvm: mask MSR_IA32_BNDCFGS if MPX is not enabled in guest cpuid

2017-07-03 Thread Haozhong Zhang

On 07/03/17 17:45 +0200, Paolo Bonzini wrote:
> 
> 
> On 03/07/2017 17:23, Haozhong Zhang wrote:
> > Otherwise, QEMU on a host with MPX support will try to set guest
> > MSR_IA32_BNDCFGS although guest MPX is not enabled, and result in
> > abort.
> > 
> > For example,
> >qemu-system-x86_64 -enable-kvm -cpu qemu64,-mpx ...
> > aborts with messages:
> >qemu-system-x86_64: error: failed to set MSR 0xd90 to 0x0
> >qemu-system-x86_64: /root/qemu.git/target/i386/kvm.c:1832: kvm_put_msrs: 
> > Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed
> 
> I think the fix should be in KVM, allowing BNDCFGS = 0 if
> host_initiated, even if MPX is not enabled.
> 

Agree. I've sent a KVM patch to fix this issue.

Thanks,
Haozhong

Re: [Qemu-devel] [PATCH 3/4] doc: add item for "-M enforce-config-section"

2017-07-03 Thread Peter Xu

On Mon, Jul 03, 2017 at 02:07:12PM -0300, Eduardo Habkost wrote:
> On Mon, Jul 03, 2017 at 10:44:07AM +0800, Peter Xu wrote:
> > It's never documented, and now we have one more parameter for it (which
> > means this one can be obsolete in the future). Document it properly.
> > 
> > Although now when enforce-config-section is set, it'll override the
> > other "-global" parameter, that is not necessarily a rule. Forbid that
> > usage in the document.
> > 
> > Suggested-by: Eduardo Habkost 
> > Signed-off-by: Peter Xu 
> > ---
> >  qemu-options.hx | 8 
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index 297bd8a..927c51f 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -85,6 +85,14 @@ Enables or disables NVDIMM support. The default is off.
> >  @item s390-squash-mcss=on|off
> >  Enables or disables squashing subchannels into the default css.
> >  The default is off.
> > +@item enforce-config-section=on|off
> > +Decides whether we will send the configuration section when doing
> > +migration. By default, it is turned on. We can set this to off to
> > +explicitly disable it.
> [...]
> 
> Wait, isn't it off by default?
> 
> This seems to imply that "-machine enforce-config-section=on" would have
> no effect at all, as the option would be already on by default.  This is
> not the case.
> 
> I suggest rewriting this as:
> 
>   If set to "on", force migration code to send configuration section even
>   if the machine-type sets the "migration.send-configuration" property
>   to "off".
>   Note: this parameter is obsolete, please use "-global
>   migration.send-configuration=on|off" instead.
>   Behavior is undefined if "enforce-config-section" and "-global
>   migration.send-configuration" are used together.
> 
> (Note: we probably should use proper markup (@option/@var/@samp?)
> instead of quotes above, to format the option names properly in the
> generated documentation.)

Yes, you are right.  How's this one? (markup used this time)

If @option{enforce-config-section} is set to @var{on}, force migration
code to send configuration section even if the machine-type sets the
@option{migration.send-configuration} property to @var{off}.
@option{enforce-config-section} cannot be used together with
@option{-global} @option{migration.send-configuration}. Behavior is
undefined if @option{enforce-config-section} and @option{-global}
@option{migration.send-configuration} are used together.

NOTE: this parameter is obsolete. Please use @option{-global}
@option{migration.send-configuration}=@var{on|off} instead.

Thanks,

-- 
Peter Xu

Re: [Qemu-devel] [PATCH 2/3] xen-pt: bind/unbind interrupt remapping format MSI

2017-07-03 Thread Lan Tianyu

Hi Anthony:

On 2017年06月30日 23:48, Anthony PERARD wrote:
> On Thu, Jun 29, 2017 at 01:49:53AM -0400, Lan Tianyu wrote:
>> From: Chao Gao 
>>
>> If a vIOMMU is exposed to guest, guest will configure the msi to remapping
>> format. The original code isn't suitable to the new format. A new pair
>> bind/unbind interfaces are added for this usage. This patch recognizes
>> this case and uses new interfaces to bind/unbind msi.
>>
>> Signed-off-by: Chao Gao 
>> Signed-off-by: Lan Tianyu 
> 
> Hi,
> 
> The patch series is going to need to be rebased on top of QEMU upstream.
> For starters, configure has changed a bit.

Thanks for your reminder. Will do that.


> 
>> ---
>>  configure | 54 
>> +++
>>  hw/xen/xen_pt_msi.c   | 50 ---
>>  include/hw/i386/apic-msidef.h |  1 +
>>  include/hw/xen/xen_common.h   | 25 
>>  4 files changed, 117 insertions(+), 13 deletions(-)
>>
>> diff --git a/configure b/configure
>> index 476210b..b3ac49f 100755
>> --- a/configure
>> +++ b/configure
>> @@ -1982,6 +1982,60 @@ EOF
>>  /*
>>   * If we have stable libs the we don't want the libxc compat
>>   * layers, regardless of what CFLAGS we may have been given.
>> + */
>> +#undef XC_WANT_COMPAT_EVTCHN_API
>> +#undef XC_WANT_COMPAT_GNTTAB_API
>> +#undef XC_WANT_COMPAT_MAP_FOREIGN_API
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#if !defined(HVM_MAX_VCPUS)
>> +# error HVM_MAX_VCPUS not defined
>> +#endif
>> +int main(void) {
>> +  xc_interface *xc = NULL;
>> +  xenforeignmemory_handle *xfmem;
>> +  xenevtchn_handle *xe;
>> +  xengnttab_handle *xg;
>> +  xen_domain_handle_t handle;
>> +  xengnttab_grant_copy_segment_t* seg = NULL;
>> +
>> +  xs_daemon_open();
>> +
>> +  xc = xc_interface_open(0, 0, 0);
>> +  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
>> +  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
>> +  xc_hvm_inject_msi(xc, 0, 0xf000, 0x);
>> +  xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
>> +  xc_domain_create(xc, 0, handle, 0, NULL, NULL);
>> +
>> +  xfmem = xenforeignmemory_open(0, 0);
>> +  xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0);
>> +
>> +  xe = xenevtchn_open(0, 0);
>> +  xenevtchn_fd(xe);
>> +
>> +  xg = xengnttab_open(0, 0);
>> +  xengnttab_grant_copy(xg, 0, seg);
>> +
>> +  xc_domain_update_msi_irq_remapping(xc, 0, 0, 0, 0, 0 ,0);
>> +
>> +  return 0;
>> +}
>> +EOF
>> +  compile_prog "" "$xen_libs $xen_stable_libs"
>> +then
>> +xen_ctrl_version=4100
>> +xen=yes
> 
> There have been some changes/refactoring in configure, so this won't
> work. The xen_ctrl_version got one more digit.
> 
> Can you try with this patch? Which is also simpler.

Sure. Thanks.

> diff --git a/configure b/configure
> index c571ad14e5..a06f2c0b92 100755
> --- a/configure
> +++ b/configure
> @@ -2021,6 +2021,24 @@ EOF
>  # Xen unstable
>  elif
>  cat > $TMPC < +#include 
> +int main(void) {
> +  xc_interface *xc = NULL;
> +
> +  xc_domain_update_msi_irq_remapping(xc, 0, 0, 0, 0, 0 ,0);
> +
> +  return 0;
> +}
> +EOF
> +compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs"
> +  then
> +  xen_stable_libs="-lxendevicemodel $xen_stable_libs"
> +  xen_ctrl_version=41000
> +  xen=yes
> +
> +# Xen 4.9
> +elif
> +cat > $TMPC <  #undef XC_WANT_COMPAT_DEVICEMODEL_API
>  #define __XEN_TOOLS__
>  #include 
> 
> 
>> index 8e1580d..4ba43a8 100644
>> --- a/include/hw/xen/xen_common.h
>> +++ b/include/hw/xen/xen_common.h
>> @@ -438,4 +438,29 @@ static inline int xengnttab_grant_copy(xengnttab_handle 
>> *xgt, uint32_t count,
>>  }
>>  #endif
>>  
>> +/* Xen before 4.10 */
>> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 4100
> 
> This will need to be
> CONFIG_XEN_CTRL_INTERFACE_VERSION < 41000
> 

Will update.

-- 
Best regards
Tianyu Lan

Re: [Qemu-devel] [PATCH v7 0/9] Convert to realize and cleanup

2017-07-03 Thread Mao Zhongyi


Hi, Michael

On 07/04/2017 01:41 AM, Michael S. Tsirkin wrote:

On Tue, Jun 27, 2017 at 02:16:46PM +0800, Mao Zhongyi wrote:

This series mainly implements the conversions of pci-bridge devices
i82801b11, io3130_upstream/downstream and so on to realize(). Naturally
part of error messages need to be converted to Error, then propagate
to its callers via the argument errp, bonus clean related minor flaw
up. In short, the former patches are prerequisites for latter ones.



Applied, thanks!

Please remember to Cc maintainers on all patches, this was
missing on patch 3.


Thank you for the reminder. I will remember.

Thanks,
Mao



v7:
* patch7: -drop the !local_err assert is really not appropriate,
   now revert it.[Marcel Apfelbaum]

v6:
* patch3: -rename the subject.
* patch6: -simplify the commit message.
  -use error_append_hint replace original error_setg rather
   than remove it directly. [Marcel Apfelbaum]
  -report the error message from local_err. [Marcel Apfelbaum]
v5:
* patch5: replace pci_add_capability2() with pci_add_capability(), because
  it's confusing to have a function named pci_add_capability2() if
  pci_add_capability() doesn't exist anymore. [Eduardo Habkost]
* patch8: a new patch that fix the return type of verify_irqchip_kernel().
* patch9: a new patch that use the errp directly instead of the local_err to
  propagate the error messages.

v4:
* patch4: changed from patch 5 in v3. use an elegant way to check
  the error, like

  function(...);
  if (function succeeded) {
 /* non-error code path here */
 foo = bar;
  }

  or

  function(...);
  if (function succeeded) {
  /* non-error code path here */
  foo = bar;
  } else {
  /* error path here */
  return ret;
  }

  for readability, instead of this:

  function(...)
  if (function failed) {
  return ...;  /* or: "goto out" */
  }

  /* non-error code path here */
  foo = bar; [Eduardo Habkost]

  meanwhile, split previous patch4 out. [Michael S. Tsirkin]
* patch5: a new patch that replace pci_add_capability() with
  pci_add_capability2(). [Eduardo Habkost]

v3:
* patch2: explain the specific meaning of the return value, also
  improve the commit message. [Marcel Apfelbaum]
* patch3: simplify the subject and commit message, fix another
  wrong assert. [Marcel Apfelbaum]
* patch4: adjust the subject.
* patch5: fix a wrong optimization for errp. [Eduardo Habkost]
* patch7: a new patch that converts shpc_init() to Error in order
  to propagate the error better.
v2:
* patch1: subject and commit message were rewritten by Markus.
* patch2: comment was added to pci_add_capability2().
* patch3: a new patch that fix the wrong return value judgment condition.
* patch4: a new patch that fix code style problems.
* patch5: add an errp argument for pci_add_capability to pass
  error for its callers.
* patch6: convert part of pci-bridge device to realize.

v1:
* patch1: fix unreasonable return value check

Cc: m...@redhat.com
Cc: mar...@redhat.com
Cc: arm...@redhat.com
Cc: dmi...@daynix.com
Cc: jasow...@redhat.com
Cc: kra...@redhat.com
Cc: alex.william...@redhat.com
Cc: pbonz...@redhat.com
Cc: r...@twiddle.net
Cc: ehabk...@redhat.com

Mao Zhongyi (9):
  pci: Clean up error checking in pci_add_capability()
  pci: Add comment for pci_add_capability2()
  pci: Fix the wrong assertion.
  pci: Make errp the last parameter of pci_add_capability()
  pci: Replace pci_add_capability2() with pci_add_capability()
  pci: Convert to realize
  pci: Convert shpc_init() to Error
  i386/kvm/pci-assign: Fix return type of verify_irqchip_kernel()
  i386/kvm/pci-assign: Use errp directly rather than local_err

 hw/i386/amd_iommu.c| 24 -
 hw/i386/kvm/pci-assign.c   | 54 ++
 hw/ide/ich.c   |  2 +-
 hw/net/e1000e.c| 30 -
 hw/net/eepro100.c  | 18 ++---
 hw/pci-bridge/i82801b11.c  | 12 -
 hw/pci-bridge/pci_bridge_dev.c | 14 +-
 hw/pci-bridge/pcie_root_port.c | 18 ++---
 hw/pci-bridge/xio3130_downstream.c | 20 +++---
 hw/pci-bridge/xio3130_upstream.c   | 20 +++---
 hw/pci/msi.c   |  2 +-
 hw/pci/msix.c  |  2 +-
 hw/pci/pci.c   | 24 +++--
 hw/pci/pci_bridge.c|  8 --
 hw/pci/pcie.c  | 28 +++-
 hw/pci/shpc.c  | 10 ---
 hw/pci/slotid_cap.c| 12 ++---
 hw/usb/hcd-xhci.c  |  2 +-
 hw/vfio/pci.c  | 15 ++-
 hw/virtio/virtio-pci.c | 12 +++

Re: [Qemu-devel] [PATCH 2/4] vl: move global property, migrate init earlier

2017-07-03 Thread Peter Xu

On Mon, Jul 03, 2017 at 11:59:03AM -0300, Eduardo Habkost wrote:
> On Mon, Jul 03, 2017 at 10:44:06AM +0800, Peter Xu wrote:
> > Currently drive_init_func() may call migrate_get_current() while the
> > migrate object is still not ready yet at that time. Move the migration
> > object init earlier, along with the global properties, right after
> > acceleration init.
> > 
> > This fixes a breakage for iotest 055, which caused an assertion failure.
> > 
> > Reported-by: Max Reitz 
> > Reported-by: Philippe Mathieu-Daudé 
> > Fixes: 3df663 ("migration: move only_migratable to MigrationState")
> > Signed-off-by: Peter Xu 
> > ---
> >  vl.c | 24 
> >  1 file changed, 12 insertions(+), 12 deletions(-)
> > 
> > diff --git a/vl.c b/vl.c
> > index 0c497a3..2ae4313 100644
> > --- a/vl.c
> > +++ b/vl.c
> > @@ -4414,6 +4414,18 @@ int main(int argc, char **argv, char **envp)
> >  
> >  configure_accelerator(current_machine);
> >  
> > +/*
> > + * Register all the global properties, including accel properties,
> > + * machine properties, and user-specified ones.
> > + */
> > +register_global_properties(current_machine);
> > +
> > +/*
> > + * Migration object can only be created after global properties
> > + * are applied correctly.
> > + */
> > +migration_object_init();
> > +
> 
> So, things that might introduce bugs here are:
> 1) Unexpected qdev_prop_register_global() calls between this place and
>the original register_global_properties() call (that would now happen
>in a different order).
> 2) register_global_properties() seeing a different global property list
>because it is being called earlier.
>2.1) AccelClass::global_props is statically defined and will be the
> same here. Not a problem.
>2.2) MachineClass::compat_props: same as above.
>2.3) user-provided global properties: we need to ensure all
> properties in the "global" QemuOpts section are already
> available at this point.
> 
> 
> To ensure (1) is not a problem, we need to check all calls for
> qdev_prop_register_global().  The callers are:
> 
> * configure_rtc()
>   - Called very early, when parsing command-line options.  Not a problem.[1]
> * global_init_func()
>   * Called by user_register_global_props()
> * Called by register_global_properties().
>   - That's the code we're moving.  Not a problem.
> * QEMU_OPTION_rtc_td_hack case in main()
>   - Called very early, when parsing command-line options.  Not a problem.[1]
> * QEMU_OPTION_no_kvm_pit_reinjection case in main()
>   - Called very early, when parsing command-line options.  Not a problem.[1]
> * register_compat_prop()
>   * Called by machine_register_compat_props()
> * Called by register_global_properties().
>   - That's the code we're moving.  Not a problem.
>   * Called by machine_register_compat_for_subclass()
> * Called by machine_register_compat_props() (see above)
>   * Called by register_compat_props_array()
> * Called by accel_register_compat_props()
>   * Called by register_global_properties().
> - That's the code we're moving.  Not a problem.
> * qdev_prop_register_global_list()
>   - Used only by unit test code.
> * cpu_common_parse_features()
>   - Used when initializing CPUs, which is done much later, when
> machine_run_board_init() is called (or when -device is handled by
> device_init_func()).  Not a problem.[2]
> * x86_cpu_parse_featurestr()
>   - Same as above.
> 
> 
> To ensure (2.3) is not a problem, we need to look for references to
> qemu_find_opts("global") or qemu_global_opts.  They are:
> 
> * user_register_global_props()
>   - This is the code we're moving.  Not a problem.
> * default_driver_check() call at main()
>   - This happens earlier, before the code we're moving.  Not a problem.
> * qemu_global_option()
>   - Called very early, when parsing command-line options.  Not a problem.
> 
> 
> So the code reordering looks OK.
> 
> Reviewed-by: Eduardo Habkost 

I really appreciate such an in-depth review of this patch.

> 
> 
> Notes about things we need to fix in the future:
> 
> [1] I think they should be replaced by qemu_global_option() calls, though.
> [2] This is a fragile portion of the global property code and should be
> eventually moved to the command-line parsing section of main().

Agree on both.

Thanks again!

-- 
Peter Xu

Re: [Qemu-devel] [PULL 01/21] virtio-net: enable configurable tx queue size

2017-07-03 Thread Wei Wang


On 07/04/2017 03:44 AM, Michael S. Tsirkin wrote:

From: Wei Wang 

This patch enables the virtio-net tx queue size to be configurable
between 256 (the default queue size) and 1024 by the user when the
vhost-user backend is used.

Currently, the maximum tx queue size for other backends is 512 due
to the following limitations:
- QEMU backend: the QEMU backend implementation in some cases may
send 1024+1 iovs to writev.
- Vhost_net backend: there are possibilities that the guest sends
a vring_desc of memory which crosses a MemoryRegion thereby
generating more than 1024 iovs after translation from guest-physical
address in the backend.

Signed-off-by: Wei Wang 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
  include/hw/virtio/virtio-net.h |  1 +
  hw/net/virtio-net.c| 32 ++--
  2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 602b486..b81b6a4 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -36,6 +36,7 @@ typedef struct virtio_net_conf
  int32_t txburst;
  char *tx;
  uint16_t rx_queue_size;
+uint16_t tx_queue_size;
  uint16_t mtu;
  } virtio_net_conf;
  
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c

index 91eddaf..a1fc0db 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -34,8 +34,11 @@
  
  /* previously fixed value */

  #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
+#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
+
  /* for now, only allow larger queues; with virtio-1, guest can downsize */
  #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
+#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
  
  /*

   * Calculate the number of bytes up to and including the given 'field' of
@@ -1508,15 +1511,18 @@ static void virtio_net_add_queue(VirtIONet *n, int 
index)
  
  n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,

 virtio_net_handle_rx);
+
  if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
  n->vqs[index].tx_vq =
-virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
+virtio_add_queue(vdev, n->net_conf.tx_queue_size,
+ virtio_net_handle_tx_timer);
  n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
virtio_net_tx_timer,
&n->vqs[index]);
  } else {
  n->vqs[index].tx_vq =
-virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
+virtio_add_queue(vdev, n->net_conf.tx_queue_size,
+ virtio_net_handle_tx_bh);
  n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
  }
  
@@ -1927,6 +1933,17 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)

  return;
  }
  
+if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||

+n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
+!is_power_of_2(n->net_conf.tx_queue_size)) {
+error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
+   "must be a power of 2 between %d and %d",
+   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
+   VIRTQUEUE_MAX_SIZE);
+virtio_cleanup(vdev);
+return;
+}
+
  n->max_queues = MAX(n->nic_conf.peers.queues, 1);
  if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
  error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
@@ -1947,6 +1964,15 @@ static void virtio_net_device_realize(DeviceState *dev, 
Error **errp)
  error_report("Defaulting to \"bh\"");
  }
  
+/*

+ * Currently, backends other than vhost-user don't support 1024 queue
+ * size.
+ */
+if (n->net_conf.tx_queue_size == VIRTQUEUE_MAX_SIZE &&
+n->nic_conf.peers.ncs[0]->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
+n->net_conf.tx_queue_size = VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
+}
+
  for (i = 0; i < n->max_queues; i++) {
  virtio_net_add_queue(n, i);
  }
@@ -2106,6 +2132,8 @@ static Property virtio_net_properties[] = {
  DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
  DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
+DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
+   VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
  DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
  DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
   true),


Btw, users also expect that support for configuring the tx queue size will
be added to libvirt soon.

Best,
Wei

Re: [Qemu-devel] [PATCH RFC 0/6] q35: add acpi pci hotplug support

2017-07-03 Thread Alexander Bezzubikov

That is why I think we can consider the possibility of forgetting about ACPI
hot plug in pcie-pci bridge and using only SHPC (with some correcting work).
Especially since q35 is used only for 'modern' Windows guests and there're
no big problems with SHPC on Linux guests.

вт, 4 июля 2017 г. в 1:06, Alexander Bezzubikov :

> Tried it on Win7 Enterprise SP1 - SHPC works well,  _OSC patches aren't
> necessary (since pci-bridge has its own controller, I suppose).
> On Linux guests it works when adding device from CLI with -device, but OS
> seems to fail detecting the device when I add it with device_add from
> monitor.
> Also there're some issues with unplugging on Linux (haven't tested
> unplugging on Windows yet). That's the news.
>
> 2017-07-03 21:29 GMT+03:00 Michael S. Tsirkin :
>
>> On Mon, Jul 03, 2017 at 09:26:33PM +0300, Marcel Apfelbaum wrote:
>> > On 03/07/2017 19:34, Michael S. Tsirkin wrote:
>> > > On Mon, Jul 03, 2017 at 02:27:11PM +0200, Igor Mammedov wrote:
>> > > > On Fri, 30 Jun 2017 10:25:05 +0300
>> > > > Marcel Apfelbaum  wrote:
>> > > >
>> > > > [...]
>> > > > >
>> > > > > So for the modern systems not supporting PCI ACPI hotplug
>> > > > > we don't need pci-bridges anyway, but for the older ones
>> > > > > the ACPI code of the pci-bridge will be loaded into the
>> > > > > ACPI namespace only if a pci-bridge is actually hot-plugged.
>> > > >
>> > > > just note that the set of 'older' guest OSes is limited to
>> > > > ones that do not support SHPC (i.e. to EOLed WinXP & co)
>> > > > as for linux and more modern Windows SHPC hotplug should
>> > > > just work without our ACPI hack (which taxes low memory
>> > > > to keep acpi tables for bridges).
>> > > >
>> > > > So I'm in favor of Michael's suggestion to leave ACPI PCI
>> > > > only in PC machine for old WinXP guests and to keep Q35
>> > > > clean, where linux or newer Windows guests could just
>> > > > use standard SHPC.
>> > > >
>> > > > [...]
>> > >
>> > > I didn't realize windows actually supports SHPC for PCI.
>> >
>> > Me neither, if Igor is right I am all for shpc hotplug
>> > since Q35 is not supposed to support older guests.
>> >
>> > I remember I succeeded to enable shpc hotplug some time
>> > ago, but only for Linux guests.
>> >
>> > Igor, do you have some spec/doc on newer Windows OSes that confirm
>> > PCI shpc hotplug support?
>>
>> Just try it, easier than poking at specs which aren't always up to date.
>>
>> > >
>> > > Do they correctly set _OSC Arg3, bit offset 1?
>> > > SHPC Native Hot Plug control
>> > > The OS sets this bit to 1 to request control over PCI/PCI-X
>> Standard Hot-Plug Controller
>> > > (SHPC) hot plug. If the OS successfully receives control of this
>> feature, it must track and
>> > > update the status of hot plug slots and handle hot plug events as
>> described in the SHPC
>> > > Specification.
>> > > I was under impression they only set bit 0.
>> > >
>> >
>> > Alexandr, if modern Windows OSes do support shpc, it makes our
>> > job easier, can you please try to enable shpc hotplug?
>> >
>> > Thanks,
>> > Marcel
>>
>> No need to enable or even have a bridge for that at all -
>> set the bit in _OSC, see what does guest enable.
>>
>
>
>
> --
> Alexander Bezzubikov
>
-- 
Alexander Bezzubikov

Re: [Qemu-devel] [PATCH 4/4] target-m68k: add fscale, fgetman and fgetexp

2017-07-03 Thread Laurent Vivier

Le 03/07/2017 à 22:31, Richard Henderson a écrit :
> On 07/03/2017 12:50 PM, Laurent Vivier wrote:
>> fmove.x #0x0ABCDEF12345,%fp0
>> fgetman.x %fp0,%fp6
>> fp02.1518178707571747286191852003521627e-4938(raw
>> 0x0abcdef12345)
>> fp61.34103012886691431049257516861(raw
>> 0x3fffabcdef123450)
> 
> This one shows exactly what I was thinking about.
> 
>> So I guess the mantissa must be shifted left until we have a 1 in the
>> explicit integer part bit?
> 
> Yes.
> 
> Please try fgetexp on this same input.

fmove.x #0x0ABCDEF12345,%fp0
fgetexp.x %fp0,%fp6
fp0(raw 0x0abcdef12345)
fp6-16403

> I suspect the answer is -16384 - clz64(val->l.lower).

It looks like -16383 - clz64(val->l.lower)

fmove.x #0x56789ABCDEF12345,%fp0
fgetexp.x %fp0,%fp6
fp0(raw 0x56789abcdef12345)
fp6-16384

fmove.x #0x86789ABCDEF12345,%fp0
fgetexp.x %fp0,%fp6
fp0(raw 0x86789abcdef12345)
fp6-16383

Thanks,
Laurent

[Qemu-devel] [PATCH v2 15/15] qemu-io: Relax 'alloc' now that block-status doesn't assert

2017-07-03 Thread Eric Blake

Previously, the alloc command required that input parameters be
sector-aligned and clamped to 32 bits, because the underlying
bdrv_is_allocated used a 32-bit parameter and asserted aligned
inputs.  But now that we have fixed block status to report a
64-bit bytes value, and to properly round requests on behalf of
guests, we can pass any values, and can use qemu-io to add
coverage that our rounding is correct regardless of the guest
alignment constraints.

Update iotest 177 to intentionally probe block status at
unaligned boundaries, which also required tweaking the image
prep to leave an unallocated portion to the image under test.

Signed-off-by: Eric Blake 

---
v2: new patch
---
 qemu-io-cmds.c | 13 -
 tests/qemu-iotests/177 | 11 +--
 tests/qemu-iotests/177.out | 18 +-
 3 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index e529b8f..84c2e47 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -1769,10 +1769,6 @@ static int alloc_f(BlockBackend *blk, int argc, char 
**argv)
 if (offset < 0) {
 print_cvtnum_err(offset, argv[1]);
 return 0;
-} else if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) {
-printf("%" PRId64 " is not a sector-aligned value for 'offset'\n",
-   offset);
-return 0;
 }

 if (argc == 3) {
@@ -1780,19 +1776,10 @@ static int alloc_f(BlockBackend *blk, int argc, char 
**argv)
 if (count < 0) {
 print_cvtnum_err(count, argv[2]);
 return 0;
-} else if (count > INT_MAX * BDRV_SECTOR_SIZE) {
-printf("length argument cannot exceed %llu, given %s\n",
-   INT_MAX * BDRV_SECTOR_SIZE, argv[2]);
-return 0;
 }
 } else {
 count = BDRV_SECTOR_SIZE;
 }
-if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) {
-printf("%" PRId64 " is not a sector-aligned value for 'count'\n",
-   count);
-return 0;
-}

 remaining = count;
 sum_alloc = 0;
diff --git a/tests/qemu-iotests/177 b/tests/qemu-iotests/177
index f8ed8fb..36e3b87 100755
--- a/tests/qemu-iotests/177
+++ b/tests/qemu-iotests/177
@@ -51,7 +51,7 @@ echo "== setting up files =="
 TEST_IMG="$TEST_IMG.base" _make_test_img $size
 $QEMU_IO -c "write -P 11 0 $size" "$TEST_IMG.base" | _filter_qemu_io
 _make_test_img -b "$TEST_IMG.base"
-$QEMU_IO -c "write -P 22 0 $size" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -P 22 0 110M" "$TEST_IMG" | _filter_qemu_io

 # Limited to 64k max-transfer
 echo
@@ -82,6 +82,12 @@ $QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \
  -c "discard 8001 30M" | _filter_qemu_io

 echo
+echo "== block status smaller than alignment =="
+limits=align=4k
+$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \
+-c "alloc 1 1" -c "alloc 0x6d0 1000" -c map | _filter_qemu_io
+
+echo
 echo "== verify image content =="

 function verify_io()
@@ -103,7 +109,8 @@ function verify_io()
 echo read -P 0 32M 32M
 echo read -P 22 64M 13M
 echo read -P $discarded 77M 29M
-echo read -P 22 106M 22M
+echo read -P 22 106M 4M
+echo read -P 11 110M 18M
 }

 verify_io | $QEMU_IO -r "$TEST_IMG" | _filter_qemu_io
diff --git a/tests/qemu-iotests/177.out b/tests/qemu-iotests/177.out
index 43a7778..8c05f69 100644
--- a/tests/qemu-iotests/177.out
+++ b/tests/qemu-iotests/177.out
@@ -5,8 +5,8 @@ Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
 wrote 134217728/134217728 bytes at offset 0
 128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
backing_file=TEST_DIR/t.IMGFMT.base
-wrote 134217728/134217728 bytes at offset 0
-128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 115343360/115343360 bytes at offset 0
+110 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

 == constrained alignment and max-transfer ==
 wrote 131072/131072 bytes at offset 1000
@@ -26,6 +26,12 @@ wrote 33554432/33554432 bytes at offset 33554432
 discard 31457280/31457280 bytes at offset 8001
 30 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

+== block status smaller than alignment ==
+1/1 bytes allocated at offset 1 bytes
+16/1000 bytes allocated at offset 110 MiB
+110 MiB (0x6e0) bytes allocated at offset 0 bytes (0x0)
+18 MiB (0x120) bytes not allocated at offset 110 MiB (0x6e0)
+
 == verify image content ==
 read 1000/1000 bytes at offset 0
 1000 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -43,12 +49,14 @@ read 13631488/13631488 bytes at offset 67108864
 13 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 30408704/30408704 bytes at offset 80740352
 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 23068672/23068672 bytes at offset 49056
-22 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4194304/4194304 bytes at offset 49056
+4 MiB, X ops; XX:XX:XX.X (XXX

[Qemu-devel] [PATCH v2 09/15] block: Switch bdrv_co_get_block_status() to byte-based

2017-07-03 Thread Eric Blake

We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Convert another internal
function (no semantic change), and as with its public counterpart,
rename to bdrv_co_block_status() to make the compiler enforce that
we catch all uses.  For now, we assert that callers still pass
aligned data, but ultimately, this will be the function where we
hand off to a byte-based driver callback, and will eventually need
to add logic to ensure we round calls according to the driver's
request_alignment then touch up the result handed back to the
caller, to start permitting a caller to pass unaligned offsets.

Signed-off-by: Eric Blake 

---
v2: rebase to earlier changes
---
 block/io.c | 90 --
 1 file changed, 53 insertions(+), 37 deletions(-)

diff --git a/block/io.c b/block/io.c
index 2662f37..2d324af 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1721,42 +1721,43 @@ int64_t coroutine_fn 
bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
  * status; this is a hint that a larger 'pnum' result is more
  * important than including BDRV_BLOCK_OFFSET_VALID in the return.
  *
- * If 'sector_num' is beyond the end of the disk image the return value is
+ * If 'offset' is beyond the end of the disk image the return value is
  * BDRV_BLOCK_EOF and 'pnum' is set to 0.
  *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
+ * 'pnum' is set to the number of bytes (including and immediately following
+ * the specified offset) that are known to be in the same
+ * allocated/unallocated state.  It may be NULL.
  *
- * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
+ * 'bytes' is the max value 'pnum' should be set to.  If bytes goes
  * beyond the end of the disk image it will be clamped; if 'pnum' is set to
  * the end of the image, then the returned value will include BDRV_BLOCK_EOF.
  *
  * If returned value is positive, BDRV_BLOCK_OFFSET_VALID bit is set, and
- * 'file' is non-NULL, then '*file' points to the BDS which the sector range
- * is allocated in.
+ * 'file' is non-NULL, then '*file' points to the BDS which owns the
+ * allocated sector that contains offset.
  */
-static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
- bool allocation,
- int64_t sector_num,
- int nb_sectors, int *pnum,
- BlockDriverState **file)
+static int64_t coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
+ bool allocation,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum,
+ BlockDriverState **file)
 {
-int64_t total_sectors;
-int64_t n;
+int64_t total_size;
+int64_t n; /* bytes */
 int64_t ret, ret2;
 BlockDriverState *local_file = NULL;
+int count; /* sectors */

 assert(pnum);
-total_sectors = bdrv_nb_sectors(bs);
-if (total_sectors < 0) {
+total_size = bdrv_getlength(bs);
+if (total_size < 0) {
 if (file) {
 *file = NULL;
 }
-return total_sectors;
+return total_size;
 }

-if (sector_num >= total_sectors) {
+if (offset >= total_size) {
 *pnum = 0;
 if (file) {
 *file = NULL;
@@ -1764,19 +1765,19 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status(BlockDriverState *bs,
 return BDRV_BLOCK_EOF;
 }

-n = total_sectors - sector_num;
-if (n < nb_sectors) {
-nb_sectors = n;
+n = total_size - offset;
+if (n < bytes) {
+bytes = n;
 }

 if (!bs->drv->bdrv_co_get_block_status) {
-*pnum = nb_sectors;
+*pnum = bytes;
 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
-if (sector_num + nb_sectors == total_sectors) {
+if (offset + bytes == total_size) {
 ret |= BDRV_BLOCK_EOF;
 }
 if (bs->drv->protocol_name) {
-ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
+ret |= BDRV_BLOCK_OFFSET_VALID | (offset & BDRV_BLOCK_OFFSET_MASK);
 if (file) {
 *file = bs;
 }
@@ -1787,18 +1788,27 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status(BlockDriverState *bs,
 }

 bdrv_inc_in_flight(bs);
-ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
+/*
+ * TODO: Rather than require aligned offsets, we could instead
+ * round to the driver's request_alignment here, then touch up
+ * count afterwards back to the caller's

[Qemu-devel] [PATCH v2 13/15] block: Convert bdrv_get_block_status_above() to bytes

2017-07-03 Thread Eric Blake

We are gradually moving away from sector-based interfaces, towards
byte-based.  In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.

Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated.  For now, the io.c layer still assert()s
that all callers are sector-aligned, but that can be relaxed when a
later patch implements byte-based block status in the drivers.

For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status().  But some
code, particularly bdrv_block_status(), gets a lot simpler because
it no longer has to mess with sectors.

For ease of review, bdrv_get_block_status() was tackled separately.

Signed-off-by: Eric Blake 

---
v2: rebase to earlier changes
---
 include/block/block.h | 10 +-
 block/io.c| 39 ---
 block/mirror.c| 12 
 block/qcow2.c |  8 +++-
 qemu-img.c| 39 +++
 5 files changed, 43 insertions(+), 65 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index e3e6582..ed2ea69 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -424,11 +424,11 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState 
*bs);
 int64_t bdrv_block_status(BlockDriverState *bs, int64_t offset,
   int64_t bytes, int64_t *pnum,
   BlockDriverState **file);
-int64_t bdrv_get_block_status_above(BlockDriverState *bs,
-BlockDriverState *base,
-int64_t sector_num,
-int nb_sectors, int *pnum,
-BlockDriverState **file);
+int64_t bdrv_block_status_above(BlockDriverState *bs,
+BlockDriverState *base,
+int64_t offset,
+int64_t bytes, int64_t *pnum,
+BlockDriverState **file);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
   int64_t *pnum);
 int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
diff --git a/block/io.c b/block/io.c
index 85353fa..91d3e99 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1903,7 +1903,7 @@ static int64_t coroutine_fn 
bdrv_co_block_status_above(BlockDriverState *bs,
 return ret;
 }

-/* Coroutine wrapper for bdrv_get_block_status_above() */
+/* Coroutine wrapper for bdrv_block_status_above() */
 static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
 {
 BdrvCoBlockStatusData *data = opaque;
@@ -1950,43 +1950,20 @@ static int64_t 
bdrv_common_block_status_above(BlockDriverState *bs,
 return data.ret;
 }

-int64_t bdrv_get_block_status_above(BlockDriverState *bs,
-BlockDriverState *base,
-int64_t sector_num,
-int nb_sectors, int *pnum,
-BlockDriverState **file)
+int64_t bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
+int64_t offset, int64_t bytes, int64_t *pnum,
+BlockDriverState **file)
 {
-int64_t ret;
-int64_t n;
-
-ret = bdrv_common_block_status_above(bs, base, false,
- sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE,
- &n, file);
-if (ret < 0) {
-return ret;
-}
-assert(QEMU_IS_ALIGNED(n, BDRV_SECTOR_SIZE));
-*pnum = n >> BDRV_SECTOR_BITS;
-return ret;
+return bdrv_common_block_status_above(bs, base, false, offset, bytes,
+  pnum, file);
 }

 int64_t bdrv_block_status(BlockDriverState *bs,
   int64_t offset, int64_t bytes, int64_t *pnum,
   BlockDriverState **file)
 {
-int64_t ret;
-int n;
-
-assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
-assert(bytes <= BDRV_REQUEST_MAX_BYTES);
-ret = bdrv_get_block_status_above(bs, backing_bs(bs),
-  offset >> BDRV_SECTOR_BITS,
-  bytes >> BDRV_SECTOR_BITS, &n, file);
-if (pnum) {
-*pnum = n * BDRV_SECTOR_SIZE;
-}
-return ret;
+return bdrv_block_status_above(bs, backing_bs(bs),
+   offset, bytes, pnum, file);
 }

 int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, in

[Qemu-devel] [PATCH v2 07/15] qemu-img: Switch get_block_status() to byte-based

2017-07-03 Thread Eric Blake

We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Continue by converting
an internal function (no semantic change), and simplifying its
caller accordingly.

Signed-off-by: Eric Blake 

---
v2: no change
---
 qemu-img.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index daba954..7b7f992 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2674,14 +2674,16 @@ static void dump_map_entry(OutputFormat output_format, 
MapEntry *e,
 }
 }

-static int get_block_status(BlockDriverState *bs, int64_t sector_num,
-int nb_sectors, MapEntry *e)
+static int get_block_status(BlockDriverState *bs, int64_t offset,
+int64_t bytes, MapEntry *e)
 {
 int64_t ret;
 int depth;
 BlockDriverState *file;
 bool has_offset;
+int nb_sectors = bytes >> BDRV_SECTOR_BITS;

+assert(bytes < INT_MAX);
 /* As an optimization, we could cache the current range of unallocated
  * clusters in each file of the chain, and avoid querying the same
  * range repeatedly.
@@ -2689,8 +2691,8 @@ static int get_block_status(BlockDriverState *bs, int64_t 
sector_num,

 depth = 0;
 for (;;) {
-ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &nb_sectors,
-&file);
+ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS, nb_sectors,
+&nb_sectors, &file);
 if (ret < 0) {
 return ret;
 }
@@ -2710,7 +2712,7 @@ static int get_block_status(BlockDriverState *bs, int64_t 
sector_num,
 has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);

 *e = (MapEntry) {
-.start = sector_num * BDRV_SECTOR_SIZE,
+.start = offset,
 .length = nb_sectors * BDRV_SECTOR_SIZE,
 .data = !!(ret & BDRV_BLOCK_DATA),
 .zero = !!(ret & BDRV_BLOCK_ZERO),
@@ -2840,16 +2842,12 @@ static int img_map(int argc, char **argv)

 length = blk_getlength(blk);
 while (curr.start + curr.length < length) {
-int64_t nsectors_left;
-int64_t sector_num;
-int n;
-
-sector_num = (curr.start + curr.length) >> BDRV_SECTOR_BITS;
+int64_t offset = curr.start + curr.length;
+int64_t n;

 /* Probe up to 1 GiB at a time.  */
-nsectors_left = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE) - sector_num;
-n = MIN(1 << (30 - BDRV_SECTOR_BITS), nsectors_left);
-ret = get_block_status(bs, sector_num, n, &next);
+n = QEMU_ALIGN_DOWN(MIN(1 << 30, length - offset), BDRV_SECTOR_SIZE);
+ret = get_block_status(bs, offset, n, &next);

 if (ret < 0) {
 error_report("Could not read file metadata: %s", strerror(-ret));
-- 
2.9.4

[Qemu-devel] [PATCH v2 08/15] block: Convert bdrv_get_block_status() to bytes

2017-07-03 Thread Eric Blake

We are gradually moving away from sector-based interfaces, towards
byte-based.  In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.

Changing the name of the function from bdrv_get_block_status() to
bdrv_block_status() ensures that the compiler enforces that all
callers are updated.  For now, the io.c layer still assert()s that
all callers are sector-aligned, but that can be relaxed when a later
patch implements byte-based block status in the drivers.

Note that we have an inherent limitation in the BDRV_BLOCK_* return
values: BDRV_BLOCK_OFFSET_VALID can only return the start of a
sector, even if we later relax the interface to query for the status
starting at an intermediate byte; document the obvious interpretation
that valid offsets are always sector-relative.

Therefore, for the most part this patch is just the addition of scaling
at the callers followed by inverse scaling at bdrv_block_status().  But
some code, particularly bdrv_is_allocated(), gets a lot simpler because
it no longer has to mess with sectors.

For ease of review, bdrv_get_block_status_above() will be tackled
separately.

Signed-off-by: Eric Blake 

---
v2: rebase to earlier changes
---
 include/block/block.h | 12 +++-
 block/io.c| 31 +++
 block/qcow2-cluster.c |  2 +-
 qemu-img.c| 20 +++-
 4 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index cc82a0d..e3e6582 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -138,8 +138,10 @@ typedef struct HDGeometry {
  *
  * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK)
  * represent the offset in the returned BDS that is allocated for the
- * corresponding raw data; however, whether that offset actually contains
- * data also depends on BDRV_BLOCK_DATA and BDRV_BLOCK_ZERO, as follows:
+ * corresponding raw data.  Individual bytes are at the same sector-relative
+ * locations (and thus, this bit cannot be set for mappings which are
+ * not equivalent modulo 512).  However, whether that offset actually
+ * contains data also depends on BDRV_BLOCK_DATA, as follows:
  *
  * DATA ZERO OFFSET_VALID
  *  ttt   sectors read as zero, returned file is zero at offset
@@ -419,9 +421,9 @@ int bdrv_has_zero_init_1(BlockDriverState *bs);
 int bdrv_has_zero_init(BlockDriverState *bs);
 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
-int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
-  int nb_sectors, int *pnum,
-  BlockDriverState **file);
+int64_t bdrv_block_status(BlockDriverState *bs, int64_t offset,
+  int64_t bytes, int64_t *pnum,
+  BlockDriverState **file);
 int64_t bdrv_get_block_status_above(BlockDriverState *bs,
 BlockDriverState *base,
 int64_t sector_num,
diff --git a/block/io.c b/block/io.c
index b3ba9af..2662f37 100644
--- a/block/io.c
+++ b/block/io.c
@@ -671,7 +671,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
 {
 int64_t target_size, ret, bytes, offset = 0;
 BlockDriverState *bs = child->bs;
-int n; /* sectors */

 target_size = bdrv_getlength(bs);
 if (target_size < 0) {
@@ -683,24 +682,23 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
 if (bytes <= 0) {
 return 0;
 }
-ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS,
-bytes >> BDRV_SECTOR_BITS, &n, NULL);
+ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL);
 if (ret < 0) {
 error_report("error getting block status at offset %" PRId64 ": 
%s",
  offset, strerror(-ret));
 return ret;
 }
 if (ret & BDRV_BLOCK_ZERO) {
-offset += n * BDRV_SECTOR_BITS;
+offset += bytes;
 continue;
 }
-ret = bdrv_pwrite_zeroes(child, offset, n * BDRV_SECTOR_SIZE, flags);
+ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
 if (ret < 0) {
 error_report("error writing zeroes at offset %" PRId64 ": %s",
  offset, strerror(-ret));
 return ret;
 }
-offset += n * BDRV_SECTOR_SIZE;
+offset += bytes;
 }
 }

@@ -1956,13 +1954,22 @@ int64_t bdrv_get_block_status_above(BlockDriverState 
*bs,
   nb_sectors, pnum, file);
 }

-int64_t bdrv_get_block_status(BlockDriverState *bs,
-  int64_t sect

[Qemu-devel] [PATCH v2 06/15] block: Switch bdrv_make_zero() to byte-based

2017-07-03 Thread Eric Blake

We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Change the internal
loop iteration of zeroing a device to track by bytes instead of
sectors (although we are still guaranteed that we iterate by steps
that are sector-aligned).

Signed-off-by: Eric Blake 

---
v2: rebase to earlier changes
---
 block/io.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/block/io.c b/block/io.c
index 2377f3a..b3ba9af 100644
--- a/block/io.c
+++ b/block/io.c
@@ -669,38 +669,38 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
  */
 int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
 {
-int64_t target_sectors, ret, nb_sectors, sector_num = 0;
+int64_t target_size, ret, bytes, offset = 0;
 BlockDriverState *bs = child->bs;
-int n;
+int n; /* sectors */

-target_sectors = bdrv_nb_sectors(bs);
-if (target_sectors < 0) {
-return target_sectors;
+target_size = bdrv_getlength(bs);
+if (target_size < 0) {
+return target_size;
 }

 for (;;) {
-nb_sectors = MIN(target_sectors - sector_num, 
BDRV_REQUEST_MAX_SECTORS);
-if (nb_sectors <= 0) {
+bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
+if (bytes <= 0) {
 return 0;
 }
-ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n, NULL);
+ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS,
+bytes >> BDRV_SECTOR_BITS, &n, NULL);
 if (ret < 0) {
-error_report("error getting block status at sector %" PRId64 ": 
%s",
- sector_num, strerror(-ret));
+error_report("error getting block status at offset %" PRId64 ": 
%s",
+ offset, strerror(-ret));
 return ret;
 }
 if (ret & BDRV_BLOCK_ZERO) {
-sector_num += n;
+offset += n * BDRV_SECTOR_BITS;
 continue;
 }
-ret = bdrv_pwrite_zeroes(child, sector_num << BDRV_SECTOR_BITS,
- n << BDRV_SECTOR_BITS, flags);
+ret = bdrv_pwrite_zeroes(child, offset, n * BDRV_SECTOR_SIZE, flags);
 if (ret < 0) {
-error_report("error writing zeroes at sector %" PRId64 ": %s",
- sector_num, strerror(-ret));
+error_report("error writing zeroes at offset %" PRId64 ": %s",
+ offset, strerror(-ret));
 return ret;
 }
-sector_num += n;
+offset += n * BDRV_SECTOR_SIZE;
 }
 }

-- 
2.9.4

Re: [Qemu-devel] [PATCH v2 3/3] block: add default implementations for bdrv_co_get_block_status()

2017-07-03 Thread Eric Blake

On 07/03/2017 01:31 PM, Eric Blake wrote:
>> The throttle driver I'm working on passes bdrv_co_get_block_status() to
>> bs->file. If there is a problem with an unused default function (it's
>> not static so will compile, but it might not be up to standard), you can
>> just remove it and I will reintroduce it when it's needed. CC me on
>> those patches when you send them if you can.
> 
> Sure.

Now posted:
https://lists.gnu.org/archive/html/qemu-devel/2017-07/msg00427.html

> The other thing I can do is have:
> 
> blkdebug_version() {
>assert(...);
>return common version;
> }

That's the approach I went with in 14/15 of that series.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [RFC PATCH v2 16/15] block: Add .bdrv_co_block_status() callback

2017-07-03 Thread Eric Blake

We are gradually moving away from sector-based interfaces, towards
byte-based. Now that the block layer exposes byte-based allocation,
it's time to tackle the drivers.  Add a new callback that can operate
on boundaries as small as a single byte.  Subsequent patches will then update
individual drivers, then finally remove .bdrv_co_get_block_status().
The old code now uses a goto in order to minimize churn at that later
removal.

The new code also passes through the 'allocation' hint, which will
allow subsequent patches to further optimize callers that only care
about how much of the image is allocated, rather than which offsets
the allocation actually maps to.

Note that most drivers give sector-aligned answers, except at
end-of-file, even when request_alignment is smaller than a sector.
However, bdrv_getlength() is sector-aligned (even though it gives a
byte answer), often by exceeding the actual file size.  If we were to
give back strict results, at least file-posix.c would report a
transition from DATA to HOLE at the end of a file even in the middle
of a sector, which can throw off callers; so we intentionally lie and
state that any partial sector at the end of a file has the same
status for the entire sector.

Signed-off-by: Eric Blake 

---
v2: improve alignment handling, add additional 'allocation' argument
for future optimization potential, ensure all iotests still pass

Sending as an RFC as part of my third series; this patch is technically
the start of my fourth series, but given the rebase churn I've had
elsewhere, it can't hurt to get the interface looked at in case it
needs tweaking.
---
 include/block/block_int.h |  9 -
 block/io.c| 27 ---
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 5f6ba5d..45ff534 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -172,13 +172,20 @@ struct BlockDriver {
  * bdrv_is_allocated[_above].  The driver should answer only
  * according to the current layer, and should not set
  * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW.  See block.h
- * for the meaning of _DATA, _ZERO, and _OFFSET_VALID.  The block
+ * for the meaning of _DATA, _ZERO, and _OFFSET_VALID.  As a hint,
+ * the flag allocation is true if the caller cares more about
+ * learning how much of the image is allocated, without regards to
+ * a breakdown by offset (a driver may either ignore the hint, or
+ * avoid _OFFSET_VALID to provide a larger *pnum).  The block
  * layer guarantees input aligned to request_alignment, as well as
  * non-NULL pnum and file.
  */
 int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, int *pnum,
 BlockDriverState **file);
+int64_t coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bd,
+bool allocation, int64_t offset, int64_t bytes, int64_t *pnum,
+BlockDriverState **file);

 /*
  * Invalidate any cached meta-data.
diff --git a/block/io.c b/block/io.c
index 5ed1ac7..00cdac1 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1771,7 +1771,7 @@ static int64_t coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
 bytes = n;
 }

-if (!bs->drv->bdrv_co_get_block_status) {
+if (!bs->drv->bdrv_co_get_block_status && !bs->drv->bdrv_co_block_status) {
 *pnum = bytes;
 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
 if (offset + bytes == total_size) {
@@ -1791,11 +1791,14 @@ static int64_t coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
 bdrv_inc_in_flight(bs);

 /* Round out to request_alignment boundaries */
-align = MAX(bs->bl.request_alignment, BDRV_SECTOR_SIZE);
+align = bs->bl.request_alignment;
+if (bs->drv->bdrv_co_get_block_status && align < BDRV_SECTOR_SIZE) {
+align = BDRV_SECTOR_SIZE;
+}
 aligned_offset = QEMU_ALIGN_DOWN(offset, align);
 aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;

-{
+if (bs->drv->bdrv_co_get_block_status) {
 int count; /* sectors */

 assert(QEMU_IS_ALIGNED(aligned_offset | aligned_bytes,
@@ -1808,8 +1811,26 @@ static int64_t coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
 goto out;
 }
 *pnum = count * BDRV_SECTOR_SIZE;
+goto refine;
 }

+ret = bs->drv->bdrv_co_block_status(bs, false, aligned_offset,
+aligned_bytes, pnum, &local_file);
+if (ret < 0) {
+*pnum = 0;
+goto out;
+}
+
+/*
+ * total_size is always sector-aligned, by sometimes exceeding actual
+ * file size. Expand pnum if it lands mid-sector due to end-of-file.
+ */
+if (QEMU_ALIGN_UP(*pnum + aligned_offset,
+  BDRV_SECTOR_SIZE) == total_size) {
+*pnum = total_size - aligned_offset;
+}
+
+ refine:

[Qemu-devel] [PATCH v2 05/15] qcow2: Switch is_zero_sectors() to byte-based

2017-07-03 Thread Eric Blake

We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Convert another internal
function (no semantic change), and rename it to is_zero() in the
process.

Signed-off-by: Eric Blake 

---
v2: rename function, rebase to upstream changes
---
 block/qcow2.c | 32 ++--
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index d75f248..9754193 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2905,21 +2905,28 @@ finish:
 }


-static bool is_zero_sectors(BlockDriverState *bs, int64_t start,
-uint32_t count)
+static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
 int nr;
 int64_t res;
+int64_t start;

-if (start + count > bs->total_sectors) {
-count = bs->total_sectors - start;
+/* Widen to sector boundaries, then clamp to image length, before
+ * checking status of underlying sectors */
+start = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE);
+bytes = QEMU_ALIGN_UP(offset + bytes, BDRV_SECTOR_SIZE) - start;
+
+if (start + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
+bytes = bs->total_sectors * BDRV_SECTOR_SIZE - start;
 }

-if (!count) {
+if (!bytes) {
 return true;
 }
-res = bdrv_get_block_status_above(bs, NULL, start, count, &nr, NULL);
-return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == count;
+res = bdrv_get_block_status_above(bs, NULL, start >> BDRV_SECTOR_BITS,
+  bytes >> BDRV_SECTOR_BITS, &nr, NULL);
+return res >= 0 && (res & BDRV_BLOCK_ZERO) &&
+nr * BDRV_SECTOR_SIZE == bytes;
 }

 static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
@@ -2937,24 +2944,21 @@ static coroutine_fn int 
qcow2_co_pwrite_zeroes(BlockDriverState *bs,
 }

 if (head || tail) {
-int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
 uint64_t off;
 unsigned int nr;

 assert(head + bytes <= s->cluster_size);

 /* check whether remainder of cluster already reads as zero */
-if (!(is_zero_sectors(bs, cl_start,
-  DIV_ROUND_UP(head, BDRV_SECTOR_SIZE)) &&
-  is_zero_sectors(bs, (offset + bytes) >> BDRV_SECTOR_BITS,
-  DIV_ROUND_UP(-tail & (s->cluster_size - 1),
-   BDRV_SECTOR_SIZE {
+if (!(is_zero(bs, offset - head, head) &&
+  is_zero(bs, offset + bytes,
+  tail ? s->cluster_size - tail : 0))) {
 return -ENOTSUP;
 }

 qemu_co_mutex_lock(&s->lock);
 /* We can have new write after previous check */
-offset = cl_start << BDRV_SECTOR_BITS;
+offset = QEMU_ALIGN_DOWN(offset, s->cluster_size);
 bytes = s->cluster_size;
 nr = s->cluster_size;
 ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
-- 
2.9.4

[Qemu-devel] [PATCH v2 04/15] block: Make bdrv_round_to_clusters() signature more useful

2017-07-03 Thread Eric Blake

In the process of converting sector-based interfaces to bytes,
I'm finding it easier to represent a byte count as a 64-bit
integer at the block layer (even if we are internally capped
by SIZE_MAX or even INT_MAX for individual transactions, it's
still nicer to not have to worry about truncation/overflow
issues on as many variables).  Update the signature of
bdrv_round_to_clusters() to uniformly use int64_t, matching
the signature already chosen for bdrv_is_allocated and the
fact that off_t is also a signed type, then adjust clients
according to the required fallout.

Signed-off-by: Eric Blake 

---
v2: fix commit message [John], rebase to earlier changes, including
mirror_clip_bytes() signature update
---
 include/block/block.h | 4 ++--
 block/io.c| 7 ---
 block/mirror.c| 7 +++
 block/trace-events| 2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index f0fdbe8..cc82a0d 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -473,9 +473,9 @@ int bdrv_get_flags(BlockDriverState *bs);
 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
 void bdrv_round_to_clusters(BlockDriverState *bs,
-int64_t offset, unsigned int bytes,
+int64_t offset, int64_t bytes,
 int64_t *cluster_offset,
-unsigned int *cluster_bytes);
+int64_t *cluster_bytes);

 const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
 void bdrv_get_backing_filename(BlockDriverState *bs,
diff --git a/block/io.c b/block/io.c
index 719a6b0..2377f3a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -422,9 +422,9 @@ static void mark_request_serialising(BdrvTrackedRequest 
*req, uint64_t align)
  * Round a region to cluster boundaries
  */
 void bdrv_round_to_clusters(BlockDriverState *bs,
-int64_t offset, unsigned int bytes,
+int64_t offset, int64_t bytes,
 int64_t *cluster_offset,
-unsigned int *cluster_bytes)
+int64_t *cluster_bytes)
 {
 BlockDriverInfo bdi;

@@ -922,7 +922,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild 
*child,
 struct iovec iov;
 QEMUIOVector bounce_qiov;
 int64_t cluster_offset;
-unsigned int cluster_bytes;
+int64_t cluster_bytes;
 size_t skip_bytes;
 int ret;

@@ -943,6 +943,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild 
*child,
 trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
cluster_offset, cluster_bytes);

+assert(cluster_bytes < SIZE_MAX);
 iov.iov_len = cluster_bytes;
 iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
 if (bounce_buffer == NULL) {
diff --git a/block/mirror.c b/block/mirror.c
index 21758a8..08c0c27 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -190,10 +190,9 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t 
*offset,
 bool need_cow;
 int ret = 0;
 int64_t align_offset = *offset;
-unsigned int align_bytes = *bytes;
+int64_t align_bytes = *bytes;
 int max_bytes = s->granularity * s->max_iov;

-assert(*bytes < INT_MAX);
 need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap);
 need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
   s->cow_bitmap);
@@ -389,7 +388,7 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 while (nb_chunks > 0 && offset < s->bdev_length) {
 int64_t ret;
 int io_sectors;
-unsigned int io_bytes;
+int64_t io_bytes;
 int64_t io_bytes_acct;
 enum MirrorMethod {
 MIRROR_METHOD_COPY,
@@ -414,7 +413,7 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 io_bytes = s->granularity;
 } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
 int64_t target_offset;
-unsigned int target_bytes;
+int64_t target_bytes;
 bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
&target_offset, &target_bytes);
 if (target_offset == offset &&
diff --git a/block/trace-events b/block/trace-events
index 4a4df25..13a5a87 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -12,7 +12,7 @@ blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned 
int bytes, int flag
 bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num 
%"PRId64" nb_sectors %d"
 bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num 
%"PRId64" nb_sectors %d"
 bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p 
offset %"PRId64" count %d flags %#x"
-bdrv_co_do_copy_on_rea

[Qemu-devel] [PATCH v2 12/15] block: Switch bdrv_co_get_block_status_above() to byte-based

2017-07-03 Thread Eric Blake

We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Convert another internal
type (no semantic change), and rename it to match the corresponding
public function rename.

Signed-off-by: Eric Blake 

---
v2: rebase to earlier changes
---
 block/io.c | 48 ++--
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/block/io.c b/block/io.c
index 697db75..85353fa 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1865,12 +1865,12 @@ out:
 return ret;
 }

-static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState 
*bs,
+static int64_t coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
 BlockDriverState *base,
 bool allocation,
-int64_t sector_num,
-int nb_sectors,
-int *pnum,
+int64_t offset,
+int64_t bytes,
+int64_t *pnum,
 BlockDriverState **file)
 {
 BlockDriverState *p;
@@ -1879,17 +1879,10 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status_above(BlockDriverState *bs,

 assert(bs != base);
 for (p = bs; p != base; p = backing_bs(p)) {
-int64_t count;
-
-ret = bdrv_co_block_status(p, allocation,
-   sector_num * BDRV_SECTOR_SIZE,
-   nb_sectors * BDRV_SECTOR_SIZE, &count,
-   file);
+ret = bdrv_co_block_status(p, allocation, offset, bytes, pnum, file);
 if (ret < 0) {
 break;
 }
-assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
-*pnum = count >> BDRV_SECTOR_BITS;
 if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
 /*
  * Reading beyond the end of the file continues to read
@@ -1897,39 +1890,35 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status_above(BlockDriverState *bs,
  * unallocated length we learned from an earlier
  * iteration.
  */
-*pnum = nb_sectors;
+*pnum = bytes;
 }
 if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
 break;
 }
-/* [sector_num, pnum] unallocated on this layer, which could be only
- * the first part of [sector_num, nb_sectors].  */
-nb_sectors = MIN(nb_sectors, *pnum);
+/* [offset, pnum] unallocated on this layer, which could be only
+ * the first part of [offset, bytes].  */
+bytes = MIN(bytes, *pnum);
 first = false;
 }
 return ret;
 }

 /* Coroutine wrapper for bdrv_get_block_status_above() */
-static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
+static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
 {
 BdrvCoBlockStatusData *data = opaque;
-int n;

-data->ret = bdrv_co_get_block_status_above(data->bs, data->base,
-   data->allocation,
-   data->offset >> 
BDRV_SECTOR_BITS,
-   data->bytes >> BDRV_SECTOR_BITS,
-   &n,
-   data->file);
-*data->pnum = n * BDRV_SECTOR_SIZE;
+data->ret = bdrv_co_block_status_above(data->bs, data->base,
+   data->allocation,
+   data->offset, data->bytes,
+   data->pnum, data->file);
 data->done = true;
 }

 /*
- * Synchronous wrapper around bdrv_co_get_block_status_above().
+ * Synchronous wrapper around bdrv_co_block_status_above().
  *
- * See bdrv_co_get_block_status_above() for details.
+ * See bdrv_co_block_status_above() for details.
  */
 static int64_t bdrv_common_block_status_above(BlockDriverState *bs,
   BlockDriverState *base,
@@ -1952,10 +1941,9 @@ static int64_t 
bdrv_common_block_status_above(BlockDriverState *bs,

 if (qemu_in_coroutine()) {
 /* Fast-path if already in coroutine context */
-bdrv_get_block_status_above_co_entry(&data);
+bdrv_block_status_above_co_entry(&data);
 } else {
-co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry,
-   &data);
+co = qemu_coroutine_create(bdrv_block_status_above_co_entry, &data);
 bdrv_coroutine_enter(bs, co);
 BDRV_POLL_WHILE(bs, !data.done);
 }
-- 
2.9.4

[Qemu-devel] [PATCH v2 14/15] block: Align block status requests

2017-07-03 Thread Eric Blake

Any device that has request_alignment greater than 512 should be
unable to report status at a finer granularity; it may also be
simpler for such devices to be guaranteed that the block layer
has rounded things out to the granularity boundary (the way the
block layer already rounds all other I/O out).  Besides, getting
the code correct for super-sector alignment also helps us now
that our public interface has byte granularity,
even though none of our drivers have byte-level callbacks.

Add an assertion in blkdebug that proves that the block layer
never requests status of unaligned sections, similar to what it
does on other requests (while still keeping the generic helper
in place for when future patches add a throttle driver).  Note
that iotest 177 already covers this (it would fail if you
use just the blkdebug.c hunk without the io.c changes).
Meanwhile, we can drop assertions in callers that no longer have
to pass in sector-aligned addresses.

Signed-off-by: Eric Blake 

---
v2: new patch
---
 include/block/block_int.h |  3 ++-
 block/blkdebug.c  | 13 +++-
 block/io.c| 53 ---
 3 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index ffa22c7..5f6ba5d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -173,7 +173,8 @@ struct BlockDriver {
  * according to the current layer, and should not set
  * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW.  See block.h
  * for the meaning of _DATA, _ZERO, and _OFFSET_VALID.  The block
- * layer guarantees non-NULL pnum and file.
+ * layer guarantees input aligned to request_alignment, as well as
+ * non-NULL pnum and file.
  */
 int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, int *pnum,
diff --git a/block/blkdebug.c b/block/blkdebug.c
index f1539db..67736b4 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -641,6 +641,17 @@ static int coroutine_fn 
blkdebug_co_pdiscard(BlockDriverState *bs,
 return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
 }

+static int64_t coroutine_fn blkdebug_co_get_block_status(
+BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
+BlockDriverState **file)
+{
+assert(QEMU_IS_ALIGNED(sector_num | nb_sectors,
+   DIV_ROUND_UP(bs->bl.request_alignment,
+BDRV_SECTOR_SIZE)));
+return bdrv_co_get_block_status_from_file(bs, sector_num, nb_sectors,
+  pnum, file);
+}
+
 static void blkdebug_close(BlockDriverState *bs)
 {
 BDRVBlkdebugState *s = bs->opaque;
@@ -915,7 +926,7 @@ static BlockDriver bdrv_blkdebug = {
 .bdrv_co_flush_to_disk  = blkdebug_co_flush,
 .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = blkdebug_co_pdiscard,
-.bdrv_co_get_block_status = bdrv_co_get_block_status_from_file,
+.bdrv_co_get_block_status = blkdebug_co_get_block_status,

 .bdrv_debug_event   = blkdebug_debug_event,
 .bdrv_debug_breakpoint  = blkdebug_debug_breakpoint,
diff --git a/block/io.c b/block/io.c
index 91d3e99..5ed1ac7 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1746,7 +1746,8 @@ static int64_t coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
 int64_t n; /* bytes */
 int64_t ret, ret2;
 BlockDriverState *local_file = NULL;
-int count; /* sectors */
+int64_t aligned_offset, aligned_bytes;
+uint32_t align;

 assert(pnum);
 total_size = bdrv_getlength(bs);
@@ -1788,27 +1789,44 @@ static int64_t coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
 }

 bdrv_inc_in_flight(bs);
-/*
- * TODO: Rather than require aligned offsets, we could instead
- * round to the driver's request_alignment here, then touch up
- * count afterwards back to the caller's expectations.
- */
-assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
-ret = bs->drv->bdrv_co_get_block_status(bs, offset >> BDRV_SECTOR_BITS,
-bytes >> BDRV_SECTOR_BITS, &count,
-&local_file);
-if (ret < 0) {
-*pnum = 0;
-goto out;
+
+/* Round out to request_alignment boundaries */
+align = MAX(bs->bl.request_alignment, BDRV_SECTOR_SIZE);
+aligned_offset = QEMU_ALIGN_DOWN(offset, align);
+aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
+
+{
+int count; /* sectors */
+
+assert(QEMU_IS_ALIGNED(aligned_offset | aligned_bytes,
+   BDRV_SECTOR_SIZE));
+ret = bs->drv->bdrv_co_get_block_status(
+bs, aligned_offset >> BDRV_SECTOR_BITS,
+aligned_bytes >> BDRV_SECTOR_BITS, &count, &local_file);
+if (ret < 0) {
+

[Qemu-devel] [PATCH v2 02/15] block: Allow NULL file for bdrv_get_block_status()

2017-07-03 Thread Eric Blake

Not all callers care about which BDS owns the mapping for a given
range of the file.  This patch merely simplifies the callers by
consolidating the logic in the common call point, while guaranteeing
a non-NULL file to all the driver callbacks, for no semantic change.
The only caller that does not care about pnum is bdrv_is_allocated,
as invoked by vvfat; we can likewise add assertions that the rest
of the stack does not have to worry about a NULL pnum.

Furthermore, this will also set the stage for a future cleanup: when
a caller does not care about which BDS owns an offset, it would be
nice to allow the driver to optimize things to not have to return
BDRV_BLOCK_OFFSET_VALID in the first place.  In the case of fragmented
allocation (for example, it's fairly easy to create a qcow2 image
where consecutive guest addresses are not at consecutive host
addresses), the current contract requires bdrv_get_block_status()
to clamp *pnum to the limit where host addresses are no longer
consecutive, but allowing a NULL file means that *pnum could be
set to the full length of known-allocated data.

Signed-off-by: Eric Blake 

---
v2: use local variable and final transfer, rather than assignment
of parameter to local
[previously in different series]:
v2: new patch, 
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05645.html
---
 include/block/block_int.h | 10 ++
 block/io.c| 44 
 block/mirror.c|  3 +--
 block/qcow2.c |  4 +---
 qemu-img.c| 10 --
 5 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 724799c..ffa22c7 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -168,10 +168,12 @@ struct BlockDriver {
 int64_t offset, int bytes);

 /*
- * Building block for bdrv_block_status[_above]. The driver should
- * answer only according to the current layer, and should not
- * set BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW.  See block.h
- * for the meaning of _DATA, _ZERO, and _OFFSET_VALID.
+ * Building block for bdrv_block_status[_above] and
+ * bdrv_is_allocated[_above].  The driver should answer only
+ * according to the current layer, and should not set
+ * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW.  See block.h
+ * for the meaning of _DATA, _ZERO, and _OFFSET_VALID.  The block
+ * layer guarantees non-NULL pnum and file.
  */
 int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, int *pnum,
diff --git a/block/io.c b/block/io.c
index 8c67ba8..6358d07 100644
--- a/block/io.c
+++ b/block/io.c
@@ -671,7 +671,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
 {
 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
 BlockDriverState *bs = child->bs;
-BlockDriverState *file;
 int n;

 target_sectors = bdrv_nb_sectors(bs);
@@ -684,7 +683,7 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
 if (nb_sectors <= 0) {
 return 0;
 }
-ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n, &file);
+ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n, NULL);
 if (ret < 0) {
 error_report("error getting block status at sector %" PRId64 ": 
%s",
  sector_num, strerror(-ret));
@@ -1729,8 +1728,9 @@ int64_t coroutine_fn 
bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
  * beyond the end of the disk image it will be clamped; if 'pnum' is set to
  * the end of the image, then the returned value will include BDRV_BLOCK_EOF.
  *
- * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file'
- * points to the BDS which the sector range is allocated in.
+ * If returned value is positive, BDRV_BLOCK_OFFSET_VALID bit is set, and
+ * 'file' is non-NULL, then '*file' points to the BDS which the sector range
+ * is allocated in.
  */
 static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
  int64_t sector_num,
@@ -1740,15 +1740,22 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status(BlockDriverState *bs,
 int64_t total_sectors;
 int64_t n;
 int64_t ret, ret2;
+BlockDriverState *local_file = NULL;

-*file = NULL;
+assert(pnum);
 total_sectors = bdrv_nb_sectors(bs);
 if (total_sectors < 0) {
+if (file) {
+*file = NULL;
+}
 return total_sectors;
 }

 if (sector_num >= total_sectors) {
 *pnum = 0;
+if (file) {
+*file = NULL;
+}
 return BDRV_BLOCK_EOF;
 }

@@ -1765,23 +1772,27 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status(BlockDriverState *bs,
 }
 if (bs->drv->protocol_name) {
 ret |= BDRV_BLOCK_O

[Qemu-devel] [PATCH v2 01/15] block: add default implementations for bdrv_co_get_block_status()

2017-07-03 Thread Eric Blake

From: Manos Pitsidianakis 

bdrv_co_get_block_status_from_file() and
bdrv_co_get_block_status_from_backing() set *file to bs->file and
bs->backing respectively, so that bdrv_co_get_block_status() can recurse
to them. Future block drivers won't have to duplicate code to implement
this.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Manos Pitsidianakis 
Message-Id: <20170629184320.7151-4-el13...@mail.ntua.gr>

---
v2: Including this patch from Manos, since it affects my later patches;
however, I anticipate that we will get a full v3 series from Manos
merged first
---
 include/block/block_int.h | 16 
 block/blkdebug.c  | 12 +---
 block/commit.c| 12 +---
 block/io.c| 24 
 block/mirror.c| 12 +---
 5 files changed, 43 insertions(+), 33 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 226232d..724799c 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -958,6 +958,22 @@ void bdrv_format_default_perms(BlockDriverState *bs, 
BdrvChild *c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared);

+/*
+ * Default implementation for drivers to pass bdrv_co_get_block_status() to
+ * their file
+ * */
+int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum,
+ BlockDriverState **file);
+/*
+ * Default implementation for drivers to pass bdrv_co_get_block_status() to
+ * their backing file
+ * */
+int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState 
*bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum,
+ BlockDriverState **file);
 const char *bdrv_get_parent_name(const BlockDriverState *bs);
 void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
 bool blk_dev_has_removable_media(BlockBackend *blk);
diff --git a/block/blkdebug.c b/block/blkdebug.c
index b25856c..f1539db 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -641,16 +641,6 @@ static int coroutine_fn 
blkdebug_co_pdiscard(BlockDriverState *bs,
 return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
 }

-static int64_t coroutine_fn blkdebug_co_get_block_status(
-BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
-BlockDriverState **file)
-{
-*pnum = nb_sectors;
-*file = bs->file->bs;
-return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
-(sector_num << BDRV_SECTOR_BITS);
-}
-
 static void blkdebug_close(BlockDriverState *bs)
 {
 BDRVBlkdebugState *s = bs->opaque;
@@ -925,7 +915,7 @@ static BlockDriver bdrv_blkdebug = {
 .bdrv_co_flush_to_disk  = blkdebug_co_flush,
 .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = blkdebug_co_pdiscard,
-.bdrv_co_get_block_status = blkdebug_co_get_block_status,
+.bdrv_co_get_block_status = bdrv_co_get_block_status_from_file,

 .bdrv_debug_event   = blkdebug_debug_event,
 .bdrv_debug_breakpoint  = blkdebug_debug_breakpoint,
diff --git a/block/commit.c b/block/commit.c
index 774a8a5..9e875a6 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -242,16 +242,6 @@ static int coroutine_fn 
bdrv_commit_top_preadv(BlockDriverState *bs,
 return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 }

-static int64_t coroutine_fn bdrv_commit_top_get_block_status(
-BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
-BlockDriverState **file)
-{
-*pnum = nb_sectors;
-*file = bs->backing->bs;
-return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
-   (sector_num << BDRV_SECTOR_BITS);
-}
-
 static void bdrv_commit_top_refresh_filename(BlockDriverState *bs, QDict *opts)
 {
 bdrv_refresh_filename(bs->backing->bs);
@@ -277,7 +267,7 @@ static void bdrv_commit_top_child_perm(BlockDriverState 
*bs, BdrvChild *c,
 static BlockDriver bdrv_commit_top = {
 .format_name= "commit_top",
 .bdrv_co_preadv = bdrv_commit_top_preadv,
-.bdrv_co_get_block_status   = bdrv_commit_top_get_block_status,
+.bdrv_co_get_block_status   = bdrv_co_get_block_status_from_backing,
 .bdrv_refresh_filename  = bdrv_commit_top_refresh_filename,
 .bdrv_close = bdrv_commit_top_close,
 .bdrv_child_perm= bdrv_commit_top_child_perm,
diff --git a/block/io.c b/block/io.c
index 53c01cf..8c67ba8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1689,6 +1689,30 @@ typedef struct BdrvCoGetBlockStatusData {
 bool done;
 } BdrvCoGetBlockStatusData;

+int64_t coroutine_fn

[Qemu-devel] [PATCH v2 10/15] block: Switch BdrvCoGetBlockStatusData to byte-based

2017-07-03 Thread Eric Blake

We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Convert another internal
type (no semantic change), and rename it to match the corresponding
public function rename.

Signed-off-by: Eric Blake 

---
v2: rebase to earlier changes
---
 block/io.c | 31 ++-
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/block/io.c b/block/io.c
index 2d324af..888f7a1 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1676,17 +1676,17 @@ int bdrv_flush_all(void)
 }


-typedef struct BdrvCoGetBlockStatusData {
+typedef struct BdrvCoBlockStatusData {
 BlockDriverState *bs;
 BlockDriverState *base;
 BlockDriverState **file;
-int64_t sector_num;
-int nb_sectors;
-int *pnum;
+int64_t offset;
+int64_t bytes;
+int64_t *pnum;
 int64_t ret;
 bool allocation;
 bool done;
-} BdrvCoGetBlockStatusData;
+} BdrvCoBlockStatusData;

 int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
  int64_t sector_num,
@@ -1913,14 +1913,16 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status_above(BlockDriverState *bs,
 /* Coroutine wrapper for bdrv_get_block_status_above() */
 static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
 {
-BdrvCoGetBlockStatusData *data = opaque;
+BdrvCoBlockStatusData *data = opaque;
+int n;

 data->ret = bdrv_co_get_block_status_above(data->bs, data->base,
data->allocation,
-   data->sector_num,
-   data->nb_sectors,
-   data->pnum,
+   data->offset >> 
BDRV_SECTOR_BITS,
+   data->bytes >> BDRV_SECTOR_BITS,
+   &n,
data->file);
+*data->pnum = n * BDRV_SECTOR_SIZE;
 data->done = true;
 }

@@ -1937,13 +1939,14 @@ static int64_t 
bdrv_common_block_status_above(BlockDriverState *bs,
   BlockDriverState **file)
 {
 Coroutine *co;
-BdrvCoGetBlockStatusData data = {
+int64_t n;
+BdrvCoBlockStatusData data = {
 .bs = bs,
 .base = base,
 .file = file,
-.sector_num = sector_num,
-.nb_sectors = nb_sectors,
-.pnum = pnum,
+.offset = sector_num * BDRV_SECTOR_SIZE,
+.bytes = nb_sectors * BDRV_SECTOR_SIZE,
+.pnum = &n,
 .allocation = allocation,
 .done = false,
 };
@@ -1957,6 +1960,8 @@ static int64_t 
bdrv_common_block_status_above(BlockDriverState *bs,
 bdrv_coroutine_enter(bs, co);
 BDRV_POLL_WHILE(bs, !data.done);
 }
+assert(data.ret < 0 || QEMU_IS_ALIGNED(n, BDRV_SECTOR_SIZE));
+*pnum = n >> BDRV_SECTOR_BITS;
 return data.ret;
 }

-- 
2.9.4

[Qemu-devel] [PATCH v2 11/15] block: Switch bdrv_common_block_status_above() to byte-based

2017-07-03 Thread Eric Blake

We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Convert another internal
function (no semantic change).

Signed-off-by: Eric Blake 

---
v2: new patch
---
 block/io.c | 42 +-
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/block/io.c b/block/io.c
index 888f7a1..697db75 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1934,19 +1934,18 @@ static void coroutine_fn 
bdrv_get_block_status_above_co_entry(void *opaque)
 static int64_t bdrv_common_block_status_above(BlockDriverState *bs,
   BlockDriverState *base,
   bool allocation,
-  int64_t sector_num,
-  int nb_sectors, int *pnum,
+  int64_t offset,
+  int64_t bytes, int64_t *pnum,
   BlockDriverState **file)
 {
 Coroutine *co;
-int64_t n;
 BdrvCoBlockStatusData data = {
 .bs = bs,
 .base = base,
 .file = file,
-.offset = sector_num * BDRV_SECTOR_SIZE,
-.bytes = nb_sectors * BDRV_SECTOR_SIZE,
-.pnum = &n,
+.offset = offset,
+.bytes = bytes,
+.pnum = pnum,
 .allocation = allocation,
 .done = false,
 };
@@ -1960,8 +1959,6 @@ static int64_t 
bdrv_common_block_status_above(BlockDriverState *bs,
 bdrv_coroutine_enter(bs, co);
 BDRV_POLL_WHILE(bs, !data.done);
 }
-assert(data.ret < 0 || QEMU_IS_ALIGNED(n, BDRV_SECTOR_SIZE));
-*pnum = n >> BDRV_SECTOR_BITS;
 return data.ret;
 }

@@ -1971,8 +1968,19 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
 int nb_sectors, int *pnum,
 BlockDriverState **file)
 {
-return bdrv_common_block_status_above(bs, base, false, sector_num,
-  nb_sectors, pnum, file);
+int64_t ret;
+int64_t n;
+
+ret = bdrv_common_block_status_above(bs, base, false,
+ sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE,
+ &n, file);
+if (ret < 0) {
+return ret;
+}
+assert(QEMU_IS_ALIGNED(n, BDRV_SECTOR_SIZE));
+*pnum = n >> BDRV_SECTOR_BITS;
+return ret;
 }

 int64_t bdrv_block_status(BlockDriverState *bs,
@@ -1997,21 +2005,13 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState 
*bs, int64_t offset,
int64_t bytes, int64_t *pnum)
 {
 int64_t ret;
-int psectors;
+int64_t dummy;

-assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
-assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE) &&
-   bytes < INT_MAX * BDRV_SECTOR_SIZE);
-ret = bdrv_common_block_status_above(bs, backing_bs(bs), true,
- offset >> BDRV_SECTOR_BITS,
- bytes >> BDRV_SECTOR_BITS, &psectors,
- NULL);
+ret = bdrv_common_block_status_above(bs, backing_bs(bs), true, offset,
+ bytes, pnum ? pnum : &dummy, NULL);
 if (ret < 0) {
 return ret;
 }
-if (pnum) {
-*pnum = psectors * BDRV_SECTOR_SIZE;
-}
 return !!(ret & BDRV_BLOCK_ALLOCATED);
 }

-- 
2.9.4

[Qemu-devel] [PATCH v2 03/15] block: Add flag to avoid wasted work in bdrv_is_allocated()

2017-07-03 Thread Eric Blake

Not all callers care about which BDS owns the mapping for a given
range of the file.  In particular, bdrv_is_allocated() cares more
about finding the largest run of allocated data from the guest
perspective, whether or not that data is consecutive from the
host perspective.  Therefore, doing subsequent refinements such
as checking how much of the format-layer allocation also satisfies
BDRV_BLOCK_ZERO at the protocol layer is wasted work - in the best
case, it just costs extra CPU cycles during a single
bdrv_is_allocated(), but in the worst case, it results in a smaller
*pnum, and forces callers to iterate through more status probes when
visiting the entire file for even more extra CPU cycles.

This patch only optimizes the block layer.  But subsequent patches
will tweak the driver callback to be byte-based, and in the process,
can also pass this hint through to the driver.

Signed-off-by: Eric Blake 

---
v2: new patch
---
 block/io.c | 51 +--
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git a/block/io.c b/block/io.c
index 6358d07..719a6b0 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1685,6 +1685,7 @@ typedef struct BdrvCoGetBlockStatusData {
 int nb_sectors;
 int *pnum;
 int64_t ret;
+bool allocation;
 bool done;
 } BdrvCoGetBlockStatusData;

@@ -1717,6 +1718,10 @@ int64_t coroutine_fn 
bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
  * Drivers not implementing the functionality are assumed to not support
  * backing files, hence all their sectors are reported as allocated.
  *
+ * If 'allocation' is true, the caller only cares about allocation
+ * status; this is a hint that a larger 'pnum' result is more
+ * important than including BDRV_BLOCK_OFFSET_VALID in the return.
+ *
  * If 'sector_num' is beyond the end of the disk image the return value is
  * BDRV_BLOCK_EOF and 'pnum' is set to 0.
  *
@@ -1733,6 +1738,7 @@ int64_t coroutine_fn 
bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
  * is allocated in.
  */
 static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
+ bool allocation,
  int64_t sector_num,
  int nb_sectors, int *pnum,
  BlockDriverState **file)
@@ -1791,14 +1797,15 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status(BlockDriverState *bs,

 if (ret & BDRV_BLOCK_RAW) {
 assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
-ret = bdrv_co_get_block_status(local_file, ret >> BDRV_SECTOR_BITS,
+ret = bdrv_co_get_block_status(local_file, allocation,
+   ret >> BDRV_SECTOR_BITS,
*pnum, pnum, &local_file);
 goto out;
 }

 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
 ret |= BDRV_BLOCK_ALLOCATED;
-} else {
+} else if (!allocation) {
 if (bdrv_unallocated_blocks_are_zero(bs)) {
 ret |= BDRV_BLOCK_ZERO;
 } else if (bs->backing) {
@@ -1810,12 +1817,13 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status(BlockDriverState *bs,
 }
 }

-if (local_file && local_file != bs &&
+if (!allocation && local_file && local_file != bs &&
 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
 (ret & BDRV_BLOCK_OFFSET_VALID)) {
 int file_pnum;

-ret2 = bdrv_co_get_block_status(local_file, ret >> BDRV_SECTOR_BITS,
+ret2 = bdrv_co_get_block_status(local_file, true,
+ret >> BDRV_SECTOR_BITS,
 *pnum, &file_pnum, NULL);
 if (ret2 >= 0) {
 /* Ignore errors.  This is just providing extra information, it
@@ -1850,6 +1858,7 @@ out:

 static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState 
*bs,
 BlockDriverState *base,
+bool allocation,
 int64_t sector_num,
 int nb_sectors,
 int *pnum,
@@ -1861,7 +1870,8 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status_above(BlockDriverState *bs,

 assert(bs != base);
 for (p = bs; p != base; p = backing_bs(p)) {
-ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum, file);
+ret = bdrv_co_get_block_status(p, allocation, sector_num, nb_sectors,
+   pnum, file);
 if (ret < 0) {
 break;
 }
@@ -1891,6 +1901,7 @@ static void coroutine_fn 
bdrv_get_block_status_above_co_entry(void *opaque)
 BdrvCoGetBlockStatusData *data = opaque;

 data->ret = bdrv_co_get_block_status_above(data->bs, data->base,
+   data->allocation,
data->sector_num,

[Qemu-devel] [PATCH v2 00/15] make bdrv_get_block_status byte-based

2017-07-03 Thread Eric Blake

There are patches floating around to add NBD_CMD_BLOCK_STATUS,
but NBD wants to report status on byte granularity (even if the
reporting will probably be naturally aligned to sectors or even
much higher levels).  I've therefore started the task of
converting our block status code to report at a byte granularity
rather than sectors.

The overall conversion currently looks like:
part 1: bdrv_is_allocated (v3 is reviewed [1], modulo vvfat whitespace changes)
part 2: dirty-bitmap (v4 is posted [2]; v2 was reviewed but rebase changes
mean more review is needed)
part 3: this series, for bdrv_get_block_status (first half of v1, at [3],
did not get much review)
part 4: upcoming series, for .bdrv_co_block_status (second half of v1 [3])

Available as a tag at:
git fetch git://repo.or.cz/qemu/ericb.git nbd-byte-status-v2

It is based on the union of Max's and Kevin's block branches,
plus posted fixes that make iotest 55 pass.

The diffstat shows a net growth in line count, but some of that is due
to better comments, and some because the code is a bit longer in order
to handle differing alignments between caller and driver.

I still haven't felt like tackling the task of rewriting migration/block.c
and qemu-img.c to use bytes (instead of sectors) everywhere - that might
give another net win in lines of code and legibility.

Patch 1/15 isn't really mine; Manos will probably be posting a v3
of his series, which should be committed before mine [4].

[1] https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg06077.html
[2] https://lists.gnu.org/archive/html/qemu-devel/2017-07/msg00269.html
[3] https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg02642.html
[4] https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg06846.html

Since v1: rebase against lots of upstream churn; add patches to support
arbitrary alignments and test them. I also split the original v1 series
(31 patches) into two halves, where teaching the individual drivers
about byte-level granularity is now going to be a fourth series (I'll
post a 16/15 RFC patch to show the interface that it will use)

001/15:[down] 'block: add default implementations for 
bdrv_co_get_block_status()'
002/15:[down] 'block: Allow NULL file for bdrv_get_block_status()'
003/15:[down] 'block: Add flag to avoid wasted work in bdrv_is_allocated()'
004/15:[0002] [FC] 'block: Make bdrv_round_to_clusters() signature more useful'
005/15:[0018] [FC] 'qcow2: Switch is_zero_sectors() to byte-based'
006/15:[0004] [FC] 'block: Switch bdrv_make_zero() to byte-based'
007/15:[] [--] 'qemu-img: Switch get_block_status() to byte-based'
008/15:[0058] [FC] 'block: Convert bdrv_get_block_status() to bytes'
009/15:[0064] [FC] 'block: Switch bdrv_co_get_block_status() to byte-based'
010/15:[0001] [FC] 'block: Switch BdrvCoGetBlockStatusData to byte-based'
011/15:[down] 'block: Switch bdrv_common_block_status_above() to byte-based'
012/15:[0010] [FC] 'block: Switch bdrv_co_get_block_status_above() to 
byte-based'
013/15:[0063] [FC] 'block: Convert bdrv_get_block_status_above() to bytes'
014/15:[down] 'block: Align block status requests'
015/15:[down] 'qemu-io: Relax 'alloc' now that block-status doesn't assert'

Eric Blake (14):
  block: Allow NULL file for bdrv_get_block_status()
  block: Add flag to avoid wasted work in bdrv_is_allocated()
  block: Make bdrv_round_to_clusters() signature more useful
  qcow2: Switch is_zero_sectors() to byte-based
  block: Switch bdrv_make_zero() to byte-based
  qemu-img: Switch get_block_status() to byte-based
  block: Convert bdrv_get_block_status() to bytes
  block: Switch bdrv_co_get_block_status() to byte-based
  block: Switch BdrvCoGetBlockStatusData to byte-based
  block: Switch bdrv_common_block_status_above() to byte-based
  block: Switch bdrv_co_get_block_status_above() to byte-based
  block: Convert bdrv_get_block_status_above() to bytes
  block: Align block status requests
  qemu-io: Relax 'alloc' now that block-status doesn't assert

Manos Pitsidianakis (1):
  block: add default implementations for bdrv_co_get_block_status()

 include/block/block.h  |  26 ++--
 include/block/block_int.h  |  27 +++-
 block/blkdebug.c   |   9 +-
 block/commit.c |  12 +-
 block/io.c | 302 -
 block/mirror.c |  32 ++---
 block/qcow2-cluster.c  |   2 +-
 block/qcow2.c  |  34 ++---
 qemu-img.c |  71 ++-
 qemu-io-cmds.c |  13 --
 block/trace-events |   2 +-
 tests/qemu-iotests/177 |  11 +-
 tests/qemu-iotests/177.out |  18 ++-
 13 files changed, 317 insertions(+), 242 deletions(-)

-- 
2.9.4

Re: [Qemu-devel] [PATCH RFC 0/6] q35: add acpi pci hotplug support

2017-07-03 Thread Alexander Bezzubikov

Tried it on Win7 Enterprise SP1 - SHPC works well, _OSC patches aren't
necessary (since pci-bridge has its own controller, I suppose).
On Linux guests it works when adding a device from the CLI with -device, but
the OS seems to fail to detect the device when I add it with device_add from
the monitor.
There are also some issues with unplugging on Linux (I haven't tested
unplugging on Windows yet). That's the news.

2017-07-03 21:29 GMT+03:00 Michael S. Tsirkin :

> On Mon, Jul 03, 2017 at 09:26:33PM +0300, Marcel Apfelbaum wrote:
> > On 03/07/2017 19:34, Michael S. Tsirkin wrote:
> > > On Mon, Jul 03, 2017 at 02:27:11PM +0200, Igor Mammedov wrote:
> > > > On Fri, 30 Jun 2017 10:25:05 +0300
> > > > Marcel Apfelbaum  wrote:
> > > >
> > > > [...]
> > > > >
> > > > > So for the modern systems not supporting PCI ACPI hotplug
> > > > > we don't need pci-bridges anyway, but for the older ones
> > > > > the ACPI code of the pci-bridge will be loaded into the
> > > > > ACPI namespace only if a pci-bridge is actually hot-plugged.
> > > >
> > > > just note that the set of 'older' guest OSes is limited to
> > > > ones that do not support SHPC (i.e. to EOLed WinXP & co)
> > > > as for linux and more modern Windows SHPC hotplug should
> > > > just work without our ACPI hack (which taxes low memory
> > > > to keep acpi tables for bridges).
> > > >
> > > > So I'm in favor of Michael's suggestion to leave ACPI PCI
> > > > only in PC machine for old WinXP guests and to keep Q35
> > > > clean, where linux or newer Windows guests could just
> > > > use standard SHPC.
> > > >
> > > > [...]
> > >
> > > I didn't realize windows actually supports SHPC for PCI.
> >
> > Me neither, if Igor is right I am all for shpc hotplug
> > since Q35 is not supposed to support older guests.
> >
> > I remember I succeeded to enable shpc hotplug some time
> > ago, but only for Linux guests.
> >
> > Igor, do you have some spec/doc on newer Windows OSes that confirm
> > PCI shpc hotplug support?
>
> Just try it, easier than poking at specs which aren't always up to date.
>
> > >
> > > Do they correctly set _OSC Arg3, bit offset 1?
> > > SHPC Native Hot Plug control
> > > The OS sets this bit to 1 to request control over PCI/PCI-X
> Standard Hot-Plug Controller
> > > (SHPC) hot plug. If the OS successfully receives control of this
> feature, it must track and
> > > update the status of hot plug slots and handle hot plug events as
> described in the SHPC
> > > Specification.
> > > I was under impression they only set bit 0.
> > >
> >
> > Alexandr, if modern Windows OSes do support shpc, it makes our
> > job easier, can you please try to enable shpc hotplug?
> >
> > Thanks,
> > Marcel
>
> No need to enable or even have a bridge for that at all -
> set the bit in _OSC, see what does guest enable.
>



-- 
Alexander Bezzubikov

Re: [Qemu-devel] [PATCH v3 1/1] s390x: vmstatify config migration for virtio-ccw

2017-07-03 Thread Cornelia Huck

Am Mon,  3 Jul 2017 23:34:14 +0200
schrieb Halil Pasic :

> Let's vmstatify virtio_ccw_save_config and virtio_ccw_load_config for
> flexibility (extending using subsections) and for fun.
> 
> To achieve this we need to hack the config_vector, which is
> VirtIODevice (that is common virtio) state, in the middle of the
> VirtioCcwDevice state representation.  This is somewhat ugly, but we
> have no choice because the stream format needs to be preserved.
> 
> Almost no changes in behavior. Exception is everything that comes with
> vmstate like extra bookkeeping about what's in the stream, and maybe
> some extra checks and better error reporting.
> 
> Signed-off-by: Halil Pasic 
> Reviewed-by: Dr. David Alan Gilbert 
> Reviewed-by: Juan Quintela 
> Reviewed-by: Cornelia Huck 
> ---
> 
> I was pondering whether to drop or to keep the r-b's I've got for v2.
> I've decided to keep the r-b's because the changes corresponding to v3
> are IMHO non-substantive and  because the patch isn't exactly small
> (can't be re-reviewed in a couple of seconds).
> 
> All r-b people are on cc and encouraged to scream at me if they don't
> agree to keep their r-b.

No screams from me, I don't think the changes are substantive enough to
drop my r-b.

(I haven't looked at the new patch in detail.)

Re: [Qemu-devel] [PATCH v2 0/5] target/sh4: misc FPU fixes and optimizations

2017-07-03 Thread Richard Henderson


On 07/02/2017 01:28 PM, Aurelien Jarno wrote:

This patchset should fix the bug#1701821 reported by Bruno Haible,
which causes the gnulib testsuite to fail for single precision libm
tests or for tests relying on unordered comparisons.

It also fixes an inversion of cause and flag bits in the FPSCR register,
which is unrelated to the reported bug. It also slightly improves the fneg
and fcmp instructions.

Aurelien Jarno (5):
   target/sh4: do not check for PR bit for fabs instruction
   target/sh4: fix FPU unorderered compare
   target/sh4: fix FPSCR cause vs flag inversion
   target/sh4: do not use a helper to implement fneg
   target/sh4: return result of fcmp using TCG

  target/sh4/helper.h| 11 +++-
  target/sh4/op_helper.c | 71 --
  target/sh4/translate.c | 30 -
  3 files changed, 37 insertions(+), 75 deletions(-)



Reviewed-by: Richard Henderson 


r~

[Qemu-devel] [PATCH v3 1/1] s390x: vmstatify config migration for virtio-ccw

2017-07-03 Thread Halil Pasic

Let's vmstatify virtio_ccw_save_config and virtio_ccw_load_config for
flexibility (extending using subsections) and for fun.

To achieve this we need to hack the config_vector, which is VirtIODevice
(that is common virtio) state, in the middle of the VirtioCcwDevice state
representation.  This is somewhat ugly, but we have no choice because the
stream format needs to be preserved.

Almost no changes in behavior. Exception is everything that comes with
vmstate like extra bookkeeping about what's in the stream, and maybe some
extra checks and better error reporting.

Signed-off-by: Halil Pasic 
Reviewed-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Cornelia Huck 
---

I was pondering whether to drop or to keep the r-b's I've got for v2.
I've decided to keep the r-b's because the changes corresponding to v3 are
IMHO non-substantive and  because the patch isn't exactly small (can't be
re-reviewed in a couple of seconds).

All r-b people are on cc and encouraged to scream at me if they don't
agree to keep their r-b.

@Christian: v2 was supposed to go in but since commit 8ed179c937 was
already on your branch and it interfered with v2 I had to do its
equivalent here. (For reference see:
 https://www.mail-archive.com/qemu-devel@nongnu.org/msg455772.html).

v2 --> v3:
* rebased
* added handling (modulo QMP error reporting) equivalent to commit
  8ed179c937 ("s390x/css: catch section mismatch on load", 2017-05-18)
* added a hint "Bug!" to the _EQUAL check not expected to fail unless
  programming error in QEMU
* minor style issues (pointed out by Dong Jia)
v1 --> v2:
* added r-bs
* fixed typo in commit message
* fixed a style issue found out by Connie
---
 hw/intc/s390_flic.c  |  28 
 hw/s390x/ccw-device.c|  10 ++
 hw/s390x/ccw-device.h|   4 +
 hw/s390x/css.c   | 378 +--
 hw/s390x/virtio-ccw.c| 158 +-
 include/hw/s390x/css.h   |  12 +-
 include/hw/s390x/s390_flic.h |   5 +
 7 files changed, 358 insertions(+), 237 deletions(-)

diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c
index a26e90670f..a99a350d8e 100644
--- a/hw/intc/s390_flic.c
+++ b/hw/intc/s390_flic.c
@@ -17,6 +17,7 @@
 #include "trace.h"
 #include "hw/qdev.h"
 #include "qapi/error.h"
+#include "hw/s390x/s390-virtio-ccw.h"
 
 S390FLICState *s390_get_flic(void)
 {
@@ -136,3 +137,30 @@ static void qemu_s390_flic_register_types(void)
 }
 
 type_init(qemu_s390_flic_register_types)
+
+const VMStateDescription vmstate_adapter_info = {
+.name = "s390_adapter_info",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(ind_offset, AdapterInfo),
+/*
+ * We do not have to migrate either the id or the addresses.
+ * The id is set by css_register_io_adapter and the addresses
+ * are set based on the IndAddr objects after those get mapped.
+ */
+VMSTATE_END_OF_LIST()
+},
+};
+
+const VMStateDescription vmstate_adapter_routes = {
+
+.name = "s390_adapter_routes",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_STRUCT(adapter, AdapterRoutes, 1, vmstate_adapter_info,
+   AdapterInfo),
+VMSTATE_END_OF_LIST()
+}
+};
diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c
index fb8d640a7e..f9bfa154d6 100644
--- a/hw/s390x/ccw-device.c
+++ b/hw/s390x/ccw-device.c
@@ -50,6 +50,16 @@ static void ccw_device_class_init(ObjectClass *klass, void 
*data)
 dc->props = ccw_device_properties;
 }
 
+const VMStateDescription vmstate_ccw_dev = {
+.name = "s390_ccw_dev",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_STRUCT_POINTER(sch, CcwDevice, vmstate_subch_dev, SubchDev),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const TypeInfo ccw_device_info = {
 .name = TYPE_CCW_DEVICE,
 .parent = TYPE_DEVICE,
diff --git a/hw/s390x/ccw-device.h b/hw/s390x/ccw-device.h
index 89c8e5dff7..4e6af287e7 100644
--- a/hw/s390x/ccw-device.h
+++ b/hw/s390x/ccw-device.h
@@ -27,6 +27,10 @@ typedef struct CcwDevice {
 CssDevId subch_id;
 } CcwDevice;
 
+extern const VMStateDescription vmstate_ccw_dev;
+#define VMSTATE_CCW_DEVICE(_field, _state) \
+VMSTATE_STRUCT(_field, _state, 1, vmstate_ccw_dev, CcwDevice)
+
 typedef struct CCWDeviceClass {
 DeviceClass parent_class;
 void (*unplug)(HotplugHandler *, DeviceState *, Error **);
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 599805d275..d67fffae30 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -22,6 +22,7 @@
 #include "hw/s390x/css.h"
 #include "trace.h"
 #include "hw/s390x/s390_flic.h"
+#include "hw/s390x/s390-virtio-ccw.h"
 
 typedef struct CrwContainer {
 CRW crw;
@@ -40,6 +41,181 @@ typedef struct SubchSet {
 unsigned long devnos_used[BITS_TO_LONGS(MAX_SCHID + 1)];
 } SubchSet;
 
+static co

Re: [Qemu-devel] [PATCH 0/2] target/sh4: fix fabs and optimize fneg

2017-07-03 Thread Richard Henderson


On 07/02/2017 09:32 AM, Aurelien Jarno wrote:

This patchset should fix the bug #1701821 reported by Bruno Haible,
which causes the gnulib testsuite to fail for single precision libm
tests.

Aurelien Jarno (2):
   target/sh4: do not check for PR bit for fabs instruction
   target/sh4: do not use a helper to implement fneg

  target/sh4/helper.h|  3 ---
  target/sh4/op_helper.c | 15 ---
  target/sh4/translate.c | 20 +---
  3 files changed, 5 insertions(+), 33 deletions(-)



Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v2 2/8] target/s390x: Implement CONVERT UNICODE insns

2017-07-03 Thread Richard Henderson


On 07/02/2017 06:18 AM, Aurelien Jarno wrote:

+if (!s390_has_feat(s->insn->fac == S390_FEAT_EXTENDED_TRANSLATION_3
+   ? S390_FEAT_ETF3_ENH : S390_FEAT_ETF2_ENH)) {
+m3 = 0;
+}


This doesn't look correct to me. The well-formedness checking is part of
ETF3_ENH facility, for both convert unicode instructions that are part
of the Z architecture (CU12 and CU21) and for the ones added by the ETF3
facility (CU14 and CU24).


Quite right.


r~

Re: [Qemu-devel] [PATCH] util/cacheinfo: Fix warning generated by clang

2017-07-03 Thread Richard Henderson


On 07/01/2017 03:44 PM, Peter Maydell wrote:

On 1 July 2017 at 23:35, Richard Henderson  wrote:

Perhaps it ought to be telling me to use %x to force Xn in spite of the
type?


You always get Xn anyway, regardless of the type.

For us, I think the right thing to do is make 'ctr' be a uint64_t,
because we're reading a 64 bit sysreg and silently truncating it
as a side effect of the asm constraints is a bit obscure.


Fair enough.  Applied as-is to tcg-next.


r~

Re: [Qemu-devel] [PATCH v4 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal)

2017-07-03 Thread Richard Henderson


On 06/30/2017 07:36 AM, Pranith Kumar wrote:

This patch enables the indirect jump path using an LDR (literal)
instruction. It will be interesting to test and see which performs
better among the two paths.

CC: Alex Bennée 
Reviewed-by: Richard Henderson 
Signed-off-by: Pranith Kumar 


Applied all to tcg-next.

Bonus points for converting USE_DIRECT_JUMP to a run-time flag, perhaps 
controllable by a -d bit.  That would further enable easy benchmarking of the 
two paths.



r~

Re: [Qemu-devel] [PATCH 2/4] xen/mapcache: add an ability to create dummy mappings

2017-07-03 Thread Stefano Stabellini

On Mon, 3 Jul 2017, Igor Druzhinin wrote:
> On 01/07/17 01:06, Stefano Stabellini wrote:
> > On Fri, 30 Jun 2017, Igor Druzhinin wrote:
> >> Dummies are simple anonymous mappings that are placed instead
> >> of regular foreign mappings in certain situations when we need
> >> to postpone the actual mapping but still have to give a
> >> memory region to QEMU to play with.
> >>
> >> This is planned to be used for restore on Xen.
> >>
> >> Signed-off-by: Igor Druzhinin 
> >>
> >> ---
> >>  hw/i386/xen/xen-mapcache.c | 36 
> >>  1 file changed, 28 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
> >> index e60156c..05050de 100644
> >> --- a/hw/i386/xen/xen-mapcache.c
> >> +++ b/hw/i386/xen/xen-mapcache.c
> >> @@ -150,7 +150,8 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
> >> *opaque)
> >>  
> >>  static void xen_remap_bucket(MapCacheEntry *entry,
> >>   hwaddr size,
> >> - hwaddr address_index)
> >> + hwaddr address_index,
> >> + bool dummy)
> >>  {
> >>  uint8_t *vaddr_base;
> >>  xen_pfn_t *pfns;
> >> @@ -177,11 +178,25 @@ static void xen_remap_bucket(MapCacheEntry *entry,
> >>  pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) 
> >> + i;
> >>  }
> >>  
> >> -vaddr_base = xenforeignmemory_map(xen_fmem, xen_domid, 
> >> PROT_READ|PROT_WRITE,
> >> -  nb_pfn, pfns, err);
> >> -if (vaddr_base == NULL) {
> >> -perror("xenforeignmemory_map");
> >> -exit(-1);
> >> +if (!dummy) {
> >> +vaddr_base = xenforeignmemory_map(xen_fmem, xen_domid,
> >> +   PROT_READ|PROT_WRITE,
> >> +   nb_pfn, pfns, err);
> >> +if (vaddr_base == NULL) {
> >> +perror("xenforeignmemory_map");
> >> +exit(-1);
> >> +}
> >> +} else {
> >> +/*
> >> + * We create dummy mappings where we are unable to create a 
> >> foreign
> >> + * mapping immediately due to certain circumstances (i.e. on 
> >> resume now)
> >> + */
> >> +vaddr_base = mmap(NULL, size, PROT_READ|PROT_WRITE,
> >> +  MAP_ANON|MAP_SHARED, -1, 0);
> >> +if (vaddr_base == NULL) {
> >> +perror("mmap");
> >> +exit(-1);
> >> +}
> > 
> > For our sanity in debugging this in the future, I think it's best if we
> > mark this mapcache entry as "dummy". Since we are at it, we could turn
> > the lock field of MapCacheEntry into a flag field and #define LOCK as
> > (1<<0) and DUMMY as (1<<1). Please do that as a separate patch.
> >
> 
> Unfortunately, the lock field is a reference counter (or at least it looks
> like one according to the source code). It seems to me that it's technically
> possible to have one region locked from several places in QEMU code. For
> that reason, I'd like to introduce a separate field - something like
> uint8_t flags.

Yes, you are right.


> >>>  }
> >>  
> >>  entry->vaddr_base = vaddr_base;
> >> @@ -211,6 +226,7 @@ static uint8_t *xen_map_cache_unlocked(hwaddr 
> >> phys_addr, hwaddr size,
> >>  hwaddr cache_size = size;
> >>  hwaddr test_bit_size;
> >>  bool translated = false;
> >> +bool dummy = false;
> >>  
> >>  tryagain:
> >>  address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
> >> @@ -262,14 +278,14 @@ tryagain:
> >>  if (!entry) {
> >>  entry = g_malloc0(sizeof (MapCacheEntry));
> >>  pentry->next = entry;
> >> -xen_remap_bucket(entry, cache_size, address_index);
> >> +xen_remap_bucket(entry, cache_size, address_index, dummy);
> >>  } else if (!entry->lock) {
> >>  if (!entry->vaddr_base || entry->paddr_index != address_index ||
> >>  entry->size != cache_size ||
> >>  !test_bits(address_offset >> XC_PAGE_SHIFT,
> >>  test_bit_size >> XC_PAGE_SHIFT,
> >>  entry->valid_mapping)) {
> >> -xen_remap_bucket(entry, cache_size, address_index);
> >> +xen_remap_bucket(entry, cache_size, address_index, dummy);
> >>  }
> >>  }
> >>  
> >> @@ -282,6 +298,10 @@ tryagain:
> >>  translated = true;
> >>  goto tryagain;
> >>  }
> >> +if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) {
> >> +dummy = true;
> >> +goto tryagain;
> >> +}
> >>  trace_xen_map_cache_return(NULL);
> >>  return NULL;
> >>  }
> >> -- 
> >> 2.7.4
> >>
>

Re: [Qemu-devel] [PATCH 3/4] xen/mapcache: introduce xen_remap_cache_entry()

2017-07-03 Thread Igor Druzhinin

On 01/07/17 01:08, Stefano Stabellini wrote:
> On Fri, 30 Jun 2017, Igor Druzhinin wrote:
>> This new call is trying to update a requested map cache entry
>> according to the changes in the physmap. The call is searching
>> for the entry, unmaps it, tries to translate the address and
>> maps again at the same place. If the mapping is dummy this call
>> will make it real.
>>
>> This function makes use of a new xenforeignmemory_map2() call
>> with extended interface that was recently introduced in
>> libxenforeignmemory [1].
>>
>> [1] https://www.mail-archive.com/xen-devel@lists.xen.org/msg113007.html
>>
>> Signed-off-by: Igor Druzhinin 
>> ---
>>  configure |  18 
>>  hw/i386/xen/xen-mapcache.c| 105 
>> +++---
>>  include/hw/xen/xen_common.h   |   7 +++
>>  include/sysemu/xen-mapcache.h |   6 +++
>>  4 files changed, 130 insertions(+), 6 deletions(-)
>>
>> diff --git a/configure b/configure
>> index c571ad1..ad6156b 100755
>> --- a/configure
>> +++ b/configure
>> @@ -2021,6 +2021,24 @@ EOF
>>  # Xen unstable
>>  elif
>>  cat > $TMPC <> +#undef XC_WANT_COMPAT_MAP_FOREIGN_API
>> +#include 
>> +int main(void) {
>> +  xenforeignmemory_handle *xfmem;
>> +
>> +  xfmem = xenforeignmemory_open(0, 0);
>> +  xenforeignmemory_map2(xfmem, 0, 0, 0, 0, 0, 0, 0);
>> +
>> +  return 0;
>> +}
>> +EOF
>> +compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs"
>> +  then
>> +  xen_stable_libs="-lxendevicemodel $xen_stable_libs"
>> +  xen_ctrl_version=41000
>> +  xen=yes
>> +elif
>> +cat > $TMPC <>  #undef XC_WANT_COMPAT_DEVICEMODEL_API
>>  #define __XEN_TOOLS__
>>  #include 
>> diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
>> index 05050de..5d8d990 100644
>> --- a/hw/i386/xen/xen-mapcache.c
>> +++ b/hw/i386/xen/xen-mapcache.c
>> @@ -149,6 +149,7 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
>> *opaque)
>>  }
>>  
>>  static void xen_remap_bucket(MapCacheEntry *entry,
>> + void *vaddr,
>>   hwaddr size,
>>   hwaddr address_index,
>>   bool dummy)
>> @@ -179,11 +180,11 @@ static void xen_remap_bucket(MapCacheEntry *entry,
>>  }
>>  
>>  if (!dummy) {
>> -vaddr_base = xenforeignmemory_map(xen_fmem, xen_domid,
>> -   PROT_READ|PROT_WRITE,
>> +vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
>> +   PROT_READ|PROT_WRITE, 0,
>> nb_pfn, pfns, err);
>>  if (vaddr_base == NULL) {
>> -perror("xenforeignmemory_map");
>> +perror("xenforeignmemory_map2");
>>  exit(-1);
>>  }
>>  } else {
>> @@ -191,7 +192,7 @@ static void xen_remap_bucket(MapCacheEntry *entry,
>>   * We create dummy mappings where we are unable to create a foreign
>>   * mapping immediately due to certain circumstances (i.e. on resume 
>> now)
>>   */
>> -vaddr_base = mmap(NULL, size, PROT_READ|PROT_WRITE,
>> +vaddr_base = mmap(vaddr, size, PROT_READ|PROT_WRITE,
>>MAP_ANON|MAP_SHARED, -1, 0);
>>  if (vaddr_base == NULL) {
>>  perror("mmap");
>> @@ -278,14 +279,14 @@ tryagain:
>>  if (!entry) {
>>  entry = g_malloc0(sizeof (MapCacheEntry));
>>  pentry->next = entry;
>> -xen_remap_bucket(entry, cache_size, address_index, dummy);
>> +xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
>>  } else if (!entry->lock) {
>>  if (!entry->vaddr_base || entry->paddr_index != address_index ||
>>  entry->size != cache_size ||
>>  !test_bits(address_offset >> XC_PAGE_SHIFT,
>>  test_bit_size >> XC_PAGE_SHIFT,
>>  entry->valid_mapping)) {
>> -xen_remap_bucket(entry, cache_size, address_index, dummy);
>> +xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
>>  }
>>  }
>>  
>> @@ -482,3 +483,95 @@ void xen_invalidate_map_cache(void)
>>  
>>  mapcache_unlock();
>>  }
>> +
>> +static uint8_t *xen_remap_cache_entry_unlocked(hwaddr phys_addr, hwaddr 
>> size)
> 
> I think it's best if we use a more descriptive name, such as
> xen_replace_dummy_entry to avoid confusion.
> 
> 
>> +{
>> +MapCacheEntry *entry, *pentry = NULL;
>> +hwaddr address_index;
>> +hwaddr address_offset;
>> +hwaddr cache_size = size;
>> +hwaddr test_bit_size;
>> +void *vaddr = NULL;
>> +uint8_t lock;
>> +
>> +address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
>> +address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);
>> +
>> +/* test_bit_size is always a multiple of XC_PAGE_SIZE */
>> +if (size) {
> 
> There is no need to make xen

Re: [Qemu-devel] [PATCH 4/4] target-m68k: add fscale, fgetman and fgetexp

2017-07-03 Thread Richard Henderson


On 07/03/2017 12:50 PM, Laurent Vivier wrote:

fmove.x #0x0ABCDEF12345,%fp0
fgetman.x %fp0,%fp6
fp02.1518178707571747286191852003521627e-4938   (raw
0x0abcdef12345)
fp61.34103012886691431049257516861  (raw
0x3fffabcdef123450)


This one shows exactly what I was thinking about.


So I guess the mantissa must be shifted to the left until we have a 1 in the
explicit integer part bit?


Yes.

Please try fgetexp on this same input.
I suspect the answer is -16384 - clz64(val->l.lower).
Otherwise the behaviour of fgetman above doesn't make sense.


r~

Re: [Qemu-devel] [PATCH 22/22] configure: warn on untested --disable-tcg

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

--disable-tcg will almost certainly fail to compile on non-x86 platforms,
so issue a warning.

Signed-off-by: Paolo Bonzini
---
  configure | 7 +++
  1 file changed, 7 insertions(+)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 21/22] target/i386: add the CONFIG_TCG into Makefiles

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

From: Yang Zhong 

Add the CONFIG_TCG for frontend and backend's files in the related
Makefiles.

Signed-off-by: Yang Zhong 
Signed-off-by: Paolo Bonzini 
---
  target/i386/Makefile.objs | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs
index 4fcb7f3..36949b1 100644
--- a/target/i386/Makefile.objs
+++ b/target/i386/Makefile.objs
@@ -1,6 +1,7 @@
-obj-y += translate.o helper.o cpu.o bpt_helper.o
-obj-y += excp_helper.o fpu_helper.o cc_helper.o int_helper.o svm_helper.o
-obj-y += smm_helper.o misc_helper.o mem_helper.o seg_helper.o mpx_helper.o
+obj-y += helper.o cpu.o bpt_helper.o
+obj-$(CONFIG_TCG) += translate.o
+obj-$(CONFIG_TCG) += excp_helper.o fpu_helper.o int_helper.o svm_helper.o 
cc_helper.o
+obj-$(CONFIG_TCG) += smm_helper.o misc_helper.o mem_helper.o seg_helper.o 
mpx_helper.o
  obj-y += gdbstub.o
  obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o
  obj-$(CONFIG_KVM) += kvm.o hyperv.o



Modulo my final question for patch 20,

Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 20/22] target/i386: add the tcg_enabled() in target/i386/

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

@@ -215,10 +215,12 @@ void breakpoint_handler(CPUState *cs)
  if (cs->watchpoint_hit) {
  if (cs->watchpoint_hit->flags & BP_CPU) {
  cs->watchpoint_hit = NULL;
-if (check_hw_breakpoints(env, false)) {
-raise_exception(env, EXCP01_DB);
-} else {
-cpu_loop_exit_noexc(cs);
+if (tcg_enabled()) {
+if (check_hw_breakpoints(env, false)) {
+raise_exception(env, EXCP01_DB);
+} else {
+cpu_loop_exit_noexc(cs);
+}


This seems like an odd place for the tcg_enabled check.  It seems like it 
should be much higher in the if/call chain.


Why are we doing all these bp checks only to disable the final raising of an 
exception?


Indeed, what in bpt_helper.c needs to be compiled in when !tcg_enabled?


r~

Re: [Qemu-devel] [PATCH 18/22] target/i386: split cpu_set_mxcsr() and make cpu_set_fpuc() inline

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

+static inline void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
+{
+env->mxcsr = mxcsr;
+if (tcg_enabled()) {
+tcg_update_mxcsr(env);


I'd prefer update_mxcsr_status for this new name.


+}
+}
+
+static inline void cpu_set_fpuc(CPUX86State *env, uint16_t fpuc)
+{
+ env->fpuc = fpuc;
+ if (tcg_enabled()) {
+update_fp_status(env);
+ }
+}


to match this, and to avoid the implication that it's a function in tcg/.


r~

Re: [Qemu-devel] [PATCH 19/22] target/i386: move TLB refill function out of helper.c

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

This function calls tlb_set_page_with_attrs, which is not available
when TCG is disabled.  Move it to excp_helper.c.

Signed-off-by: Paolo Bonzini
---
  target/i386/excp_helper.c | 343 +
  target/i386/helper.c  | 344 +-
  2 files changed, 344 insertions(+), 343 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 17/22] target/i386: make cpu_get_fp80()/cpu_set_fp80() static

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

From: Yang Zhong

Move cpu_get_fp80()/cpu_set_fp80() from fpu_helper.c to
machine.c because fpu_helper.c will be disabled if tcg is
disabled in the build.

Signed-off-by: Yang Zhong
Signed-off-by: Paolo Bonzini
---
  target/i386/cpu.h|  2 --
  target/i386/fpu_helper.c | 18 --
  target/i386/machine.c| 18 ++
  3 files changed, 18 insertions(+), 20 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 16/22] target/i386: move cpu_sync_bndcs_hflags() function

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

@@ -1302,10 +1332,12 @@ void cpu_report_tpr_access(CPUX86State *env, TPRAccess 
access)
  env->tpr_access_type = access;
  
  cpu_interrupt(cs, CPU_INTERRUPT_TPR);

-} else {
+} else if (tcg_enabled()) {
  cpu_restore_state(cs, cs->mem_io_pc);
  
  apic_handle_tpr_access_report(cpu->apic_state, env->eip, access);

+} else {
+abort();
  }
  }
  #endif /* !CONFIG_USER_ONLY */


This hunk belongs in another patch.


r~

Re: [Qemu-devel] [PATCH 15/22] tcg: add the CONFIG_TCG into Makefiles

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

From: Yang Zhong

Add the CONFIG_TCG for frontend and backend's files in the related
Makefiles.

Signed-off-by: Yang Zhong
Signed-off-by: Paolo Bonzini
---
  Makefile.target | 4 ++--
  accel/Makefile.objs | 2 +-
  2 files changed, 3 insertions(+), 3 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 13/22] exec: elide calls to tb_lock and tb_unlock

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

Adding assertions fixes link errors.

Signed-off-by: Paolo Bonzini
---
  exec.c | 3 +++
  1 file changed, 3 insertions(+)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 14/22] tcg: add CONFIG_TCG guards in headers

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

--- a/include/exec/helper-proto.h
+++ b/include/exec/helper-proto.h
@@ -28,7 +28,9 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), 
dh_ctype(t3), \
  
  #include "helper.h"

  #include "trace/generated-helpers.h"
+#ifdef CONFIG_TCG
  #include "tcg-runtime.h"
+#endif


Do we really want to define any of the helpers if !CONFIG_TCG?
Perhaps it's the user of this header that needs adjustment.


r~

Re: [Qemu-devel] [PATCH 12/22] tcg: move tb_lock out of translate-all.h

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

Signed-off-by: Paolo Bonzini
---
  bsd-user/main.c | 1 -
  include/exec/exec-all.h | 4 
  tcg/tcg.h   | 4 
  3 files changed, 4 insertions(+), 5 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 1/7] vmgenid: replace x-write-pointer-available hack

2017-07-03 Thread Michael S. Tsirkin

On Mon, Jul 03, 2017 at 03:50:52PM -0300, Eduardo Habkost wrote:
> On Mon, Jul 03, 2017 at 09:38:52PM +0300, Michael S. Tsirkin wrote:
> > On Thu, Jun 29, 2017 at 03:23:04PM +0200, Marc-André Lureau wrote:
> > > This compat property's sole function is to prevent the device from being
> > > instantiated. Instead of requiring an extra compat property, check if
> > > fw_cfg has DMA enabled.
> > > 
> > > This has the additional benefit of handling other cases properly, like:
> > > 
> > >   $ qemu-system-x86_64 -device vmgenid -machine none
> > >   qemu-system-x86_64: -device vmgenid: vmgenid requires DMA write support 
> > > in fw_cfg, which this machine type does not provide
> > >   $ qemu-system-x86_64 -device vmgenid -machine pc-i440fx-2.9 -global 
> > > fw_cfg.dma_enabled=off
> > >   qemu-system-x86_64: -device vmgenid: vmgenid requires DMA write support 
> > > in fw_cfg, which this machine type does not provide
> > >   $ qemu-system-x86_64 -device vmgenid -machine pc-i440fx-2.6 -global 
> > > fw_cfg.dma_enabled=on
> > >   [boots normally]
> > > 
> > > Suggested-by: Eduardo Habkost 
> > > Signed-off-by: Marc-André Lureau 
> > 
> > It's a nice cleanup, but I suspect we need to first implement
> > a framework for initialization ordering. I don't much like it
> > that we are adding more dependencies to the current bag of hacks.
> 
> I agree we should address this, but in this case there's no need to
> introduce new mechanisms for initialization ordering if we just check
> the dependencies on machine_done notifier or acpi_setup() (which is
> called by machine_done).

I guess what should fail is an attempt to register a writeable blob.
This sounds reasonable.

> -- 
> Eduardo

Re: [Qemu-devel] [PATCH 10/22] vapic: use tcg_enabled

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

Signed-off-by: Paolo Bonzini
---
  hw/i386/kvmvapic.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 09/22] monitor: disable "info jit" and "info opcount" if !TCG

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

Signed-off-by: Paolo Bonzini
---
  accel/tcg/translate-all.c | 5 +
  hmp-commands-info.hx  | 4 
  monitor.c | 2 ++
  3 files changed, 11 insertions(+)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 11/22] tcg: add the tcg-stub.c file into accel/stubs/

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

From: Yang Zhong

If tcg is disabled, the functions in the tcg-stub.c file will be called.
This is a target-independent file; do not add any platform-related
stub functions to it.

Signed-off-by: Yang Zhong
Signed-off-by: Paolo Bonzini
---
  accel/stubs/Makefile.objs |  1 +
  accel/stubs/tcg-stub.c| 22 ++
  2 files changed, 23 insertions(+)
  create mode 100644 accel/stubs/tcg-stub.c


Reviewed-by: Richard Henderson 


r~

[Qemu-devel] [PULL 21/21] i386/acpi: update expected acpi files

2017-07-03 Thread Michael S. Tsirkin

We dropped some dead code, update expected table binaries.

Fixes: 4d7e7f2702912 ("hw/acpi: remove dead acpi code")
Signed-off-by: Michael S. Tsirkin 
---
 tests/acpi-test-data/q35/DSDT| Bin 7824 -> 7782 bytes
 tests/acpi-test-data/q35/DSDT.bridge | Bin 7841 -> 7799 bytes
 tests/acpi-test-data/q35/DSDT.cphp   | Bin 8287 -> 8245 bytes
 tests/acpi-test-data/q35/DSDT.ipmibt | Bin 7899 -> 7857 bytes
 tests/acpi-test-data/q35/DSDT.memhp  | Bin 9189 -> 9147 bytes
 5 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT
index 
0dccad439b8e8e00b403c8d290a89630c4329d45..a6138c829142265255ac6f7bedd44757e944eb2f
 100644
GIT binary patch
delta 22
dcmbPW`^<*RCDc*MhWdTt92V(#L

delta 64
zcmaE6Gr^Y2CDw)R87tJV5j)#h5+Z_5Jql>bzD4Pwi6>a&pO8FMsA?E
PlS6>BrxVAbzD4Pwi6>a&pO8FMsA?E
PlS6>BrxVAbzD4Pwi6>a&pO8FMsA?E
PlS6>BrxVA$rHpY$rx;o^_1Tjod(S
PCx-xMPbZFzdCqbGX)_Wf

diff --git a/tests/acpi-test-data/q35/DSDT.memhp 
b/tests/acpi-test-data/q35/DSDT.memhp
index 
bdbefd47a5398ed96498b77bfb6a74f7ea638db1..7341c405bfb7dd21b4ec5df92411963cdf894c4d
 100644
GIT binary patch
delta 22
dcmaFrzT2J4CD)QvNJlmS`p2U`FD

delta 64
zcmdn({?whzCDbzD4Pwi6>a&pO8FMsA?E
PlS6>BrxVA

Re: [Qemu-devel] [PATCH 2/4] xen/mapcache: add an ability to create dummy mappings

2017-07-03 Thread Igor Druzhinin

On 01/07/17 01:06, Stefano Stabellini wrote:
> On Fri, 30 Jun 2017, Igor Druzhinin wrote:
>> Dummies are simple anonymous mappings that are placed instead
>> of regular foreign mappings in certain situations when we need
>> to postpone the actual mapping but still have to give a
>> memory region to QEMU to play with.
>>
>> This is planned to be used for restore on Xen.
>>
>> Signed-off-by: Igor Druzhinin 
>>
>> ---
>>  hw/i386/xen/xen-mapcache.c | 36 
>>  1 file changed, 28 insertions(+), 8 deletions(-)
>>
>> diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
>> index e60156c..05050de 100644
>> --- a/hw/i386/xen/xen-mapcache.c
>> +++ b/hw/i386/xen/xen-mapcache.c
>> @@ -150,7 +150,8 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
>> *opaque)
>>  
>>  static void xen_remap_bucket(MapCacheEntry *entry,
>>   hwaddr size,
>> - hwaddr address_index)
>> + hwaddr address_index,
>> + bool dummy)
>>  {
>>  uint8_t *vaddr_base;
>>  xen_pfn_t *pfns;
>> @@ -177,11 +178,25 @@ static void xen_remap_bucket(MapCacheEntry *entry,
>>  pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + 
>> i;
>>  }
>>  
>> -vaddr_base = xenforeignmemory_map(xen_fmem, xen_domid, 
>> PROT_READ|PROT_WRITE,
>> -  nb_pfn, pfns, err);
>> -if (vaddr_base == NULL) {
>> -perror("xenforeignmemory_map");
>> -exit(-1);
>> +if (!dummy) {
>> +vaddr_base = xenforeignmemory_map(xen_fmem, xen_domid,
>> +   PROT_READ|PROT_WRITE,
>> +   nb_pfn, pfns, err);
>> +if (vaddr_base == NULL) {
>> +perror("xenforeignmemory_map");
>> +exit(-1);
>> +}
>> +} else {
>> +/*
>> + * We create dummy mappings where we are unable to create a foreign
>> + * mapping immediately due to certain circumstances (i.e. on resume 
>> now)
>> + */
>> +vaddr_base = mmap(NULL, size, PROT_READ|PROT_WRITE,
>> +  MAP_ANON|MAP_SHARED, -1, 0);
>> +if (vaddr_base == NULL) {
>> +perror("mmap");
>> +exit(-1);
>> +}
> 
> For our sanity in debugging this in the future, I think it's best if we
> mark this mapcache entry as "dummy". Since we are at it, we could turn
> the lock field of MapCacheEntry into a flag field and #define LOCK as
> (1<<0) and DUMMY as (1<<1). Please do that as a separate patch.
>

Unfortunately, the lock field is a reference counter (or at least it looks
like one according to the source code). It seems to me that it's technically
possible to have one region locked from several places in QEMU code. For
that reason, I'd like to introduce a separate field - something like
uint8_t flags.

Igor

>>>  }
>>  
>>  entry->vaddr_base = vaddr_base;
>> @@ -211,6 +226,7 @@ static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, 
>> hwaddr size,
>>  hwaddr cache_size = size;
>>  hwaddr test_bit_size;
>>  bool translated = false;
>> +bool dummy = false;
>>  
>>  tryagain:
>>  address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
>> @@ -262,14 +278,14 @@ tryagain:
>>  if (!entry) {
>>  entry = g_malloc0(sizeof (MapCacheEntry));
>>  pentry->next = entry;
>> -xen_remap_bucket(entry, cache_size, address_index);
>> +xen_remap_bucket(entry, cache_size, address_index, dummy);
>>  } else if (!entry->lock) {
>>  if (!entry->vaddr_base || entry->paddr_index != address_index ||
>>  entry->size != cache_size ||
>>  !test_bits(address_offset >> XC_PAGE_SHIFT,
>>  test_bit_size >> XC_PAGE_SHIFT,
>>  entry->valid_mapping)) {
>> -xen_remap_bucket(entry, cache_size, address_index);
>> +xen_remap_bucket(entry, cache_size, address_index, dummy);
>>  }
>>  }
>>  
>> @@ -282,6 +298,10 @@ tryagain:
>>  translated = true;
>>  goto tryagain;
>>  }
>> +if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) {
>> +dummy = true;
>> +goto tryagain;
>> +}
>>  trace_xen_map_cache_return(NULL);
>>  return NULL;
>>  }
>> -- 
>> 2.7.4
>>

Re: [Qemu-devel] [PATCH 06/22] tcg: move page_size_init() function

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

From: Yang Zhong

translate-all.c will be disabled if tcg is disabled in the build,
so page_size_init() function and related variables will be moved
to exec.c file.

Signed-off-by: Yang Zhong
Signed-off-by: Paolo Bonzini
---
  accel/tcg/translate-all.c | 18 --
  exec.c| 20 
  2 files changed, 20 insertions(+), 18 deletions(-)


Reviewed-by: Richard Henderson 


r~

[Qemu-devel] [PULL 18/21] vhost-user: unregister slave req handler at cleanup time

2017-07-03 Thread Michael S. Tsirkin

From: Maxime Coquelin 

If the backend sends a request just before closing the socket,
the aio dispatcher might schedule its reading after the vhost
device has been cleaned, leading to a NULL pointer dereference
in slave_read();

vhost_user_cleanup() already closes the socket but it is not
enough, the handler has to be unregistered.

Signed-off-by: Maxime Coquelin 
Reviewed-by: Marc-André Lureau 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/vhost-user.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 958ee09..2203011 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -779,6 +779,7 @@ static int vhost_user_cleanup(struct vhost_dev *dev)
 
 u = dev->opaque;
 if (u->slave_fd >= 0) {
+qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
 close(u->slave_fd);
 u->slave_fd = -1;
 }
-- 
MST

Re: [Qemu-devel] [PATCH 08/22] tcg: make tcg_allowed global

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

From: Yang Zhong

Change tcg_enabled() and make sure user builds still enable tcg
even if x86 softmmu disables tcg.

Signed-off-by: Yang Zhong
Signed-off-by: Paolo Bonzini
---
  accel/tcg/cpu-exec-common.c | 2 ++
  accel/tcg/tcg-all.c | 1 -
  accel/tcg/translate-all.c   | 6 +-
  include/qemu-common.h   | 7 ++-
  4 files changed, 9 insertions(+), 7 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 07/22] tcg: tcg_handle_interrupt() function

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

From: Yang Zhong

Move tcg_handle_interrupt() from translate-common.c to
accel/tcg/tcg-all.c.

Signed-off-by: Yang Zhong
Signed-off-by: Paolo Bonzini
---
  accel/tcg/Makefile.objs  |  2 +-
  accel/tcg/tcg-all.c  | 32 +
  accel/tcg/translate-common.c | 56 
  qom/cpu.c|  2 ++
  4 files changed, 35 insertions(+), 57 deletions(-)
  delete mode 100644 accel/tcg/translate-common.c


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 05/22] vl: add tcg_enabled() for tcg related code

2017-07-03 Thread Richard Henderson


On 07/03/2017 09:34 AM, Paolo Bonzini wrote:

From: Yang Zhong

Need to disable the tcg related code in the vl.c if the
disable-tcg option is added into ./configure command.

Signed-off-by: Yang Zhong
Signed-off-by: Paolo Bonzini
---
  vl.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)


Reviewed-by: Richard Henderson 


r~

[Qemu-devel] [PULL 16/21] intel_iommu: fix migration breakage on mr switch

2017-07-03 Thread Michael S. Tsirkin

From: Peter Xu 

Migration is broken after the vfio integration work:

qemu-kvm: AHCI: Failed to start FIS receive engine: bad FIS receive buffer 
address
qemu-kvm: Failed to load ich9_ahci:ahci
qemu-kvm: error while loading state for instance 0x0 of device 
':00:1f.2/ich9_ahci'
qemu-kvm: load of migration failed: Operation not permitted

The problem is that vfio work introduced dynamic memory region
switching (actually it is also used for future PT mode), and this memory
region layout is not properly delivered to destination when migration
happens. Solution is to rebuild the layout in post_load.

Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1459906
Fixes: 558e0024 ("intel_iommu: allow dynamic switch of IOMMU region")
Reviewed-by: Jason Wang 
Signed-off-by: Peter Xu 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 2ddf3bd..88dc042 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2337,11 +2337,26 @@ static void vtd_iommu_notify_flag_changed(MemoryRegion 
*iommu,
 }
 }
 
+static int vtd_post_load(void *opaque, int version_id)
+{
+IntelIOMMUState *iommu = opaque;
+
+/*
+ * Memory regions are dynamically turned on/off depending on
+ * context entry configurations from the guest. After migration,
+ * we need to make sure the memory regions are still correct.
+ */
+vtd_switch_address_space_all(iommu);
+
+return 0;
+}
+
 static const VMStateDescription vtd_vmstate = {
 .name = "iommu-intel",
 .version_id = 1,
 .minimum_version_id = 1,
 .priority = MIG_PRI_IOMMU,
+.post_load = vtd_post_load,
 .fields = (VMStateField[]) {
 VMSTATE_UINT64(root, IntelIOMMUState),
 VMSTATE_UINT64(intr_root, IntelIOMMUState),
-- 
MST

Re: [Qemu-devel] [PATCH 4/4] target-m68k: add fscale, fgetman and fgetexp

2017-07-03 Thread Laurent Vivier

Le 03/07/2017 à 21:26, Richard Henderson a écrit :
> On 07/03/2017 09:23 AM, Laurent Vivier wrote:
>> +void HELPER(fgetman)(CPUM68KState *env, FPReg *res, FPReg *val)
>> +{
>> +if (floatx80_is_infinity(val->d)) {
>> +res->d = floatx80_default_nan(NULL);
>> +/* FIXME: set the OPERR bit in the FPSR */
>> +return;
>> +}
>> +if (floatx80_is_zero(val->d) ||
>> +floatx80_is_any_nan(val->d)) {
>> +*res = *val;
>> +return;
>> +}
>> +
>> +res->l.upper = (val->l.upper & 0x8000) | 0x3fff;
>> +if (floatx80_is_zero_or_denormal(val->d)) {
>> +res->l.lower = val->l.lower << 1;
> 
> Surely you have to do more than this for denormals?  There may be more
> than one leading zero bit in the mant.  Do you actually need to
> re-normalize?  Or does real hardware produce a so-called "unnormal" in
> this situation?

I don't know. Do you have test values I can try on real hardware to know?

I've tried:

fmove.x #0x56789ABCDEF12345,%fp0
fgetman.x %fp0,%fp6

fp01.135643728339893804160017756766172e-4932(raw
0x56789abcdef12345)
fp61.35108837373479679699883604 (raw
0x3fffacf13579bde2468a)

fmove.x #0x86789ABCDEF12345,%fp0
fgetman.x %fp0,%fp6

fp01.7660380676734113365842698475140006e-4932   (raw
0x86789abcdef12345)
fp61.05054418686739839849941802 (raw
0x3fff86789abcdef12345)

fmove.x #0x0ABCDEF12345,%fp0
fgetman.x %fp0,%fp6
fp02.1518178707571747286191852003521627e-4938   (raw
0x0abcdef12345)
fp61.34103012886691431049257516861  (raw
0x3fffabcdef123450)

So I guess the mantissa must be shifted left until we have a 1 in the
explicit integer part bit?

Thanks,
Laurent

[Qemu-devel] [PULL 13/21] fw_cfg: don't map the fw_cfg IO ports in fw_cfg_io_realize()

2017-07-03 Thread Michael S. Tsirkin

From: Mark Cave-Ayland 

As indicated by Laszlo it is a QOM bug for the realize() method to actually
map the device. Set up the IO regions within fw_cfg_io_realize() and defer
the mapping with sysbus_add_io() to the caller, as already done in
fw_cfg_init_mem_wide().

This makes the iobase and dma_iobase properties now obsolete so they can be
removed.

Signed-off-by: Mark Cave-Ayland 
Reviewed-by: Laszlo Ersek 
Reviewed-by: Eduardo Habkost 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Tested-by: Gabriel Somlo 
---
 hw/nvram/fw_cfg.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 316fca9..4e4f71a 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -96,7 +96,6 @@ struct FWCfgIoState {
 /*< public >*/
 
 MemoryRegion comb_iomem;
-uint32_t iobase, dma_iobase;
 };
 
 struct FWCfgMemState {
@@ -936,24 +935,30 @@ FWCfgState *fw_cfg_init_io_dma(uint32_t iobase, uint32_t 
dma_iobase,
 AddressSpace *dma_as)
 {
 DeviceState *dev;
+SysBusDevice *sbd;
+FWCfgIoState *ios;
 FWCfgState *s;
 uint32_t version = FW_CFG_VERSION;
 bool dma_requested = dma_iobase && dma_as;
 
 dev = qdev_create(NULL, TYPE_FW_CFG_IO);
-qdev_prop_set_uint32(dev, "iobase", iobase);
-qdev_prop_set_uint32(dev, "dma_iobase", dma_iobase);
 if (!dma_requested) {
 qdev_prop_set_bit(dev, "dma_enabled", false);
 }
 
 fw_cfg_init1(dev);
+
+sbd = SYS_BUS_DEVICE(dev);
+ios = FW_CFG_IO(dev);
+sysbus_add_io(sbd, iobase, &ios->comb_iomem);
+
 s = FW_CFG(dev);
 
 if (s->dma_enabled) {
 /* 64 bits for the address field */
 s->dma_as = dma_as;
 s->dma_addr = 0;
+sysbus_add_io(sbd, dma_iobase, &s->dma_iomem);
 
 version |= FW_CFG_VERSION_DMA;
 }
@@ -1059,8 +1064,6 @@ static void fw_cfg_file_slots_allocate(FWCfgState *s, 
Error **errp)
 }
 
 static Property fw_cfg_io_properties[] = {
-DEFINE_PROP_UINT32("iobase", FWCfgIoState, iobase, -1),
-DEFINE_PROP_UINT32("dma_iobase", FWCfgIoState, dma_iobase, -1),
 DEFINE_PROP_BOOL("dma_enabled", FWCfgIoState, parent_obj.dma_enabled,
  true),
 DEFINE_PROP_UINT16("x-file-slots", FWCfgIoState, parent_obj.file_slots,
@@ -1071,7 +1074,6 @@ static Property fw_cfg_io_properties[] = {
 static void fw_cfg_io_realize(DeviceState *dev, Error **errp)
 {
 FWCfgIoState *s = FW_CFG_IO(dev);
-SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 Error *local_err = NULL;
 
 fw_cfg_file_slots_allocate(FW_CFG(s), &local_err);
@@ -1085,13 +1087,11 @@ static void fw_cfg_io_realize(DeviceState *dev, Error 
**errp)
  * of the i/o region used is FW_CFG_CTL_SIZE */
 memory_region_init_io(&s->comb_iomem, OBJECT(s), &fw_cfg_comb_mem_ops,
   FW_CFG(s), "fwcfg", FW_CFG_CTL_SIZE);
-sysbus_add_io(sbd, s->iobase, &s->comb_iomem);
 
 if (FW_CFG(s)->dma_enabled) {
 memory_region_init_io(&FW_CFG(s)->dma_iomem, OBJECT(s),
   &fw_cfg_dma_mem_ops, FW_CFG(s), "fwcfg.dma",
   sizeof(dma_addr_t));
-sysbus_add_io(sbd, s->dma_iobase, &FW_CFG(s)->dma_iomem);
 }
 }
 
-- 
MST

[Qemu-devel] [PULL 20/21] virtio-net: fix tx queue size for !vhost-user

2017-07-03 Thread Michael S. Tsirkin

Current code segfaults when no nic peer is specified.
Fix it up - fall back to default queue size.

Fixes: 9b02e1618cf26a ("virtio-net: enable configurable tx queue size")
Cc: Wei Wang 
Signed-off-by: Michael S. Tsirkin 
---
 hw/net/virtio-net.c | 28 
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index a1fc0db..5630a9e 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -498,6 +498,24 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int 
mergeable_rx_bufs,
 }
 }
 
+static int virtio_net_max_tx_queue_size(VirtIONet *n)
+{
+NetClientState *peer = n->nic_conf.peers.ncs[0];
+
+/*
+ * Backends other than vhost-user don't support max queue size.
+ */
+if (!peer) {
+return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
+}
+
+if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
+return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
+}
+
+return VIRTQUEUE_MAX_SIZE;
+}
+
 static int peer_attach(VirtIONet *n, int index)
 {
 NetClientState *nc = qemu_get_subqueue(n->nic, index);
@@ -1964,14 +1982,8 @@ static void virtio_net_device_realize(DeviceState *dev, 
Error **errp)
 error_report("Defaulting to \"bh\"");
 }
 
-/*
- * Currently, backends other than vhost-user don't support 1024 queue
- * size.
- */
-if (n->net_conf.tx_queue_size == VIRTQUEUE_MAX_SIZE &&
-n->nic_conf.peers.ncs[0]->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
-n->net_conf.tx_queue_size = VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
-}
+n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
+n->net_conf.tx_queue_size);
 
 for (i = 0; i < n->max_queues; i++) {
 virtio_net_add_queue(n, i);
-- 
MST

[Qemu-devel] [PULL 19/21] tests: Add unit tests for the VM Generation ID feature

2017-07-03 Thread Michael S. Tsirkin

From: Ben Warren 

The following tests are implemented:
* test that a GUID passed in by command line is propagated to the guest.
  Read the GUID from guest memory
* test that the "auto" argument to the GUID generates a valid GUID, as
  seen by the guest.
* test that a GUID passed in can be queried from the monitor

  This patch is loosely based on a previous patch from:
  Gal Hammer   and Igor Mammedov 

Signed-off-by: Ben Warren 
Reviewed-by: Igor Mammedov 
Reviewed-by: Marc-André Lureau 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/vmgenid-test.c   | 203 +
 tests/Makefile.include |   2 +
 2 files changed, 205 insertions(+)
 create mode 100644 tests/vmgenid-test.c

diff --git a/tests/vmgenid-test.c b/tests/vmgenid-test.c
new file mode 100644
index 000..e7ba38c
--- /dev/null
+++ b/tests/vmgenid-test.c
@@ -0,0 +1,203 @@
+/*
+ * QTest testcase for VM Generation ID
+ *
+ * Copyright (c) 2016 Red Hat, Inc.
+ * Copyright (c) 2017 Skyport Systems
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include 
+#include 
+#include 
+#include "qemu/osdep.h"
+#include "qemu/bitmap.h"
+#include "qemu/uuid.h"
+#include "hw/acpi/acpi-defs.h"
+#include "acpi-utils.h"
+#include "libqtest.h"
+
+#define VGID_GUID "324e6eaf-d1d1-4bf6-bf41-b9bb6c91fb87"
+#define VMGENID_GUID_OFFSET 40   /* allow space for
+  * OVMF SDT Header Probe Suppressor
+  */
+#define RSDP_ADDR_INVALID 0x10 /* RSDP must be below this address */
+#define RSDP_SLEEP_US 10   /* Sleep for 100ms between tries */
+#define RSDP_TRIES_MAX100  /* Max total time is 10 seconds */
+
+typedef struct {
+AcpiTableHeader header;
+gchar name_op;
+gchar vgia[4];
+gchar val_op;
+uint32_t vgia_val;
+} QEMU_PACKED VgidTable;
+
+static uint32_t acpi_find_vgia(void)
+{
+uint32_t rsdp_offset;
+uint32_t guid_offset = 0;
+AcpiRsdpDescriptor rsdp_table;
+uint32_t rsdt;
+AcpiRsdtDescriptorRev1 rsdt_table;
+int tables_nr;
+uint32_t *tables;
+AcpiTableHeader ssdt_table;
+VgidTable vgid_table;
+int i;
+
+/* Tables may take a short time to be set up by the guest */
+for (i = 0; i < RSDP_TRIES_MAX; i++) {
+rsdp_offset = acpi_find_rsdp_address();
+if (rsdp_offset < RSDP_ADDR_INVALID) {
+break;
+}
+g_usleep(RSDP_SLEEP_US);
+}
+g_assert_cmphex(rsdp_offset, <, RSDP_ADDR_INVALID);
+
+acpi_parse_rsdp_table(rsdp_offset, &rsdp_table);
+
+rsdt = rsdp_table.rsdt_physical_address;
+/* read the header */
+ACPI_READ_TABLE_HEADER(&rsdt_table, rsdt);
+ACPI_ASSERT_CMP(rsdt_table.signature, "RSDT");
+
+/* compute the table entries in rsdt */
+tables_nr = (rsdt_table.length - sizeof(AcpiRsdtDescriptorRev1)) /
+sizeof(uint32_t);
+g_assert_cmpint(tables_nr, >, 0);
+
+/* get the addresses of the tables pointed by rsdt */
+tables = g_new0(uint32_t, tables_nr);
+ACPI_READ_ARRAY_PTR(tables, tables_nr, rsdt);
+
+for (i = 0; i < tables_nr; i++) {
+ACPI_READ_TABLE_HEADER(&ssdt_table, tables[i]);
+if (!strncmp((char *)ssdt_table.oem_table_id, "VMGENID", 7)) {
+/* the first entry in the table should be VGIA
+ * That's all we need
+ */
+ACPI_READ_FIELD(vgid_table.name_op, tables[i]);
+g_assert(vgid_table.name_op == 0x08);  /* name */
+ACPI_READ_ARRAY(vgid_table.vgia, tables[i]);
+g_assert(memcmp(vgid_table.vgia, "VGIA", 4) == 0);
+ACPI_READ_FIELD(vgid_table.val_op, tables[i]);
+g_assert(vgid_table.val_op == 0x0C);  /* dword */
+ACPI_READ_FIELD(vgid_table.vgia_val, tables[i]);
+/* The GUID is written at a fixed offset into the fw_cfg file
+ * in order to implement the "OVMF SDT Header probe suppressor"
+ * see docs/specs/vmgenid.txt for more details
+ */
+guid_offset = vgid_table.vgia_val + VMGENID_GUID_OFFSET;
+break;
+}
+}
+g_free(tables);
+return guid_offset;
+}
+
+static void read_guid_from_memory(QemuUUID *guid)
+{
+uint32_t vmgenid_addr;
+int i;
+
+vmgenid_addr = acpi_find_vgia();
+g_assert(vmgenid_addr);
+
+/* Read the GUID directly from guest memory */
+for (i = 0; i < 16; i++) {
+guid->data[i] = readb(vmgenid_addr + i);
+}
+/* The GUID is in little-endian format in the guest, while QEMU
+ * uses big-endian.  Swap after reading.
+ */
+qemu_uuid_bswap(guid);
+}
+
+static void read_guid_from_monitor(QemuUUID *guid)
+{
+QDict *rsp, *rsp_ret;
+const char *guid_str;
+
+rsp = qmp("{ 'execute': 'query-vm-generation-id' }");
+if (qdict_haskey(rsp, "return")) {
+rsp_ret = qdict_get_q

[Qemu-devel] [PULL 14/21] fw_cfg: move setting of FW_CFG_VERSION_DMA bit to fw_cfg_init1()

2017-07-03 Thread Michael S. Tsirkin

From: Mark Cave-Ayland 

The setting of the FW_CFG_VERSION_DMA bit is the same across both the
TYPE_FW_CFG_MEM and TYPE_FW_CFG_IO devices, so unify the logic in
fw_cfg_init1().

Signed-off-by: Mark Cave-Ayland 
Reviewed-by: Laszlo Ersek 
Reviewed-by: Eduardo Habkost 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Tested-by: Gabriel Somlo 
---
 hw/nvram/fw_cfg.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 4e4f71a..99bdbc2 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -913,6 +913,7 @@ static void fw_cfg_init1(DeviceState *dev)
 {
 FWCfgState *s = FW_CFG(dev);
 MachineState *machine = MACHINE(qdev_get_machine());
+uint32_t version = FW_CFG_VERSION;
 
 assert(!object_resolve_path(FW_CFG_PATH, NULL));
 
@@ -927,6 +928,12 @@ static void fw_cfg_init1(DeviceState *dev)
 fw_cfg_bootsplash(s);
 fw_cfg_reboot(s);
 
+if (s->dma_enabled) {
+version |= FW_CFG_VERSION_DMA;
+}
+
+fw_cfg_add_i32(s, FW_CFG_ID, version);
+
 s->machine_ready.notify = fw_cfg_machine_ready;
 qemu_add_machine_init_done_notifier(&s->machine_ready);
 }
@@ -938,7 +945,6 @@ FWCfgState *fw_cfg_init_io_dma(uint32_t iobase, uint32_t 
dma_iobase,
 SysBusDevice *sbd;
 FWCfgIoState *ios;
 FWCfgState *s;
-uint32_t version = FW_CFG_VERSION;
 bool dma_requested = dma_iobase && dma_as;
 
 dev = qdev_create(NULL, TYPE_FW_CFG_IO);
@@ -959,12 +965,8 @@ FWCfgState *fw_cfg_init_io_dma(uint32_t iobase, uint32_t 
dma_iobase,
 s->dma_as = dma_as;
 s->dma_addr = 0;
 sysbus_add_io(sbd, dma_iobase, &s->dma_iomem);
-
-version |= FW_CFG_VERSION_DMA;
 }
 
-fw_cfg_add_i32(s, FW_CFG_ID, version);
-
 return s;
 }
 
@@ -980,7 +982,6 @@ FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
 DeviceState *dev;
 SysBusDevice *sbd;
 FWCfgState *s;
-uint32_t version = FW_CFG_VERSION;
 bool dma_requested = dma_addr && dma_as;
 
 dev = qdev_create(NULL, TYPE_FW_CFG_MEM);
@@ -1001,11 +1002,8 @@ FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
 s->dma_as = dma_as;
 s->dma_addr = 0;
 sysbus_mmio_map(sbd, 2, dma_addr);
-version |= FW_CFG_VERSION_DMA;
 }
 
-fw_cfg_add_i32(s, FW_CFG_ID, version);
-
 return s;
 }
 
-- 
MST

Re: [Qemu-devel] [PATCH 1/4] block/qcow2: add compression_algorithm create option

2017-07-03 Thread Peter Lieven

Am 27.06.2017 um 17:04 schrieb Eric Blake:
> On 06/27/2017 09:49 AM, Peter Lieven wrote:
>
>> Before I continue, can you please give feedback on the following spec
>> change:
>>
>> diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
>> index 80cdfd0..f1428e9 100644
>> --- a/docs/interop/qcow2.txt
>> +++ b/docs/interop/qcow2.txt
>> @@ -85,7 +85,11 @@ in the description of a field.
>>  be written to (unless for regaining
>>  consistency).
>>
>> -Bits 2-63:  Reserved (set to 0)
>> +Bit 2:  Compression format bit.  Iff this bit
> I know what this means, but spelling it "If and only if" or "When" might
> make more sense to other readers, as "Iff" is not common in English.
>
>> is set then
>> +the compression format extension MUST
>> be present
>> +and MUST be parsed and checked for
>> compatibility.
>> +
>> +Bits 3-63:  Reserved (set to 0)
>>
>>   80 -  87:  compatible_features
>>  Bitmask of compatible features. An implementation can
>> @@ -135,6 +139,7 @@ be stored. Each extension has a structure like the
>> following:
>>  0xE2792ACA - Backing file format name
>>  0x6803f857 - Feature name table
>>  0x23852875 - Bitmaps extension
>> +0xC0318300 - Compression format extension
> Now that you aren't burning 256 magic numbers, it may make sense to have
> the last two hex digits be non-zero.
>
>
>> +== Compression format extension ==
>> +
>> +The compression format extension is an optional header extension. It
>> provides
> Inline pasting created interesting wrapping, but the actual patch will
> obviously read better.
>
>> +the ability to specify the compression algorithm and compression
>> parameters
>> +that are used for compressed clusters. This new header MUST be present if
>> +the incompatible-feature bit "compression format bit" is set and MUST
>> be absent
>> +otherwise.
>> +
>> +The fields of the compression format extension are:
>> +
>> +Byte  0 - 15:  compression_format_name (padded with zeros, but not
>> +   necessarily null terminated if it has full length)
> Do we really want arbitrary names of formats, or do we want to specify
> specific algorithms (gzip, lzo, zstd) as an enum?  Which way gives us
> maximum likelihood of interoperability?
>
>> +
>> +  16:  compression_level (uint8_t)
>> +   0 = default compression level
>> +   1 = lowest compression level
>> +   x = highest compression level (the highest compression
>> +   level may vary for different compression formats)
>> +
>> + 17 - 23:  Reserved for future use, must be zero.
> Feels pretty limited - you don't have a length field for variable-length
> extension of additional parameters, but have to fit all additions in the
> next 8 bytes.  Yes, all extension headers are already paired with a
> length parameter outside of the struct, sent alongside the header magic
> number, but embedding a length directly in the header (while redundant)
> makes it easier to keep information local to the header.  See
> extra_data_size under Bitmap directory, for example.  Of course, we may
> turn those 8 bytes INTO a length field, that then describe the rest of
> the variable length parameters, but why not do it up front?
>
> If we go with an enum mapping of supported compression formats, then you
> can go into further details on exactly what extra parameters are
> supported for each algorithm; while leaving it as a free-form text string
> makes it harder to interpret what any additional payload will represent.
>

I sent a V2 of the series, including the update of the spec, last week.

Maybe you can have a look to see whether this version is better.


Thanks,

Peter

[Qemu-devel] [PULL 17/21] vhost: ensure vhost_ops are set before calling iotlb callback

2017-07-03 Thread Michael S. Tsirkin

From: Maxime Coquelin 

This patch fixes a crash that happens when vhost-user iommu
support is enabled and vhost-user socket is closed.

When it happens, if an IOTLB invalidation notification is sent
by the IOMMU, vhost_ops's NULL pointer is dereferenced.

Signed-off-by: Maxime Coquelin 
Reviewed-by: Marc-André Lureau 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/vhost-backend.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 4e31de1..cb055e8 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -309,7 +309,10 @@ int vhost_backend_update_device_iotlb(struct vhost_dev 
*dev,
 return -EINVAL;
 }
 
-return dev->vhost_ops->vhost_send_device_iotlb_msg(dev, &imsg);
+if (dev->vhost_ops && dev->vhost_ops->vhost_send_device_iotlb_msg)
+return dev->vhost_ops->vhost_send_device_iotlb_msg(dev, &imsg);
+
+return -ENODEV;
 }
 
 int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev,
@@ -321,7 +324,10 @@ int vhost_backend_invalidate_device_iotlb(struct vhost_dev 
*dev,
 imsg.size = len;
 imsg.type = VHOST_IOTLB_INVALIDATE;
 
-return dev->vhost_ops->vhost_send_device_iotlb_msg(dev, &imsg);
+if (dev->vhost_ops && dev->vhost_ops->vhost_send_device_iotlb_msg)
+return dev->vhost_ops->vhost_send_device_iotlb_msg(dev, &imsg);
+
+return -ENODEV;
 }
 
 int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
-- 
MST

[Qemu-devel] [PULL 08/21] pci: Replace pci_add_capability2() with pci_add_capability()

2017-07-03 Thread Michael S. Tsirkin

From: Mao Zhongyi 

After the patch 'Make errp the last parameter of pci_add_capability()',
pci_add_capability() and pci_add_capability2() now do exactly the same.
So drop the wrapper pci_add_capability() of pci_add_capability2(), then
replace the pci_add_capability2() with pci_add_capability() everywhere.

Cc: pbonz...@redhat.com
Cc: r...@twiddle.net
Cc: ehabk...@redhat.com
Cc: m...@redhat.com
Cc: dmi...@daynix.com
Cc: jasow...@redhat.com
Cc: mar...@redhat.com
Cc: alex.william...@redhat.com
Cc: arm...@redhat.com
Suggested-by: Eduardo Habkost 
Signed-off-by: Mao Zhongyi 
Reviewed-by: Marcel Apfelbaum 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/pci/pci.h |  3 ---
 hw/i386/kvm/pci-assign.c | 14 +++---
 hw/ide/ich.c |  2 +-
 hw/pci/msi.c |  2 +-
 hw/pci/msix.c|  2 +-
 hw/pci/pci.c | 20 ++--
 hw/vfio/pci.c|  6 +++---
 7 files changed, 15 insertions(+), 34 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index fe52aa8..e598b09 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -358,9 +358,6 @@ pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int 
region_num);
 int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
uint8_t offset, uint8_t size,
Error **errp);
-int pci_add_capability2(PCIDevice *pdev, uint8_t cap_id,
-   uint8_t offset, uint8_t size,
-   Error **errp);
 
 void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
 
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index 87dcbdd..3d60455 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1254,7 +1254,7 @@ static int assigned_device_pci_cap_init(PCIDevice 
*pci_dev, Error **errp)
 dev->dev.cap_present |= QEMU_PCI_CAP_MSI;
 dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
 /* Only 32-bit/no-mask currently supported */
-ret = pci_add_capability2(pci_dev, PCI_CAP_ID_MSI, pos, 10,
+ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSI, pos, 10,
   &local_err);
 if (ret < 0) {
 error_propagate(errp, local_err);
@@ -1288,7 +1288,7 @@ static int assigned_device_pci_cap_init(PCIDevice 
*pci_dev, Error **errp)
 }
 dev->dev.cap_present |= QEMU_PCI_CAP_MSIX;
 dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
-ret = pci_add_capability2(pci_dev, PCI_CAP_ID_MSIX, pos, 12,
+ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSIX, pos, 12,
   &local_err);
 if (ret < 0) {
 error_propagate(errp, local_err);
@@ -1318,7 +1318,7 @@ static int assigned_device_pci_cap_init(PCIDevice 
*pci_dev, Error **errp)
 if (pos) {
 uint16_t pmc;
 
-ret = pci_add_capability2(pci_dev, PCI_CAP_ID_PM, pos, PCI_PM_SIZEOF,
+ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, pos, PCI_PM_SIZEOF,
   &local_err);
 if (ret < 0) {
 error_propagate(errp, local_err);
@@ -1386,7 +1386,7 @@ static int assigned_device_pci_cap_init(PCIDevice 
*pci_dev, Error **errp)
 return -EINVAL;
 }
 
-ret = pci_add_capability2(pci_dev, PCI_CAP_ID_EXP, pos, size,
+ret = pci_add_capability(pci_dev, PCI_CAP_ID_EXP, pos, size,
   &local_err);
 if (ret < 0) {
 error_propagate(errp, local_err);
@@ -1462,7 +1462,7 @@ static int assigned_device_pci_cap_init(PCIDevice 
*pci_dev, Error **errp)
 uint32_t status;
 
 /* Only expose the minimum, 8 byte capability */
-ret = pci_add_capability2(pci_dev, PCI_CAP_ID_PCIX, pos, 8,
+ret = pci_add_capability(pci_dev, PCI_CAP_ID_PCIX, pos, 8,
   &local_err);
 if (ret < 0) {
 error_propagate(errp, local_err);
@@ -1490,7 +1490,7 @@ static int assigned_device_pci_cap_init(PCIDevice 
*pci_dev, Error **errp)
 pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD, 0);
 if (pos) {
 /* Direct R/W passthrough */
-ret = pci_add_capability2(pci_dev, PCI_CAP_ID_VPD, pos, 8,
+ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8,
   &local_err);
 if (ret < 0) {
 error_propagate(errp, local_err);
@@ -1508,7 +1508,7 @@ static int assigned_device_pci_cap_init(PCIDevice 
*pci_dev, Error **errp)
 pos += PCI_CAP_LIST_NEXT) {
 uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS);
 /* Direct R/W passthrough */
-ret = pci_add_capability2(pci_dev, PCI_CAP_ID_VNDR, pos, len,
+ret = pci_add_capability(pci_dev, PCI_CAP_ID_VNDR, pos, len,
   &local_err);
 if (ret < 0) {
 error_propagate(errp, local_err);
diff --git

1 2 3 4 >

1 - 100 of 351 matches

Mail list logo