date:20170217

[Qemu-devel] [PATCH v6 1/2] PCI: add missing classes in pci_ids.h to build device tree

2017-02-17 Thread Laurent Vivier

To allow QEMU to add PCI entries in device tree,
we must have a more exhaustive list of PCI class IDs.

This patch synchronizes as much as possible with
pci_ids.h and adds some missing IDs from SLOF.

Signed-off-by: Laurent Vivier 
Reviewed-by: Michael S. Tsirkin 
Reviewed-by: Thomas Huth 
---
 include/hw/pci/pci_ids.h | 112 +++
 1 file changed, 103 insertions(+), 9 deletions(-)

diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index d77ca60..d22ad8d 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -13,41 +13,84 @@
 
 /* Device classes and subclasses */
 
-#define PCI_BASE_CLASS_STORAGE   0x01
-#define PCI_BASE_CLASS_NETWORK   0x02
+#define PCI_CLASS_NOT_DEFINED            0x0000
+#define PCI_CLASS_NOT_DEFINED_VGA0x0001
 
+#define PCI_BASE_CLASS_STORAGE   0x01
 #define PCI_CLASS_STORAGE_SCSI   0x0100
 #define PCI_CLASS_STORAGE_IDE0x0101
+#define PCI_CLASS_STORAGE_FLOPPY 0x0102
+#define PCI_CLASS_STORAGE_IPI0x0103
 #define PCI_CLASS_STORAGE_RAID   0x0104
+#define PCI_CLASS_STORAGE_ATA0x0105
 #define PCI_CLASS_STORAGE_SATA   0x0106
+#define PCI_CLASS_STORAGE_SAS0x0107
 #define PCI_CLASS_STORAGE_EXPRESS0x0108
 #define PCI_CLASS_STORAGE_OTHER  0x0180
 
+#define PCI_BASE_CLASS_NETWORK   0x02
 #define PCI_CLASS_NETWORK_ETHERNET   0x0200
+#define PCI_CLASS_NETWORK_TOKEN_RING 0x0201
+#define PCI_CLASS_NETWORK_FDDI   0x0202
+#define PCI_CLASS_NETWORK_ATM0x0203
+#define PCI_CLASS_NETWORK_ISDN   0x0204
+#define PCI_CLASS_NETWORK_WORLDFIP   0x0205
+#define PCI_CLASS_NETWORK_PICMG214   0x0206
 #define PCI_CLASS_NETWORK_OTHER  0x0280
 
+#define PCI_BASE_CLASS_DISPLAY   0x03
 #define PCI_CLASS_DISPLAY_VGA0x0300
+#define PCI_CLASS_DISPLAY_XGA0x0301
+#define PCI_CLASS_DISPLAY_3D 0x0302
 #define PCI_CLASS_DISPLAY_OTHER  0x0380
 
+#define PCI_BASE_CLASS_MULTIMEDIA0x04
+#define PCI_CLASS_MULTIMEDIA_VIDEO   0x0400
 #define PCI_CLASS_MULTIMEDIA_AUDIO   0x0401
+#define PCI_CLASS_MULTIMEDIA_PHONE   0x0402
+#define PCI_CLASS_MULTIMEDIA_OTHER   0x0480
 
+#define PCI_BASE_CLASS_MEMORY0x05
 #define PCI_CLASS_MEMORY_RAM 0x0500
+#define PCI_CLASS_MEMORY_FLASH   0x0501
+#define PCI_CLASS_MEMORY_OTHER   0x0580
 
-#define PCI_CLASS_SYSTEM_SDHCI   0x0805
-#define PCI_CLASS_SYSTEM_OTHER   0x0880
-
-#define PCI_CLASS_SERIAL_USB 0x0c03
-#define PCI_CLASS_SERIAL_SMBUS   0x0c05
-
+#define PCI_BASE_CLASS_BRIDGE0x06
 #define PCI_CLASS_BRIDGE_HOST0x0600
 #define PCI_CLASS_BRIDGE_ISA 0x0601
+#define PCI_CLASS_BRIDGE_EISA0x0602
+#define PCI_CLASS_BRIDGE_MC  0x0603
 #define PCI_CLASS_BRIDGE_PCI 0x0604
 #define PCI_CLASS_BRIDGE_PCI_INF_SUB 0x01
+#define PCI_CLASS_BRIDGE_PCMCIA  0x0605
+#define PCI_CLASS_BRIDGE_NUBUS   0x0606
+#define PCI_CLASS_BRIDGE_CARDBUS 0x0607
+#define PCI_CLASS_BRIDGE_RACEWAY 0x0608
+#define PCI_CLASS_BRIDGE_PCI_SEMITP  0x0609
+#define PCI_CLASS_BRIDGE_IB_PCI  0x060a
 #define PCI_CLASS_BRIDGE_OTHER   0x0680
 
+#define PCI_BASE_CLASS_COMMUNICATION 0x07
 #define PCI_CLASS_COMMUNICATION_SERIAL   0x0700
+#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701
+#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702
+#define PCI_CLASS_COMMUNICATION_MODEM0x0703
+#define PCI_CLASS_COMMUNICATION_GPIB 0x0704
+#define PCI_CLASS_COMMUNICATION_SC   0x0705
 #define PCI_CLASS_COMMUNICATION_OTHER0x0780
 
+#define PCI_BASE_CLASS_SYSTEM0x08
+#define PCI_CLASS_SYSTEM_PIC 0x0800
+#define PCI_CLASS_SYSTEM_PIC_IOAPIC  0x080010
+#define PCI_CLASS_SYSTEM_PIC_IOXAPIC 0x080020
+#define PCI_CLASS_SYSTEM_DMA 0x0801
+#define PCI_CLASS_SYSTEM_TIMER   0x0802
+#define PCI_CLASS_SYSTEM_RTC 0x0803
+#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804
+#define PCI_CLASS_SYSTEM_SDHCI   0x0805
+#define PCI_CLASS_SYSTEM_OTHER   0x0880
+
+#define PCI_BASE_CLASS_INPUT 0x09
 #define PCI_CLASS_INPUT_KEYBOARD 0x0900
 #define PCI_CLASS_INPUT_PEN  0x0901
 #define PCI_CLASS_INPUT_MOUSE0x0902
@@ -55,8 +98,59 @@
 #define PCI_CLASS_INPUT_GAMEPORT 0x0904
 #define PCI_CLASS_INPUT_OTHER0x0980
 
-#define PCI_CLASS_PROCESSOR_CO   0x0b40
+#define PCI_BASE_CLASS_DOCKING   0x0a
+#define PCI_CLASS_DOCKING_GENERIC0x0a00
+#define PCI_CLASS_DOCKING_OTHER  0x0a80
+
+#define PCI_BASE_CLASS_PROCESSOR 0x0b
+#define PCI_CLASS_PROCESSOR_PENTIUM  0x0b02
 #define PCI_CLASS_PROCESSOR_POWERPC  0x0b20
+#define PCI_CLASS_PROCESSOR_MIPS 0x0b30
+#define PCI_CLASS_

Re: [Qemu-devel] [PATCH 15/17] iotests: add default node-name

2017-02-17 Thread Vladimir Sementsov-Ogievskiy


17.02.2017 15:21, Fam Zheng wrote:

On Fri, 02/17 13:20, Vladimir Sementsov-Ogievskiy wrote:

16.02.2017 16:48, Fam Zheng wrote:

On Mon, 02/13 12:54, Vladimir Sementsov-Ogievskiy wrote:

When testing migration, the node-names auto-generated by qemu differ
between the source and destination qemu, and migration fails. After this
patch, the node-names auto-generated by iotests will be the same.

What should be done in libvirt to make sure the node-names are matching
correctly at both sides?

Hmm, just set node names appropriately?

But I think the problem is that node names are not configurable from libvirt
today, and then the migration will fail. Should the device name take precedence
in the code, to make it easier?


Why not configurable? libvirt can use the same parameters as I do in this
patch. Or what do you mean?




Fam



--
Best regards,
Vladimir

[Qemu-devel] [PATCH v6 2/2] spapr: generate DT node names

2017-02-17 Thread Laurent Vivier

When DT node names for PCI devices are generated by SLOF,
they are generated according to the type of the device
(for instance, ethernet for virtio-net-pci device).

Node name for hotplugged devices is generated by QEMU.
This patch adds the mechanism to QEMU to create the node
name according to the device type too.

The data structure has been roughly copied from OpenBIOS/OpenHackware,
node names from SLOF.

Example:

Hotplugging some PCI cards with QEMU monitor:

device_add virtio-tablet-pci
device_add virtio-serial-pci
device_add virtio-mouse-pci
device_add virtio-scsi-pci
device_add virtio-gpu-pci
device_add ne2k_pci
device_add nec-usb-xhci
device_add intel-hda

What we can see in linux device tree:

for dir in /proc/device-tree/pci@8002000/*@*/; do
echo $dir
cat $dir/name
echo
done

WITHOUT this patch:

/proc/device-tree/pci@8002000/pci@0/
pci
/proc/device-tree/pci@8002000/pci@1/
pci
/proc/device-tree/pci@8002000/pci@2/
pci
/proc/device-tree/pci@8002000/pci@3/
pci
/proc/device-tree/pci@8002000/pci@4/
pci
/proc/device-tree/pci@8002000/pci@5/
pci
/proc/device-tree/pci@8002000/pci@6/
pci
/proc/device-tree/pci@8002000/pci@7/
pci

WITH this patch:

/proc/device-tree/pci@8002000/communication-controller@1/
communication-controller
/proc/device-tree/pci@8002000/display@4/
display
/proc/device-tree/pci@8002000/ethernet@5/
ethernet
/proc/device-tree/pci@8002000/input-controller@0/
input-controller
/proc/device-tree/pci@8002000/mouse@2/
mouse
/proc/device-tree/pci@8002000/multimedia-device@7/
multimedia-device
/proc/device-tree/pci@8002000/scsi@3/
scsi
/proc/device-tree/pci@8002000/usb-xhci@6/
usb-xhci

Signed-off-by: Laurent Vivier 
Reviewed-by: Thomas Huth 
Reviewed-by: David Gibson 
---
 hw/ppc/spapr_pci.c | 290 ++---
 1 file changed, 276 insertions(+), 14 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index fd6fc1d..1c4fa8b 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -43,6 +43,7 @@
 
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_ids.h"
 #include "hw/ppc/spapr_drc.h"
 #include "sysemu/device_tree.h"
 #include "sysemu/kvm.h"
@@ -946,6 +947,274 @@ static void populate_resource_props(PCIDevice *d, 
ResourceProps *rp)
 rp->assigned_len = assigned_idx * sizeof(ResourceFields);
 }
 
+typedef struct PCIClass PCIClass;
+typedef struct PCISubClass PCISubClass;
+typedef struct PCIIFace PCIIFace;
+
+struct PCIIFace {
+int iface;
+const char *name;
+};
+
+struct PCISubClass {
+int subclass;
+const char *name;
+const PCIIFace *iface;
+};
+
+struct PCIClass {
+const char *name;
+const PCISubClass *subc;
+};
+
+static const PCISubClass undef_subclass[] = {
+{ PCI_CLASS_NOT_DEFINED_VGA, "display", NULL },
+{ 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mass_subclass[] = {
+{ PCI_CLASS_STORAGE_SCSI, "scsi", NULL },
+{ PCI_CLASS_STORAGE_IDE, "ide", NULL },
+{ PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL },
+{ PCI_CLASS_STORAGE_IPI, "ipi", NULL },
+{ PCI_CLASS_STORAGE_RAID, "raid", NULL },
+{ PCI_CLASS_STORAGE_ATA, "ata", NULL },
+{ PCI_CLASS_STORAGE_SATA, "sata", NULL },
+{ PCI_CLASS_STORAGE_SAS, "sas", NULL },
+{ 0xFF, NULL, NULL },
+};
+
+static const PCISubClass net_subclass[] = {
+{ PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL },
+{ PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL },
+{ PCI_CLASS_NETWORK_FDDI, "fddi", NULL },
+{ PCI_CLASS_NETWORK_ATM, "atm", NULL },
+{ PCI_CLASS_NETWORK_ISDN, "isdn", NULL },
+{ PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL },
+{ PCI_CLASS_NETWORK_PICMG214, "picmg", NULL },
+{ 0xFF, NULL, NULL },
+};
+
+static const PCISubClass displ_subclass[] = {
+{ PCI_CLASS_DISPLAY_VGA, "vga", NULL },
+{ PCI_CLASS_DISPLAY_XGA, "xga", NULL },
+{ PCI_CLASS_DISPLAY_3D, "3d-controller", NULL },
+{ 0xFF, NULL, NULL },
+};
+
+static const PCISubClass media_subclass[] = {
+{ PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL },
+{ PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL },
+{ PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL },
+{ 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mem_subclass[] = {
+{ PCI_CLASS_MEMORY_RAM, "memory", NULL },
+{ PCI_CLASS_MEMORY_FLASH, "flash", NULL },
+{ 0xFF, NULL, NULL },
+};
+
+static const PCISubClass bridg_subclass[] = {
+{ PCI_CLASS_BRIDGE_HOST, "host", NULL },
+{ PCI_CLASS_BRIDGE_ISA, "isa", NULL },
+{ PCI_CLASS_BRIDGE_EISA, "eisa", NULL },
+{ PCI_CLASS_BRIDGE_MC, "mca", NULL },
+{ PCI_CLASS_BRIDGE_PCI, "pci", NULL },
+{ PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL },
+{ PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL },
+{ PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL },
+{ PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL },
+{ PCI_CLASS_BRIDGE_PCI_SEMIT

Re: [Qemu-devel] [PATCH v8 4/5] target-arm: Add GICv3CPUState in CPUARMState struct

2017-02-17 Thread Peter Maydell

On 17 February 2017 at 06:31,   wrote:
> From: Vijaya Kumar K 
>
> Add gicv3state void pointer to CPUARMState struct
> to store GICv3CPUState.
>
> In case of usecase like CPU reset, we need to reset
> GICv3CPUState of the CPU. In such scenario, this pointer
> becomes handy.
>
> This patch take care of only GICv3.

I'm not sure what you mean to say with this sentence ?

>
> Signed-off-by: Vijaya Kumar K 

Otherwise
Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [Qemu-devel] [PATCH v15 08/25] block: introduce auto-loading bitmaps

2017-02-17 Thread Kevin Wolf

Am 17.02.2017 um 14:22 hat Denis V. Lunev geschrieben:
> On 02/17/2017 03:48 PM, Kevin Wolf wrote:
> > Am 17.02.2017 um 13:40 hat Vladimir Sementsov-Ogievskiy geschrieben:
> >> 17.02.2017 15:09, Kevin Wolf wrote:
> >>> Am 17.02.2017 um 12:46 hat Vladimir Sementsov-Ogievskiy geschrieben:
>  16.02.2017 14:49, Kevin Wolf wrote:
> > Am 16.02.2017 um 12:25 hat Kevin Wolf geschrieben:
> >> Am 15.02.2017 um 11:10 hat Vladimir Sementsov-Ogievskiy geschrieben:
> >>> Auto loading bitmaps are bitmaps stored in the disk image, which 
> >>> should
> >>> be loaded when the image is opened and become BdrvDirtyBitmaps for the
> >>> corresponding drive.
> >>>
> >>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> >>> Reviewed-by: John Snow 
> >>> Reviewed-by: Max Reitz 
> >> Why do we need a new BlockDriver callback and special code for it in
> >> bdrv_open_common()? The callback is only ever called immediately after
> >> .bdrv_open/.bdrv_file_open, so can't the drivers just do this 
> >> internally
> >> in their .bdrv_open implementation? Even more so because qcow2 is the
> >> only driver that supports this callback.
> > Actually, don't we have to call this in qcow2_invalidate_cache()?
> > Currently, I think, after a migration, the autoload bitmaps aren't
> > loaded.
> >
> > By moving the qcow2_load_autoloading_dirty_bitmaps() call to
> > qcow2_open(), this would be fixed.
> >
> > Kevin
>  Bitmap should not be reloaded on any intermediate qcow2-open's,
>  reopens, etc. It should be loaded once, on bdrv_open, to not create
>  extra collisions (between in-memory bitmap and it's stored version).
>  That was the idea.
> 
>  For bitmaps migration there are separate series, we shouldn't load
>  bitmap from file on migration, as it's version in the file is
>  outdated.
> >>> That's not what your series is doing, though. It loads the bitmaps when
> >> Actually, they will not be loaded as they will have IN_USE flag.
> >>
> >>> migration starts and doesn't reload then when migration completes, even
> >>> though they are stale. Migration with shared storage would just work
> >>> without an extra series if you did these things in the correct places.
> >>>
> >>> As a reminder, this is how migration with shared storage works (or
> >>> should work with your series):
> >>>
> >>> 1. Start destination qemu instance. This calls bdrv_open() with
> >>>BDRV_O_INACTIVE. We can read in some metadata, though we don't need
> >>>much more than the image size at this point. Writing to the image is
> >>>still impossible.
> >>>
> >>> 2. Start migration on the source, while the VM is still writing to the
> >>>image, rendering the cached metadata from step 1 stale.
> >>>
> >>> 3. Migration completes:
> >>>
> >>> a. Stop the VM
> >>>
> >>> b. Inactivate all images in the source qemu. This is where all
> >>>metadata needs to be written back to the image file, including
> >>>bitmaps. No writes to the image are possible after this point
> >>>because BDRV_O_INACTIVE is set.
> >>>
> >>> c. Invalidate the caches in the destination qemu, i.e. reload
> >>>everything from the file that could have changed since step 1,
> >>>including bitmaps. BDRV_O_INACTIVE is cleared, making the image
> >>>ready for writes.
> >>>
> >>> d. Resume the VM on the destination
> >>>
> >>> 4. Exit the source qemu process, which involves bdrv_close(). Note that
> >>>at this point, no writing to the image file is possible any more,
> >>>it's the destination qemu process that own the image file now.
> >>>
> >>> Your series loads and stores bitmaps in steps 1 and 4. This means that
> >> Actually - not. in 1 bitmaps are "in use", in 4 INACTIVE is set (and
> >> it is checked), nothing is stored.
> >>
> >>> they are stale on the destination when migration completes, and that
> >>> bdrv_close() wants to write to an image file that it doesn't own any
> >>> more, which will cause an assertion failure. If you instead move things
> >>> to steps 3b and 3c, it will just work.
> >> Hmm, I understand the idea.. But this will interfere with postcopy
> >> bitmap migration. So if we really need this, there should be some
> >> additional control flags or capabilities.. The problem of your
> >> approach is that bitmap actually migrated in the short state when
> >> source and destination are stopped, it may take time, as bitmaps may
> >> be large.
> > You can always add optimisations, but this is the basic lifecycle
> > process of block devices in qemu, so it would be good to adhere to it.
> > So far there are no other pieces of information that are ignored in
> > bdrv_invalidate()/bdrv_inactivate() and instead only handled in
> > bdrv_open()/bdrv_close(). It's a matter of consistency, too.
> >
> > And not having to add special cases for specific features in the generic
> > bdrv_open()/close()

Re: [Qemu-devel] [PATCH v8 4/8] ACPI: Add Virtual Machine Generation ID support

2017-02-17 Thread Laszlo Ersek

On 02/17/17 14:05, Igor Mammedov wrote:
> On Fri, 17 Feb 2017 13:50:40 +0100
> Laszlo Ersek  wrote:
> 
>> CC Dave
>>
>> On 02/17/17 11:43, Igor Mammedov wrote:
>>> On Thu, 16 Feb 2017 15:15:36 -0800
>>> b...@skyportsystems.com wrote:
>>>
 From: Ben Warren 

 This implements the VM Generation ID feature by passing a 128-bit
 GUID to the guest via a fw_cfg blob.
 Any time the GUID changes, an ACPI notify event is sent to the guest

 The user interface is a simple device with one parameter:
  - guid (string, must be "auto" or in UUID format
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
>>> I've given it some testing with WS2012R2 and v4 patches for Seabios,
>>>
>>> Windows is able to read initial GUID allocation and writeback
>>> seems to work somehow:
>>>
>>> (qemu) info vm-generation-id 
>>> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
>>>
>>> vmgenid client in Windows reads it as 2 following 64bit integers:
>>> 42d50e8bc109c09b:6cd1dcc90984339b
>>>
>>> However update path/restore from snapshot doesn't
>>> here is as I've tested it:
>>>
>>> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor stdio
>>> (qemu) info vm-generation-id 
>>> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
>>> (qemu) stop
>>> (qemu) migrate "exec:gzip -c > STATEFILE.gz" 
>>> (qemu) quit
>>>
>>> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor stdio
>>> -incoming "exec: gzip -c -d STATEFILE.gz"
>>> (qemu) info vm-generation-id 
>>> 28b587fa-991b-4267-80d7-9cf28b746fe9
>>>
>>> guest
>>>  1. doesn't get GPE notification that it must receive
>>>  2. vmgenid client in Windows reads the same value
>>>   42d50e8bc109c09b:6cd1dcc90984339b
>>
>> Hmmm, I wonder if we need something like this, in vmgenid_post_load():
>>
>> commit 90c647db8d59e47c9000affc0d81754eb346e939
>> Author: Dr. David Alan Gilbert 
>> Date:   Fri Apr 15 12:41:30 2016 +0100
>>
>> Fix pflash migration
>>
>> with the idea being that in a single device's post_load callback, we
>> shouldn't perform machine-wide actions (post_load is likely for fixing
>> up the device itself). If machine-wide actions are necessary, we should
>> temporarily register a "vm change state handler", and do the thing once
>> that handler is called (when the machine has been loaded fully and is
>> about to continue execution).
>>
>> Can you please try the attached patch on top? (Build tested only.)
> it doesn't help

Thanks for trying! And, well, sh*t. :(

I guess it's time to resurrect the monitor command (temporarily, for
testing) so we can inject the SCI at will, without migration. I don't
want to burden you unreasonably, so I'll make an effort to try that myself.

Thanks!
Laszlo

Re: [Qemu-devel] [PATCH v8 5/5] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2017-02-17 Thread Peter Maydell

On 17 February 2017 at 06:31,   wrote:
> From: Vijaya Kumar K 
>
> Reset CPU interface registers of GICv3 when CPU is reset.
> For this, ARMCPRegInfo struct is registered with one ICC
> register whose resetfn is called when cpu is reset.
>
> All the ICC registers are reset under one single register
> reset function instead of calling resetfn for each ICC
> register.
>
> Signed-off-by: Vijaya Kumar K 
> ---
>  hw/intc/arm_gicv3_kvm.c | 58 
> +
>  1 file changed, 58 insertions(+)
>
> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
> index cda1af4..6377dc3 100644
> --- a/hw/intc/arm_gicv3_kvm.c
> +++ b/hw/intc/arm_gicv3_kvm.c
> @@ -604,6 +604,34 @@ static void kvm_arm_gicv3_get(GICv3State *s)
>  }
>  }
>
> +static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
> +{
> +ARMCPU *cpu;
> +GICv3State *s;
> +GICv3CPUState *c;
> +
> +c = (GICv3CPUState *)env->gicv3state;
> +assert(!(!c || !c->cpu || !c->gic));

I thought I'd made a comment about these asserts on a previous
version of this code... they're pretty unnecessary since if
any of them are untrue we'll just segfault in this function.

The aim of an assert is to turn a hard-to-debug failure that
only becomes visible late into an easy-to-debug failure that
happens earlier. Assertions which don't move the failure
significantly forward in time or turn an obscure problem into a
clear one aren't really worth having.

> +s = c->gic;
> +cpu = ARM_CPU(c->cpu);
> +
> +/* Initialize to actual HW supported configuration */
> +kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
> +  KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
> +  &c->icc_ctlr_el1[GICV3_NS], false);
> +
> +c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
> +c->icc_pmr_el1 = 0;
> +c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
> +c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
> +c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
> +
> +c->icc_sre_el1 = 0x7;
> +memset(c->icc_apr, 0, sizeof(c->icc_apr));
> +memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
> +}
> +
>  static void kvm_arm_gicv3_reset(DeviceState *dev)
>  {
>  GICv3State *s = ARM_GICV3_COMMON(dev);
> @@ -621,6 +649,30 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
>  kvm_arm_gicv3_put(s);
>  }
>
> +/*
> + * CPU interface registers of GIC needs to be reset on CPU reset.
> + * For the calling arm_gicv3_icc_reset() on CPU reset, we register
> + * below ARMCPRegInfo. As we reset the whole cpu interface under single
> + * register reset, we define only one register of CPU interface instead
> + * of defining all the registers.
> + */
> +static const ARMCPRegInfo gicv3_cpuif_reginfo[] = {
> +{ .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH,
> +  .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4,
> +  .type = ARM_CP_NO_RAW,
> +  .access = PL1_RW,
> +  .readfn = arm_cp_read_zero,
> +  .writefn = arm_cp_write_ignore,
> +  /*
> +   * We hang the whole cpu interface reset routine off here
> +   * rather than parcelling it out into one little function
> +   * per register
> +   */
> +  .resetfn = arm_gicv3_icc_reset,
> +},
> +REGINFO_SENTINEL

I asked for a comment saying why we can't use ARM_CP_NOP...

thanks
-- PMM

[Qemu-devel] [PATCH] hw/arm/virt: Add a user option to disallow ITS instantiation

2017-02-17 Thread Eric Auger

In 2.9 ITS will block save/restore and migration use cases. As
such let's introduce a user option that disallows its instantiation
along with the GICv3. With the no-its option set to true, migration will
be possible, obviously at the expense of MSI support (with GICv3).

Signed-off-by: Eric Auger 

---

With this patch the option is also added in virt 2.8. I don't know
if it is acceptable.
---
 hw/arm/virt.c | 28 +++-
 include/hw/arm/virt.h |  1 +
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f3440f2..702a392 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -605,7 +605,7 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic)
 
 fdt_add_gic_node(vms);
 
-if (type == 3 && !vmc->no_its) {
+if (type == 3 && !vmc->no_its && !vms->no_its) {
 create_its(vms, gicdev);
 } else if (type == 2) {
 create_v2m(vms, pic);
@@ -1480,6 +1480,21 @@ static void virt_set_highmem(Object *obj, bool value, 
Error **errp)
 vms->highmem = value;
 }
 
+static bool virt_get_no_its(Object *obj, Error **errp)
+{
+VirtMachineState *vms = VIRT_MACHINE(obj);
+
+return vms->no_its;
+}
+
+static void virt_set_no_its(Object *obj, bool value, Error **errp)
+{
+VirtMachineState *vms = VIRT_MACHINE(obj);
+
+vms->no_its = value;
+}
+
+
 static char *virt_get_gic_version(Object *obj, Error **errp)
 {
 VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -1540,6 +1555,7 @@ type_init(machvirt_machine_init);
 static void virt_2_9_instance_init(Object *obj)
 {
 VirtMachineState *vms = VIRT_MACHINE(obj);
+VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
 
 /* EL3 is disabled by default on virt: this makes us consistent
  * between KVM and TCG for this board, and it also allows us to
@@ -1579,6 +1595,16 @@ static void virt_2_9_instance_init(Object *obj)
 "Set GIC version. "
 "Valid values are 2, 3 and host", NULL);
 
+/* Default allows ITS instantiation */
+if (!vmc->no_its) {
+object_property_add_bool(obj, "no-its", virt_get_no_its,
+ virt_set_no_its, NULL);
+vms->no_its = false;
+object_property_set_description(obj, "no-its",
+"Disallow the ITS instantiation"
+NULL);
+}
+
 vms->memmap = a15memmap;
 vms->irqmap = a15irqmap;
 }
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 58ce74e..5e73be9 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -93,6 +93,7 @@ typedef struct {
 FWCfgState *fw_cfg;
 bool secure;
 bool highmem;
+bool no_its;
 bool virt;
 int32_t gic_version;
 struct arm_boot_info bootinfo;
-- 
2.5.5

Re: [Qemu-devel] [PATCH v8 2/5] hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate

2017-02-17 Thread Peter Maydell

On 17 February 2017 at 06:31,   wrote:
> From: Vijaya Kumar K 
>
> To Save and Restore ICC_SRE_EL1 register introduce vmstate
> subsection and load only if non-zero.
> Also initialize icc_sre_el1 with to 0x7 in pre_load
> function.
>
> Signed-off-by: Vijaya Kumar K 
> ---
>  hw/intc/arm_gicv3_common.c | 32 
>  include/hw/intc/arm_gicv3_common.h |  1 +
>  2 files changed, 33 insertions(+)
>
> diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
> index 16b9b0f..e62480e 100644
> --- a/hw/intc/arm_gicv3_common.c
> +++ b/hw/intc/arm_gicv3_common.c
> @@ -70,6 +70,34 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = {
>  }
>  };
>
> +static int icc_sre_el1_reg_pre_load(void *opaque)
> +{
> +GICv3CPUState *cs = opaque;
> +
> +/* By default enable SRE and disable IRQ & FIQ bypass. */
> +cs->icc_sre_el1 = 0x7;

Why do we need the pre_load function? I would have
expected that reset would have given us these defaults
already.

> +return 0;
> +}
> +
> +static bool icc_sre_el1_reg_needed(void *opaque)
> +{
> +GICv3CPUState *cs = opaque;
> +
> +return cs->icc_sre_el1 != 0;

I expected this to say "we need to transfer the value if
it isn't 0x7" (since the current situation of migration
is "we assume that the value is 0x7").

Something should probably fail inbound migration for TCG
if the value isn't 0x7, for that matter.

Is there a situation where KVM might allow a value other
than 0x7?

> +}
> +
> +const VMStateDescription vmstate_gicv3_cpu_sre_el1 = {
> +.name = "arm_gicv3_cpu/sre_el1",
> +.version_id = 1,
> +.minimum_version_id = 1,
> +.pre_load = icc_sre_el1_reg_pre_load,
> +.needed = icc_sre_el1_reg_needed,
> +.fields = (VMStateField[]) {
> +VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
> +VMSTATE_END_OF_LIST()
> +}
> +};
> +
>  static const VMStateDescription vmstate_gicv3_cpu = {
>  .name = "arm_gicv3_cpu",
>  .version_id = 1,
> @@ -100,6 +128,10 @@ static const VMStateDescription vmstate_gicv3_cpu = {
>  .subsections = (const VMStateDescription * []) {
>  &vmstate_gicv3_cpu_virt,
>  NULL
> +},
> +.subsections = (const VMStateDescription * []) {
> +&vmstate_gicv3_cpu_sre_el1,
> +NULL
>  }
>  };
>
> diff --git a/include/hw/intc/arm_gicv3_common.h 
> b/include/hw/intc/arm_gicv3_common.h
> index 4156051..bccdfe1 100644
> --- a/include/hw/intc/arm_gicv3_common.h
> +++ b/include/hw/intc/arm_gicv3_common.h
> @@ -172,6 +172,7 @@ struct GICv3CPUState {
>  uint8_t gicr_ipriorityr[GIC_INTERNAL];
>
>  /* CPU interface */
> +uint64_t icc_sre_el1;
>  uint64_t icc_ctlr_el1[2];
>  uint64_t icc_pmr_el1;
>  uint64_t icc_bpr[3];
> --
> 1.9.1

thanks
-- PMM

Re: [Qemu-devel] [PATCH v15 08/25] block: introduce auto-loading bitmaps

2017-02-17 Thread Denis V. Lunev

On 02/17/2017 03:48 PM, Kevin Wolf wrote:
> Am 17.02.2017 um 13:40 hat Vladimir Sementsov-Ogievskiy geschrieben:
>> 17.02.2017 15:09, Kevin Wolf wrote:
>>> Am 17.02.2017 um 12:46 hat Vladimir Sementsov-Ogievskiy geschrieben:
 16.02.2017 14:49, Kevin Wolf wrote:
> Am 16.02.2017 um 12:25 hat Kevin Wolf geschrieben:
>> Am 15.02.2017 um 11:10 hat Vladimir Sementsov-Ogievskiy geschrieben:
>>> Auto loading bitmaps are bitmaps stored in the disk image, which should
>>> be loaded when the image is opened and become BdrvDirtyBitmaps for the
>>> corresponding drive.
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>>> Reviewed-by: John Snow 
>>> Reviewed-by: Max Reitz 
>> Why do we need a new BlockDriver callback and special code for it in
>> bdrv_open_common()? The callback is only ever called immediately after
>> .bdrv_open/.bdrv_file_open, so can't the drivers just do this internally
>> in their .bdrv_open implementation? Even more so because qcow2 is the
>> only driver that supports this callback.
> Actually, don't we have to call this in qcow2_invalidate_cache()?
> Currently, I think, after a migration, the autoload bitmaps aren't
> loaded.
>
> By moving the qcow2_load_autoloading_dirty_bitmaps() call to
> qcow2_open(), this would be fixed.
>
> Kevin
 Bitmap should not be reloaded on any intermediate qcow2-open's,
 reopens, etc. It should be loaded once, on bdrv_open, to not create
 extra collisions (between in-memory bitmap and its stored version).
 That was the idea.

 For bitmaps migration there are separate series, we shouldn't load
 bitmap from file on migration, as its version in the file is
 outdated.
>>> That's not what your series is doing, though. It loads the bitmaps when
>> Actually, they will not be loaded as they will have IN_USE flag.
>>
>>> migration starts and doesn't reload then when migration completes, even
>>> though they are stale. Migration with shared storage would just work
>>> without an extra series if you did these things in the correct places.
>>>
>>> As a reminder, this is how migration with shared storage works (or
>>> should work with your series):
>>>
>>> 1. Start destination qemu instance. This calls bdrv_open() with
>>>BDRV_O_INACTIVE. We can read in some metadata, though we don't need
>>>much more than the image size at this point. Writing to the image is
>>>still impossible.
>>>
>>> 2. Start migration on the source, while the VM is still writing to the
>>>image, rendering the cached metadata from step 1 stale.
>>>
>>> 3. Migration completes:
>>>
>>> a. Stop the VM
>>>
>>> b. Inactivate all images in the source qemu. This is where all
>>>metadata needs to be written back to the image file, including
>>>bitmaps. No writes to the image are possible after this point
>>>because BDRV_O_INACTIVE is set.
>>>
>>> c. Invalidate the caches in the destination qemu, i.e. reload
>>>everything from the file that could have changed since step 1,
>>>including bitmaps. BDRV_O_INACTIVE is cleared, making the image
>>>ready for writes.
>>>
>>> d. Resume the VM on the destination
>>>
>>> 4. Exit the source qemu process, which involves bdrv_close(). Note that
>>>at this point, no writing to the image file is possible any more,
>>>it's the destination qemu process that own the image file now.
>>>
>>> Your series loads and stores bitmaps in steps 1 and 4. This means that
>> Actually - not. in 1 bitmaps are "in use", in 4 INACTIVE is set (and
>> it is checked), nothing is stored.
>>
>>> they are stale on the destination when migration completes, and that
>>> bdrv_close() wants to write to an image file that it doesn't own any
>>> more, which will cause an assertion failure. If you instead move things
>>> to steps 3b and 3c, it will just work.
>> Hmm, I understand the idea.. But this will interfere with postcopy
>> bitmap migration. So if we really need this, there should be some
>> additional control flags or capabilities.. The problem of your
>> approach is that bitmap actually migrated in the short state when
>> source and destination are stopped, it may take time, as bitmaps may
>> be large.
> You can always add optimisations, but this is the basic lifecycle
> process of block devices in qemu, so it would be good to adhere to it.
> So far there are no other pieces of information that are ignored in
> bdrv_invalidate()/bdrv_inactivate() and instead only handled in
> bdrv_open()/bdrv_close(). It's a matter of consistency, too.
>
> And not having to add special cases for specific features in the generic
> bdrv_open()/close() paths is a big plus for me anyway.
>
> Kevin
But for sure this is bad from the downtime point of view.
On migrate you will have to write to the image and re-read
it again on the target. This would be very slow. This will
not help for the migration with non-shared d

Re: [Qemu-devel] [PATCH] hw/arm/virt: Add a user option to disallow ITS instantiation

2017-02-17 Thread no-reply

Hi,

This series failed automatic build test. Please find the testing commands and
their output below. If you have docker installed, you can probably reproduce it
locally.

Type: series
Subject: [Qemu-devel] [PATCH] hw/arm/virt: Add a user option to disallow ITS 
instantiation
Message-id: 1487339599-20910-1-git-send-email-eric.au...@redhat.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
set -e
git submodule update --init dtc
# Let docker tests dump environment info
export SHOW_ENV=1
export J=16
make docker-test-quick@centos6
make docker-test-mingw@fedora
make docker-test-build@min-glib
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
c718093 hw/arm/virt: Add a user option to disallow ITS instantiation

=== OUTPUT BEGIN ===
Submodule 'dtc' (git://git.qemu-project.org/dtc.git) registered for path 'dtc'
Cloning into 'dtc'...
Submodule path 'dtc': checked out '65cc4d2748a2c2e6f27f1cf39e07a5dbabd80ebf'
  BUILD   centos6
make[1]: Entering directory `/var/tmp/patchew-tester-tmp-cl7pvpih/src'
  ARCHIVE qemu.tgz
  ARCHIVE dtc.tgz
  COPYRUNNER
RUN test-quick in qemu:centos6 
Packages installed:
SDL-devel-1.2.14-7.el6_7.1.x86_64
ccache-3.1.6-2.el6.x86_64
epel-release-6-8.noarch
gcc-4.4.7-17.el6.x86_64
git-1.7.1-4.el6_7.1.x86_64
glib2-devel-2.28.8-5.el6.x86_64
libfdt-devel-1.4.0-1.el6.x86_64
make-3.81-23.el6.x86_64
package g++ is not installed
pixman-devel-0.32.8-1.el6.x86_64
tar-1.23-15.el6_8.x86_64
zlib-devel-1.2.3-29.el6.x86_64

Environment variables:
PACKAGES=libfdt-devel ccache tar git make gcc g++ zlib-devel 
glib2-devel SDL-devel pixman-devel epel-release
HOSTNAME=136ee63d9391
TERM=xterm
MAKEFLAGS= -j16
HISTSIZE=1000
J=16
USER=root
CCACHE_DIR=/var/tmp/ccache
EXTRA_CONFIGURE_OPTS=
V=
SHOW_ENV=1
MAIL=/var/spool/mail/root
PATH=/usr/lib/ccache:/usr/lib64/ccache:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
PWD=/
LANG=en_US.UTF-8
TARGET_LIST=
HISTCONTROL=ignoredups
SHLVL=1
HOME=/root
TEST_DIR=/tmp/qemu-test
LOGNAME=root
LESSOPEN=||/usr/bin/lesspipe.sh %s
FEATURES= dtc
DEBUG=
G_BROKEN_FILENAMES=1
CCACHE_HASHDIR=
_=/usr/bin/env

Configure options:
--enable-werror --target-list=x86_64-softmmu,aarch64-softmmu 
--prefix=/var/tmp/qemu-build/install
No C++ compiler available; disabling C++ specific optional code
Install prefix/var/tmp/qemu-build/install
BIOS directory/var/tmp/qemu-build/install/share/qemu
binary directory  /var/tmp/qemu-build/install/bin
library directory /var/tmp/qemu-build/install/lib
module directory  /var/tmp/qemu-build/install/lib/qemu
libexec directory /var/tmp/qemu-build/install/libexec
include directory /var/tmp/qemu-build/install/include
config directory  /var/tmp/qemu-build/install/etc
local state directory   /var/tmp/qemu-build/install/var
Manual directory  /var/tmp/qemu-build/install/share/man
ELF interp prefix /usr/gnemul/qemu-%M
Source path   /tmp/qemu-test/src
C compilercc
Host C compiler   cc
C++ compiler  
Objective-C compiler cc
ARFLAGS   rv
CFLAGS-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -g 
QEMU_CFLAGS   -I/usr/include/pixman-1-pthread -I/usr/include/glib-2.0 
-I/usr/lib64/glib-2.0/include   -fPIE -DPIE -m64 -mcx16 -D_GNU_SOURCE 
-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes 
-Wredundant-decls -Wall -Wundef -Wwrite-strings -Wmissing-prototypes 
-fno-strict-aliasing -fno-common -fwrapv  -Wendif-labels 
-Wno-missing-include-dirs -Wempty-body -Wnested-externs -Wformat-security 
-Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration 
-Wold-style-definition -Wtype-limits -fstack-protector-all
LDFLAGS   -Wl,--warn-common -Wl,-z,relro -Wl,-z,now -pie -m64 -g 
make  make
install   install
pythonpython -B
smbd  /usr/sbin/smbd
module supportno
host CPU  x86_64
host big endian   no
target list   x86_64-softmmu aarch64-softmmu
tcg debug enabled no
gprof enabled no
sparse enabledno
strip binariesyes
profiler  no
static build  no
pixmansystem
SDL support   yes (1.2.14)
GTK support   no 
GTK GL supportno
VTE support   no 
TLS priority  NORMAL
GNUTLS supportno
GNUTLS rndno
libgcrypt no
libgcrypt kdf no
nettleno 
nettle kdfno
libtasn1  no
curses supportno
virgl support no
curl support  no
mingw32 support   no
Audio drivers oss
Block whitelist (rw) 
Block whitelist (ro) 
VirtFS supportno
VNC support   yes
VNC SASL support  no
VNC JPEG support  no
VNC PNG support   no
xen support   no
brlapi supportno
bluez  supportno
Documentation no
PIE   yes
vde support   no
netmap supportno
Linux AIO support no
ATTR/XATTR support yes
Install blobs yes
KVM support   yes
HAX support   no
RDMA support  no
TCG interpreter   no
fdt support   yes
preadv supportyes
fdatasync yes
madvise

Re: [Qemu-devel] [PATCH] bcm2835_rng: Use qcrypto_random_bytes() rather than rand()

2017-02-17 Thread Daniel P. Berrange

On Fri, Feb 17, 2017 at 12:22:39PM +0000, Peter Maydell wrote:
> Switch to using qcrypto_random_bytes() rather than rand() as
> our source of randomness for the BCM2835 RNG.
> 
> If qcrypto_random_bytes() fails, we don't want to return the guest a
> non-random value in case they're really using it for cryptographic
> purposes, so the best we can do is a fatal error.  This shouldn't
> happen unless something's broken, though.
> 
> In theory we could implement this device's full FIFO and interrupt
> semantics and then just stop filling the FIFO.  That's a lot of work,
> though, and doesn't really give a very nice diagnostic to the user
> since the guest will just seem to hang.
> 
> Signed-off-by: Peter Maydell 
> ---
> This patch sits on top of http://patchwork.ozlabs.org/patch/726744/
> (though for review purposes I think it's pretty self explanatory).
> The interesting question here is the failure case handling, where
> we're a bit between a rock and a hard place because we don't have
> a nice way to report it to the guest, but we don't want to return
> a non-random value either...
> 
> We should probably improve crypto/random-platform.c to use
> getentropy() if available, which would fix the "BSD or OSX
> host and not using gcrypt or gnutls" case which I think is
> the most likely cause of qcrypto_random_bytes() failing.

random-platform.c currently uses /dev/urandom or /dev/random,
so that should work when GNUTLS/gcrypt are both disabled at
build time.

What would fail is using it from a chroot with an empty /dev,
of course.

So we should definitely try getentropy/getrandom as a preferred
approach, falling back to /dev nodes only if those syscalls don't
exist in the current kernel. Looks like I already put a TODO
comment in the file to this effect.

> 
>  hw/misc/bcm2835_rng.c | 26 +-
>  1 file changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/misc/bcm2835_rng.c b/hw/misc/bcm2835_rng.c
> index 2242bc5..bbe903d 100644
> --- a/hw/misc/bcm2835_rng.c
> +++ b/hw/misc/bcm2835_rng.c
> @@ -9,8 +9,32 @@
>  
>  #include "qemu/osdep.h"
>  #include "qemu/log.h"
> +#include "qapi/error.h"
> +#include "crypto/random.h"
>  #include "hw/misc/bcm2835_rng.h"
>  
> +static uint32_t get_random_bytes(void)
> +{
> +uint32_t res;
> +Error *err = NULL;
> +
> +if (qcrypto_random_bytes((uint8_t *)&res, sizeof(res), &err) < 0) {
> +/* On failure we don't want to return the guest a non-random
> + * value in case they're really using it for cryptographic
> + * purposes, so the best we can do is die here.
> + * This shouldn't happen unless something's broken.
> + * In theory we could implement this device's full FIFO
> + * and interrupt semantics and then just stop filling the
> + * FIFO. That's a lot of work, though, so we assume any
> + * errors are systematic problems and trust that the check
> + * on init is sufficient.
> + */
> +error_report_err(err);
> +exit(1);
> +}
> +return res;
> +}
> +
>  static uint64_t bcm2835_rng_read(void *opaque, hwaddr offset,
>   unsigned size)
>  {
> @@ -27,7 +51,7 @@ static uint64_t bcm2835_rng_read(void *opaque, hwaddr 
> offset,
>  res = s->rng_status | (1 << 24);
>  break;
>  case 0x8:/* rng_data */
> -res = rand();
> +res = get_random_bytes();
>  break;
>  
>  default:

Reviewed-by: Daniel P. Berrange 

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://entangle-photo.org   -o-http://search.cpan.org/~danberr/ :|

Re: [Qemu-devel] [PATCH 15/17] iotests: add default node-name

2017-02-17 Thread Fam Zheng

On Fri, 02/17 16:36, Vladimir Sementsov-Ogievskiy wrote:
> 17.02.2017 15:21, Fam Zheng wrote:
> > On Fri, 02/17 13:20, Vladimir Sementsov-Ogievskiy wrote:
> > > 16.02.2017 16:48, Fam Zheng wrote:
> > > > On Mon, 02/13 12:54, Vladimir Sementsov-Ogievskiy wrote:
> > > > > When testing migration, auto-generated by qemu node-names differs in
> > > > > source and destination qemu and migration fails. After this patch,
> > > > > auto-generated by iotest nodenames will be the same.
> > > > What should be done in libvirt to make sure the node-names are matching
> > > > correctly at both sides?
> > > Hmm, just set node names appropriately?
> > But I think the problem is that node names are not configurable from libvirt
> > today, and then the migration will fail. Should the device name take 
> > precedence
> > in the code, to make it easier?
> 
> libvirt can use same parameters as I in this patch..

If I'm not mistaken, libvirt can be patched to explicitly set the same node
names in the QEMU command line, but that is some extra work to do there. My
point is if device names are used during migration, when available, this patch
and the libvirt change is not necessary.

Fam

Re: [Qemu-devel] [PATCH v2 2/7] iscsi: Handle -iscsi user/password in bdrv_parse_filename()

2017-02-17 Thread Fam Zheng

On Fri, 02/17 14:26, Kevin Wolf wrote:
> It is the one that it put into the QDict by iscsi_parse_iscsi_option(),
> which is supposed to be the value from -iscsi.

OK! This is what I was missing. :)

Fam

Re: [Qemu-devel] [PATCH v2 2/7] iscsi: Handle -iscsi user/password in bdrv_parse_filename()

2017-02-17 Thread Fam Zheng

On Wed, 01/25 12:42, Jeff Cody wrote:
> From: Kevin Wolf 
> 
> This splits the logic in the old parse_chap() function into a part that
> parses the -iscsi options into the new driver-specific options, and
> another part that actually applies those options (called apply_chap()
> now).
> 
> Note that this means that username and password specified with -iscsi
> only take effect when a URL is provided. This is intentional, -iscsi is
> a legacy interface only supported for compatibility, new users should
> use the proper driver-specific options.
> 
> Signed-off-by: Kevin Wolf 
> Signed-off-by: Jeff Cody 

Reviewed-by: Fam Zheng

Re: [Qemu-devel] [PATCH] bcm2835_rng: Use qcrypto_random_bytes() rather than rand()

2017-02-17 Thread Peter Maydell

On 17 February 2017 at 14:05, Daniel P. Berrange  wrote:
> On Fri, Feb 17, 2017 at 12:22:39PM +0000, Peter Maydell wrote:
>> We should probably improve crypto/random-platform.c to use
>> getentropy() if available, which would fix the "BSD or OSX
>> host and not using gcrypt or gnutls" case which I think is
>> the most likely cause of qcrypto_random_bytes() failing.
>
> random-platform.c currently uses /dev/urandom or /dev/random,
> so that should work when GNUTLS/gcrypt are both disabled at
> build time.

Ah, and OSX and the BSDs have those devices. (I had
mistakenly assumed they were a linuxism.)

> What would fail is using it from a chroot with an empty /dev,
> of course.

Yes; replacing the rand() in linux-user/main.c would require
this I think (unless we're willing to fall back to rand ;-))

thanks
-- PMM

Re: [Qemu-devel] [PATCH 1/3] qemu-img: Add tests for raw image preallocation

2017-02-17 Thread Nir Soffer

On Fri, Feb 17, 2017 at 11:14 AM, Kevin Wolf  wrote:
> Am 17.02.2017 um 01:51 hat Nir Soffer geschrieben:
>> Add tests for creating raw image with and without the preallocation
>> option.
>>
>> Signed-off-by: Nir Soffer 
>
> Looks good, but 175 is already (multiply) taken. Not making this a
> blocker, but I just want to remind everyone to check the mailing list
> for pending patches which add new tests before using a new number in
> order to avoid unnecessary rebases for everyone. In general, it's as
> easy as searching for the string "175.out" in the mailbox.
>
> The next free one seems to be 177 currently.

Thanks, will change to 177 in the next version.

For next patches, what do you mean by "pending"? patches sent
to the block mailing list?

Nir

Re: [Qemu-devel] [PATCH v15 08/25] block: introduce auto-loading bitmaps

2017-02-17 Thread Kevin Wolf

Am 17.02.2017 um 14:48 hat Denis V. Lunev geschrieben:
> On 02/17/2017 04:34 PM, Kevin Wolf wrote:
> > Am 17.02.2017 um 14:22 hat Denis V. Lunev geschrieben:
> >> But for sure this is bad from the downtime point of view.
> >> On migrate you will have to write to the image and re-read
> >> it again on the target. This would be very slow. This will
> >> not help for the migration with non-shared disk too.
> >>
> >> That is why we have specifically worked in a migration,
> >> which for a good does not influence downtime at all now.
> >>
> >> With a write we are issuing several write requests + sync.
> >> Our measurements shows that bdrv_drain could take around
> >> a second on an averagely loaded conventional system, which
> >> seems unacceptable addition to me.
> > I'm not arguing against optimising migration, I fully agree with you. I
> > just think that we should start with a correct if slow base version and
> > then add optimisation to that, instead of starting with a broken base
> > version and adding to that.
> >
> > Look, whether you do the expensive I/O on open/close and make that a
> > slow operation or whether you do it on invalidate_cache/inactivate
> > doesn't really make a difference in term of slowness because in general
> > both operations are called exactly once. But it does make a difference
> > in terms of correctness.
> >
> > Once you do the optimisation, of course, you'll skip writing those
> > bitmaps that you transfer using a different channel, no matter whether
> > you skip it in bdrv_close() or in bdrv_inactivate().
> >
> > Kevin
> I do not understand this point as in order to optimize this
> we will have to create specific code path or option from
> the migration code and keep this as an ugly kludge forever.

The point that I don't understand is why it makes any difference for the
follow-up migration series whether the writeout is in bdrv_close() or
bdrv_inactivate(). I don't really see the difference between the two
from a migration POV; both need to be skipped if we transfer the bitmap
using a different channel.

Maybe I would see the reason if I could find the time to look at the
migration patches first, but unfortunately I don't have this time at the
moment.

My point is just that generally we want to have a correctly working qemu
after every single patch, and even more importantly after every series.
As the migration series is separate from this, I don't think it's a good
excuse for doing worse than we could easily do here.

Kevin

Re: [Qemu-devel] [PATCH 1/3] qemu-img: Add tests for raw image preallocation

2017-02-17 Thread Kevin Wolf

Am 17.02.2017 um 15:20 hat Nir Soffer geschrieben:
> On Fri, Feb 17, 2017 at 11:14 AM, Kevin Wolf  wrote:
> > Am 17.02.2017 um 01:51 hat Nir Soffer geschrieben:
> >> Add tests for creating raw image with and without the preallocation
> >> option.
> >>
> >> Signed-off-by: Nir Soffer 
> >
> > Looks good, but 175 is already (multiply) taken. Not making this a
> > blocker, but I just want to remind everyone to check the mailing list
> > for pending patches which add new tests before using a new number in
> > order to avoid unnecessary rebases for everyone. In general, it's as
> > easy as searching for the string "175.out" in the mailbox.
> >
> > The next free one seems to be 177 currently.
> 
> Thanks, will change to 177 in the next version.

If there needs to be a next version for other reasons. Otherwise it's
not important enough to respin, it just means that someone else will
have to rebase.

> For next patches, what do you mean by "pending"? patches sent
> to the block mailing list?

Yes, that's what I'm looking for when I add a new test case myself. It's
just an easy way to avoid stepping on each others toes.

Kevin

[Qemu-devel] [PATCH v2] hw/arm/virt: Add a user option to disallow ITS instantiation

2017-02-17 Thread Eric Auger

In 2.9 ITS will block save/restore and migration use cases. As
such let's introduce a user option that disallows its instantiation
along with the GICv3. With no-its option turned true, migration will
be possible, obviously at the expense of MSI support (with GICv3).

Signed-off-by: Eric Auger 

---

v1 -> v2: fix omitted comma in object_property_set_description

With this patch the option also is added in virt 2.8. I don't know
if it is acceptable.
---
 hw/arm/virt.c | 28 +++-
 include/hw/arm/virt.h |  1 +
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f3440f2..c08deb0 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -605,7 +605,7 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic)
 
 fdt_add_gic_node(vms);
 
-if (type == 3 && !vmc->no_its) {
+if (type == 3 && !vmc->no_its && !vms->no_its) {
 create_its(vms, gicdev);
 } else if (type == 2) {
 create_v2m(vms, pic);
@@ -1480,6 +1480,21 @@ static void virt_set_highmem(Object *obj, bool value, 
Error **errp)
 vms->highmem = value;
 }
 
+static bool virt_get_no_its(Object *obj, Error **errp)
+{
+VirtMachineState *vms = VIRT_MACHINE(obj);
+
+return vms->no_its;
+}
+
+static void virt_set_no_its(Object *obj, bool value, Error **errp)
+{
+VirtMachineState *vms = VIRT_MACHINE(obj);
+
+vms->no_its = value;
+}
+
+
 static char *virt_get_gic_version(Object *obj, Error **errp)
 {
 VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -1540,6 +1555,7 @@ type_init(machvirt_machine_init);
 static void virt_2_9_instance_init(Object *obj)
 {
 VirtMachineState *vms = VIRT_MACHINE(obj);
+VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
 
 /* EL3 is disabled by default on virt: this makes us consistent
  * between KVM and TCG for this board, and it also allows us to
@@ -1579,6 +1595,16 @@ static void virt_2_9_instance_init(Object *obj)
 "Set GIC version. "
 "Valid values are 2, 3 and host", NULL);
 
+/* Default allows ITS instantiation */
+if (!vmc->no_its) {
+object_property_add_bool(obj, "no-its", virt_get_no_its,
+ virt_set_no_its, NULL);
+vms->no_its = false;
+object_property_set_description(obj, "no-its",
+"Disallow the ITS instantiation",
+NULL);
+}
+
 vms->memmap = a15memmap;
 vms->irqmap = a15irqmap;
 }
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 58ce74e..5e73be9 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -93,6 +93,7 @@ typedef struct {
 FWCfgState *fw_cfg;
 bool secure;
 bool highmem;
+bool no_its;
 bool virt;
 int32_t gic_version;
 struct arm_boot_info bootinfo;
-- 
2.5.5

Re: [Qemu-devel] [PATCH v15 08/25] block: introduce auto-loading bitmaps

2017-02-17 Thread Vladimir Sementsov-Ogievskiy


17.02.2017 17:24, Kevin Wolf wrote:

Am 17.02.2017 um 14:48 hat Denis V. Lunev geschrieben:

On 02/17/2017 04:34 PM, Kevin Wolf wrote:

Am 17.02.2017 um 14:22 hat Denis V. Lunev geschrieben:

But for sure this is bad from the downtime point of view.
On migrate you will have to write to the image and re-read
it again on the target. This would be very slow. This will
not help for the migration with non-shared disk too.

That is why we have specifically worked in a migration,
which for a good does not influence downtime at all now.

With a write we are issuing several write requests + sync.
Our measurements show that bdrv_drain could take around
a second on an averagely loaded conventional system, which
seems unacceptable addition to me.

I'm not arguing against optimising migration, I fully agree with you. I
just think that we should start with a correct if slow base version and
then add optimisation to that, instead of starting with a broken base
version and adding to that.

Look, whether you do the expensive I/O on open/close and make that a
slow operation or whether you do it on invalidate_cache/inactivate
doesn't really make a difference in term of slowness because in general
both operations are called exactly once. But it does make a difference
in terms of correctness.

Once you do the optimisation, of course, you'll skip writing those
bitmaps that you transfer using a different channel, no matter whether
you skip it in bdrv_close() or in bdrv_inactivate().

Kevin

I do not understand this point as in order to optimize this
we will have to create specific code path or option from
the migration code and keep this as an ugly kludge forever.

The point that I don't understand is why it makes any difference for the
follow-up migration series whether the writeout is in bdrv_close() or
bdrv_inactivate(). I don't really see the difference between the two
from a migration POV; both need to be skipped if we transfer the bitmap
using a different channel.

Maybe I would see the reason if I could find the time to look at the
migration patches first, but unfortunately I don't have this time at the
moment.

My point is just that generally we want to have a correctly working qemu
after every single patch, and even more importantly after every series.
As the migration series is separate from this, I don't think it's a good
excuse for doing worse than we could easily do here.

Kevin


With open/close all is already ok - bitmaps will not be saved because of 
BDRV_O_INACTIVE  and will not be loaded because of IN_USE.



--
Best regards,
Vladimir

[Qemu-devel] [PATCH] Makefile: Put VERSION info into version.texi rather than using -D

2017-02-17 Thread Peter Maydell

Unfortunately some older versions of makeinfo don't correctly
handle the -D command line option and fail to set the variable.
This then causes them to complain
 docs/qemu-ga-ref.texi:41: warning: undefined flag: VERSION

Work around this by doing as the autotools do, and writing
the information into a version.texi file which we then
include from the .texi files that need it.

Signed-off-by: Peter Maydell 
---
 Makefile   | 17 ++---
 docs/qemu-ga-ref.texi  |  2 ++
 docs/qemu-qmp-ref.texi |  2 ++
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index 830fa5a..1c4c04f 100644
--- a/Makefile
+++ b/Makefile
@@ -516,7 +516,7 @@ distclean: clean
rm -f qemu-doc.vr qemu-doc.txt
rm -f config.log
rm -f linux-headers/asm
-   rm -f qemu-ga-qapi.texi qemu-qapi.texi
+   rm -f qemu-ga-qapi.texi qemu-qapi.texi version.texi
rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
@@ -663,21 +663,24 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
 
 # documentation
 MAKEINFO=makeinfo
-MAKEINFOFLAGS=--no-split --number-sections -D 'VERSION $(VERSION)'
-TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'
+MAKEINFOFLAGS=--no-split --number-sections
+TEXIFLAG=$(if $(V),,--quiet)
 
-%.html: %.texi
+version.texi: $(SRC_PATH)/VERSION
+   $(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
+
+%.html: %.texi version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers 
\
--html $< -o $@,"GEN","$@")
 
-%.info: %.texi
+%.info: %.texi version.texi
$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
 
-%.txt: %.texi
+%.txt: %.texi version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers 
\
--plaintext $< -o $@,"GEN","$@")
 
-%.pdf: %.texi
+%.pdf: %.texi version.texi
$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o 
$@,"GEN","$@")
 
 qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
diff --git a/docs/qemu-ga-ref.texi b/docs/qemu-ga-ref.texi
index 87cc8d0..ddb76ce 100644
--- a/docs/qemu-ga-ref.texi
+++ b/docs/qemu-ga-ref.texi
@@ -1,6 +1,8 @@
 \input texinfo
 @setfilename qemu-ga-ref.info
 
+@include version.texi
+
 @exampleindent 0
 @paragraphindent 0
 
diff --git a/docs/qemu-qmp-ref.texi b/docs/qemu-qmp-ref.texi
index 818e525..0a00569 100644
--- a/docs/qemu-qmp-ref.texi
+++ b/docs/qemu-qmp-ref.texi
@@ -1,6 +1,8 @@
 \input texinfo
 @setfilename qemu-qmp-ref.info
 
+@include version.texi
+
 @exampleindent 0
 @paragraphindent 0
 
-- 
2.7.4

Re: [Qemu-devel] [PATCH] hw/arm/virt: fix cpu object reference leak

2017-02-17 Thread Igor Mammedov

On Fri, 17 Feb 2017 13:32:15 +0000
Peter Maydell  wrote:

> On 16 February 2017 at 15:11, Igor Mammedov  wrote:
> > On Thu, 16 Feb 2017 14:18:05 +
> > Peter Maydell  wrote:  
> >> I've always found the object reference semantics somewhat
> >> confusing (why does realizing a device add a reference,
> >> for instance?). Do we document them anywhere?  
> > I'm not aware of a place where it's documented.
> >
> > currently device_realize() sets parent thus increasing
> > ref counter only if device creator haven't set parent
> > explicitly.  
> 
> It doesn't seem to:
> 
> static void device_realize(DeviceState *dev, Error **errp)
> {
> DeviceClass *dc = DEVICE_GET_CLASS(dev);
> 
> if (dc->init) {
> int rc = dc->init(dev);
> if (rc < 0) {
> error_setg(errp, "Device initialization failed.");
> return;
> }
> }
> }
static void device_set_realized(Object *obj, bool value, Error **errp)
{
...
if (value && !dev->realized) {  
 
if (!obj->parent) { 
 
gchar *name = g_strdup_printf("device[%d]", unattached_count++);
 

 
object_property_add_child(container_get(qdev_get_machine(), 
 
"/unattached"), 
 
  name, obj, &error_abort); 
 
unattached_parent = true;   
 
g_free(name);   
 
}

 
> ...it just calls the device's init function if it has one.
> 
> It's also pretty confusing that qdev_try_create()
> and qdev_create() return a pointer to an object
> that has been put into a bus and had unref called
> (so the caller doesn't need to manually unref),
when qdev_try_create() puts the device on a bus,
it creates a QOM link property to the device, which increases the refcount
qdev_try_create() -> qdev_set_parent_bus() -> bus_add_child()

link is not really usable at that time as device doesn't have
parent (in QOM terms) and attempt to resolve it to path would
assert, so it does set link manually by hack
bus_add_child()
kid->child = child; 
 
object_ref(OBJECT(kid->child)); 

and then as bus holds reference, device won't disappear 
until it's attached to bus, it unrefs original
(qdev_try_create owned) pointer and returns pointer
owned by qdev framework.

later device creator calls
 qdev_init_nofail() ->
   object_property_set_bool(true, "realized");

which sets QOM parent for device to "/machine/unattached"
if caller hasn't set it manually,

like
 qdev_device_add() ->
qdev_set_id() ->
   object_property_add_child("/peripheral" | "/peripheral-anon")

or
 ioapic_init_gsi() ->
 qdev_create()
 object_property_add_child(...)
 qdev_init_nofail()

> but plain object_new() returns a pointer to an
> object that hasn't been put into a bus, yet
it's like malloc/new and used for all objects including
ones without realize which is Device concept.
So naturally caller hold/owns the first reference
and should take care of it.

> realizing does put it into a bus but doesn't do the
> corresponding unref.
it might add extra reference so successfully created device
won't disappear once original owner 'frees' pointer that
goes out of scope.


> I'd be a lot happier if we had clear documentation that
> described how our object model, plugging things into buses,
> etc handled reference counting and what the expected
> "correct" code patterns are for using it.
I see 2 APIs in use:
 1: legacy qdev_create() + qdev_init_nofail()
 for hardwired on board devices bus attached oriented
 2: object_new() (+ device.realize() in case if object is Device)
 used by device_add() used for both bus/bus-less device
 post machine_init time.


> That said, I guess this patch is correct so I'm applying
> it to target-arm.next.
> 
> thanks
> -- PMM

Re: [Qemu-devel] [PATCH v8 4/8] ACPI: Add Virtual Machine Generation ID support

2017-02-17 Thread Ben Warren


> On Feb 17, 2017, at 2:43 AM, Igor Mammedov  wrote:
> 
> On Thu, 16 Feb 2017 15:15:36 -0800
> b...@skyportsystems.com wrote:
> 
>> From: Ben Warren 
>> 
>> This implements the VM Generation ID feature by passing a 128-bit
>> GUID to the guest via a fw_cfg blob.
>> Any time the GUID changes, an ACPI notify event is sent to the guest
>> 
>> The user interface is a simple device with one parameter:
>> - guid (string, must be "auto" or in UUID format
>>   xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
> I've given it some testing with WS2012R2 and v4 patches for Seabios,
> 
> Windows is able to read initial GUID allocation and writeback
> seems to work somehow:
> 
> (qemu) info vm-generation-id 
> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
> 
> vmgenid client in Windows reads it as 2 following 64bit integers:
> 42d50e8bc109c09b:6cd1dcc90984339b
> 
> However, the update path/restore from snapshot doesn't work;
> here is how I've tested it:
> 
> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor stdio
> (qemu) info vm-generation-id 
> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
> (qemu) stop
> (qemu) migrate "exec:gzip -c > STATEFILE.gz" 
> (qemu) quit
> 
> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor stdio
> -incoming "exec: gzip -c -d STATEFILE.gz"
> (qemu) info vm-generation-id 
> 28b587fa-991b-4267-80d7-9cf28b746fe9
> 
> guest
> 1. doesn't get GPE notification that it must receive
> 2. vmgenid client in Windows reads the same value
>  42d50e8bc109c09b:6cd1dcc90984339b
> 
Strange, this was working for me, but with a slightly different test method:
I use virsh save/restore
While I do later testing with Windows, during development I use a Linux kernel 
module I wrote that keeps track of GUID and notifications.  I’m happy to share 
this with you if interested.

I’ll dig into this this morning.

—Ben


smime.p7s
Description: S/MIME cryptographic signature

Re: [Qemu-devel] Fix build break during configuration on musl-libc based Linux systems.

2017-02-17 Thread Eric Blake

On 02/17/2017 03:28 AM, Peter Maydell wrote:
> On 17 February 2017 at 06:43, Fam Zheng  wrote:
>> But your point is taken, we should make the first (or a one-shot)
>> contribution as easy as possible.
> 
> Yes; we could do with providing a "This page seems very long..."
> introduction section. The absolute bare minimum requirements
> for a submitter I think are:
>  * Provide a Signed-off-by: line (this is a hard requirement
>because it's how you say "I'm legally OK to contribute this
>and am happy for it to go into QEMU")
>  * send patch by email
>  * read replies and act on them if you want your patch to go in
> 
> The larger your contribution is, the more important the other
> requirements detailed on the page are; but personally I'm
> happy to manually fix up patches from a first-time submitter,
> and I think most other maintainers are too.

I've updated the wiki to put in that nice bullet list, prior to the
table of contents.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2] target-arm: Implement BCM2835 hardware RNG

2017-02-17 Thread Peter Maydell

On 17 February 2017 at 12:54, Peter Maydell  wrote:
> On 10 February 2017 at 21:08, Marcin Chojnacki  wrote:
>> Recent vanilla Raspberry Pi kernels started to make use of
>> the hardware random number generator in BCM2835 SoC. As a
>> result, those kernels wouldn't work anymore under QEMU
>> but rather just freeze during the boot process.
>>
>> This patch implements a trivial BCM2835 compatible RNG,
>> and adds it as a peripheral to BCM2835 platform, which
>> allows to boot a vanilla Raspberry Pi kernel under Qemu.
>
> So this looks good to me (other than the rand issue which
> I just posted a patch for).

Dan's reviewed the fix for the rand issue so I'm going to
apply this to target-arm.next. Thanks!

-- PMM

[Qemu-devel] (no subject)

2017-02-17 Thread Pranith Kumar

Subject: [PATCH] aarch64: Change ext type to TCGType to fix warnings

To fix the following warnings:

In file included from /users/pranith/qemu/tcg/tcg.c:255:
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:879:24: warning: implicit 
conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different 
enumeration type 'TCGType' (aka 'enum TCGType')
  [-Wenum-conversion]
tcg_out_cmp(s, ext, a, b, b_const);
~~~^~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:893:36: warning: implicit 
conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different 
enumeration type 'TCGType' (aka 'enum TCGType')
  [-Wenum-conversion]
tcg_out_insn(s, 3201, CBZ, ext, a, offset);
~~~^~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:389:65: note: expanded from 
macro 'tcg_out_insn'
glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
^
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:895:37: warning: implicit 
conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different 
enumeration type 'TCGType' (aka 'enum TCGType')
  [-Wenum-conversion]
tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
^~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:389:65: note: expanded from 
macro 'tcg_out_insn'
glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
^
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:1610:27: warning: implicit 
conversion from enumeration type 'TCGType' (aka 'enum TCGType') to different 
enumeration type 'TCGMemOp' (aka 'enum TCGMemOp')
  [-Wenum-conversion]
tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
~~^~~

Signed-off-by: Pranith Kumar 
---
 tcg/aarch64/tcg-target.inc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 6d227a5a6a..290de6dae6 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -866,7 +866,7 @@ static inline void tcg_out_goto_label(TCGContext *s, 
TCGLabel *l)
 }
 }
 
-static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
+static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
TCGArg b, bool b_const, TCGLabel *l)
 {
 intptr_t offset;
@@ -937,7 +937,7 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg 
rd,
 }
 }
 
-static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
+static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
TCGReg rh, TCGReg al, TCGReg ah,
tcg_target_long bl, tcg_target_long bh,
bool const_bl, bool const_bh, bool sub)
-- 
2.11.0

[Qemu-devel] [PATCH RESEND] aarch64: Change ext type to TCGType to fix warnings

2017-02-17 Thread Pranith Kumar

To fix the following warnings:

In file included from /users/pranith/qemu/tcg/tcg.c:255:
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:879:24: warning: implicit 
conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different 
enumeration type 'TCGType' (aka 'enum TCGType')
  [-Wenum-conversion]
tcg_out_cmp(s, ext, a, b, b_const);
~~~^~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:893:36: warning: implicit 
conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different 
enumeration type 'TCGType' (aka 'enum TCGType')
  [-Wenum-conversion]
tcg_out_insn(s, 3201, CBZ, ext, a, offset);
~~~^~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:389:65: note: expanded from 
macro 'tcg_out_insn'
glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
^
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:895:37: warning: implicit 
conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different 
enumeration type 'TCGType' (aka 'enum TCGType')
  [-Wenum-conversion]
tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
^~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:389:65: note: expanded from 
macro 'tcg_out_insn'
glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
^
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:1610:27: warning: implicit 
conversion from enumeration type 'TCGType' (aka 'enum TCGType') to different 
enumeration type 'TCGMemOp' (aka 'enum TCGMemOp')
  [-Wenum-conversion]
tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
~~^~~

Signed-off-by: Pranith Kumar 
---
 tcg/aarch64/tcg-target.inc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 6d227a5a6a..290de6dae6 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -866,7 +866,7 @@ static inline void tcg_out_goto_label(TCGContext *s, 
TCGLabel *l)
 }
 }
 
-static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
+static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
TCGArg b, bool b_const, TCGLabel *l)
 {
 intptr_t offset;
@@ -937,7 +937,7 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg 
rd,
 }
 }
 
-static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
+static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
TCGReg rh, TCGReg al, TCGReg ah,
tcg_target_long bl, tcg_target_long bh,
bool const_bl, bool const_bh, bool sub)
-- 
2.11.0

Re: [Qemu-devel] [PATCH] Makefile: Put VERSION info into version.texi rather than using -D

2017-02-17 Thread Marc-André Lureau

Hi

- Original Message -
> Unfortunately some older versions of makeinfo don't correctly
> handle the -D command line option and fail to set the variable.
> This then causes them to complain
>  docs/qemu-ga-ref.texi:41: warning: undefined flag: VERSION
> 
> Work around this by doing as the autotools do, and writing
> the information into a version.texi file which we then
> include from the .texi files that need it.
> 
> Signed-off-by: Peter Maydell 
> ---
>  Makefile   | 17 ++---
>  docs/qemu-ga-ref.texi  |  2 ++
>  docs/qemu-qmp-ref.texi |  2 ++
>  3 files changed, 14 insertions(+), 7 deletions(-)

looks good,
version.texi should probably be in .gitignore though

> diff --git a/Makefile b/Makefile
> index 830fa5a..1c4c04f 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -516,7 +516,7 @@ distclean: clean
>   rm -f qemu-doc.vr qemu-doc.txt
>   rm -f config.log
>   rm -f linux-headers/asm
> - rm -f qemu-ga-qapi.texi qemu-qapi.texi
> + rm -f qemu-ga-qapi.texi qemu-qapi.texi version.texi
>   rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
>   rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
>   rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
> @@ -663,21 +663,24 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
>  
>  # documentation
>  MAKEINFO=makeinfo
> -MAKEINFOFLAGS=--no-split --number-sections -D 'VERSION $(VERSION)'
> -TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'
> +MAKEINFOFLAGS=--no-split --number-sections
> +TEXIFLAG=$(if $(V),,--quiet)
>  
> -%.html: %.texi
> +version.texi: $(SRC_PATH)/VERSION
> + $(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
> +
> +%.html: %.texi version.texi
>   $(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers 
> \
>   --html $< -o $@,"GEN","$@")
>  
> -%.info: %.texi
> +%.info: %.texi version.texi
>   $(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
>  
> -%.txt: %.texi
> +%.txt: %.texi version.texi
>   $(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers 
> \
>   --plaintext $< -o $@,"GEN","$@")
>  
> -%.pdf: %.texi
> +%.pdf: %.texi version.texi
>   $(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o
>   $@,"GEN","$@")
>  
>  qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
> diff --git a/docs/qemu-ga-ref.texi b/docs/qemu-ga-ref.texi
> index 87cc8d0..ddb76ce 100644
> --- a/docs/qemu-ga-ref.texi
> +++ b/docs/qemu-ga-ref.texi
> @@ -1,6 +1,8 @@
>  \input texinfo
>  @setfilename qemu-ga-ref.info
>  
> +@include version.texi
> +
>  @exampleindent 0
>  @paragraphindent 0
>  
> diff --git a/docs/qemu-qmp-ref.texi b/docs/qemu-qmp-ref.texi
> index 818e525..0a00569 100644
> --- a/docs/qemu-qmp-ref.texi
> +++ b/docs/qemu-qmp-ref.texi
> @@ -1,6 +1,8 @@
>  \input texinfo
>  @setfilename qemu-qmp-ref.info
>  
> +@include version.texi
> +
>  @exampleindent 0
>  @paragraphindent 0
>  
> --
> 2.7.4
> 
>

Re: [Qemu-devel] [PATCH v15 25/25] qcow2-bitmap: improve check_constraints_on_bitmap

2017-02-17 Thread Eric Blake

On 02/17/2017 04:18 AM, Vladimir Sementsov-Ogievskiy wrote:
> 16.02.2017 17:21, Kevin Wolf wrote:
>> Am 15.02.2017 um 11:10 hat Vladimir Sementsov-Ogievskiy geschrieben:
>>> Add detailed error messages.
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>> Why not merge this patch into the one that originally introduced the
>> function?
> 
> Just to not create extra work for reviewers

It's extra work for reviewers if you don't rebase obvious fixes where
they belong - a new reviewer may flag the issue in the earlier patch
only to find out later in the series that you've already fixed it.
Avoiding needless code churn is part of what rebasing is all about - you
want each step of the series to be self-contained and as correct as
possible, by adding in the fixes at the point where they make sense,
rather than at the end of the series.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH] Makefile: Put VERSION info into version.texi rather than using -D

2017-02-17 Thread Eric Blake

On 02/17/2017 09:12 AM, Peter Maydell wrote:
> Unfortunately some older versions of makeinfo don't correctly
> handle the -D command line option and fail to set the variable.
> This then causes them to complain
>  docs/qemu-ga-ref.texi:41: warning: undefined flag: VERSION
> 
> Work around this by doing as the autotools do, and writing
> the information into a version.texi file which we then
> include from the .texi files that need it.

Autotools defines a few more macros, but we can always beef up our
version.texi if we find ourselves needing any of them.

> 
> Signed-off-by: Peter Maydell 
> ---
>  Makefile   | 17 ++---
>  docs/qemu-ga-ref.texi  |  2 ++
>  docs/qemu-qmp-ref.texi |  2 ++
>  3 files changed, 14 insertions(+), 7 deletions(-)

Missing a change to .gitignore for version.texi.  With that fixed, you
can add:

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH] Makefile: Put VERSION info into version.texi rather than using -D

2017-02-17 Thread Peter Maydell

On 17 February 2017 at 15:50, Eric Blake  wrote:
> On 02/17/2017 09:12 AM, Peter Maydell wrote:
>> Unfortunately some older versions of makeinfo don't correctly
>> handle the -D command line option and fail to set the variable.
>> This then causes them to complain
>>  docs/qemu-ga-ref.texi:41: warning: undefined flag: VERSION
>>
>> Work around this by doing as the autotools do, and writing
>> the information into a version.texi file which we then
>> include from the .texi files that need it.
>
> Autotools defines a few more macros, but we can always beef up our
> version.texi if we find ourselves needing any of them.
>
>>
>> Signed-off-by: Peter Maydell 
>> ---
>>  Makefile   | 17 ++---
>>  docs/qemu-ga-ref.texi  |  2 ++
>>  docs/qemu-qmp-ref.texi |  2 ++
>>  3 files changed, 14 insertions(+), 7 deletions(-)
>
> Missing a change to .gitignore for version.texi.

Should be
--- a/.gitignore
+++ b/.gitignore
@@ -107,6 +107,7 @@ docs/qemu-ga-ref.info*
 docs/qemu-qmp-ref.info*
 /qemu-ga-qapi.texi
 /qemu-qapi.texi
+/version.texi
 *.tps
 .stgit-*
 cscope.*

right?

thanks
-- PMM

Re: [Qemu-devel] [PATCH] Makefile: Put VERSION info into version.texi rather than using -D

2017-02-17 Thread Eric Blake

On 02/17/2017 09:52 AM, Peter Maydell wrote:

>>>  Makefile   | 17 ++---
>>>  docs/qemu-ga-ref.texi  |  2 ++
>>>  docs/qemu-qmp-ref.texi |  2 ++
>>>  3 files changed, 14 insertions(+), 7 deletions(-)
>>
>> Missing a change to .gitignore for version.texi.
> 
> Should be
> --- a/.gitignore
> +++ b/.gitignore
> @@ -107,6 +107,7 @@ docs/qemu-ga-ref.info*
>  docs/qemu-qmp-ref.info*
>  /qemu-ga-qapi.texi
>  /qemu-qapi.texi
> +/version.texi
>  *.tps
>  .stgit-*
>  cscope.*
> 
> right?

Yes, that looks right.

(Hmm - another file stuck in the top-level directory; a separate patch
might be worth sticking some of these generated .texi files under docs/
instead)

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [RFC PATCH V4] qemu-img: make convert async

2017-02-17 Thread Peter Lieven

this is something I have been thinking about for almost 2 years now.
we heavily have the following two use cases when using qemu-img convert.

a) reading from NFS and writing to iSCSI for deploying templates
b) reading from iSCSI and writing to NFS for backups

In both processes we use libiscsi and libnfs so we have no kernel pagecache.
As qemu-img convert is implemented with sync operations that means we
read one buffer and then write it. No parallelism and each sync request
takes as long as it takes until it is completed.

This is version 4 of the approach using coroutine worker "threads".

So far I have the following runtimes when reading an uncompressed QCOW2 from
NFS and writing it to iSCSI (raw):

qemu-img (master)
 nfs -> iscsi 22.8 secs
 nfs -> ram   11.7 secs
 ram -> iscsi 12.3 secs

qemu-img-async (8 coroutines, in-order write disabled)
 nfs -> iscsi 11.0 secs
 nfs -> ram   10.4 secs
 ram -> iscsi  9.0 secs

The following are the runtimes found with different settings between V3 and V4.
This is always the best runtime out of 10 runs when converting from nfs to 
iscsi.
Please note that in V4 in-order write scenarios show a very high jitter. I think
this is because the get_block_status on the NFS share is delayed by concurrent 
read
requests.

                       in-order        out-of-order
V3  - 16 coroutines    12.4 seconds    11.1 seconds
    -  8 coroutines    12.2 seconds    11.3 seconds
    -  4 coroutines    12.5 seconds    11.1 seconds
    -  2 coroutines    14.8 seconds    14.9 seconds

V4  - 32 coroutines    15.9 seconds    11.5 seconds
    - 16 coroutines    12.5 seconds    11.0 seconds
    -  8 coroutines    12.9 seconds    11.0 seconds
    -  4 coroutines    14.1 seconds    11.5 seconds
    -  2 coroutines    16.9 seconds    13.2 seconds

Comments appreciated.

Thank you,
Peter

Signed-off-by: Peter Lieven 
---
v3->v4: - avoid to prepare a request queue upfront [Kevin]
- do not ignore the BLK_BACKING_FILE status [Kevin]
- redesign the interface to the read and write routines [Kevin]

v2->v3: - updated stats in the commit msg from a host with a better network card
- only wake up the coroutine that is actually waiting for a write to 
complete.
  this was not only overhead, but also breaking at least linux AIO.
- fix coding style complaints
- rename some variables and structs

v1->v2: - using coroutine as worker "threads". [Max]
- keeping the request queue as otherwise it happens
  that we wait on BLK_ZERO chunks while keeping the write order.
  it also avoids redundant calls to get_block_status and helps
  to skip some conditions for fully allocated images (!s->min_sparse)
---
 qemu-img.c | 260 -
 1 file changed, 187 insertions(+), 73 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index cff22e3..6bac980 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1448,6 +1448,8 @@ enum ImgConvertBlockStatus {
 BLK_BACKING_FILE,
 };
 
+#define MAX_COROUTINES 16
+
 typedef struct ImgConvertState {
 BlockBackend **src;
 int64_t *src_sectors;
@@ -1455,15 +1457,25 @@ typedef struct ImgConvertState {
 int64_t src_cur_offset;
 int64_t total_sectors;
 int64_t allocated_sectors;
+int64_t allocated_done;
+int64_t sector_num;
+int64_t wr_offs;
 enum ImgConvertBlockStatus status;
 int64_t sector_next_status;
 BlockBackend *target;
 bool has_zero_init;
 bool compressed;
 bool target_has_backing;
+bool wr_in_order;
 int min_sparse;
 size_t cluster_sectors;
 size_t buf_sectors;
+int num_coroutines;
+int running_coroutines;
+Coroutine *co[MAX_COROUTINES];
+int64_t wait_sector_num[MAX_COROUTINES];
+CoMutex lock;
+int ret;
 } ImgConvertState;
 
 static void convert_select_part(ImgConvertState *s, int64_t sector_num)
@@ -1544,11 +1556,12 @@ static int convert_iteration_sectors(ImgConvertState 
*s, int64_t sector_num)
 return n;
 }
 
-static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
-uint8_t *buf)
+static int convert_co_read(ImgConvertState *s, int64_t sector_num,
+   int nb_sectors, uint8_t *buf)
 {
-int n;
-int ret;
+int n, ret;
+QEMUIOVector qiov;
+struct iovec iov;
 
 assert(nb_sectors <= s->buf_sectors);
 while (nb_sectors > 0) {
@@ -1563,9 +1576,13 @@ static int convert_read(ImgConvertState *s, int64_t 
sector_num, int nb_sectors,
 bs_sectors = s->src_sectors[s->src_cur];
 
 n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
-ret = blk_pread(blk,
-(sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS,
-buf, n << BDRV_SECTOR_BITS);
+iov.iov_base = buf;
+iov.iov_len = n << BDRV_SECTOR_BITS;
+qemu_iovec_init_external(&qiov, &iov, 1);
+
+ret = blk_co_

Re: [Qemu-devel] [PATCH v8 4/8] ACPI: Add Virtual Machine Generation ID support

2017-02-17 Thread Laszlo Ersek

On 02/17/17 16:33, Ben Warren wrote:
> 
>> On Feb 17, 2017, at 2:43 AM, Igor Mammedov > > wrote:
>>
>> On Thu, 16 Feb 2017 15:15:36 -0800
>> b...@skyportsystems.com  wrote:
>>
>>> From: Ben Warren mailto:b...@skyportsystems.com>>
>>>
>>> This implements the VM Generation ID feature by passing a 128-bit
>>> GUID to the guest via a fw_cfg blob.
>>> Any time the GUID changes, an ACPI notify event is sent to the guest
>>>
>>> The user interface is a simple device with one parameter:
>>> - guid (string, must be "auto" or in UUID format
>>>   xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
>> I've given it some testing with WS2012R2 and v4 patches for Seabios,
>>
>> Windows is able to read initial GUID allocation and writeback
>> seems to work somehow:
>>
>> (qemu) info vm-generation-id
>> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
>>
>> vmgenid client in Windows reads it as 2 following 64bit integers:
>> 42d50e8bc109c09b:6cd1dcc90984339b
>>
>> However update path/restore from snapshot doesn't
>> here is as I've tested it:
>>
>> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor stdio
>> (qemu) info vm-generation-id
>> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
>> (qemu) stop
>> (qemu) migrate "exec:gzip -c > STATEFILE.gz"
>> (qemu) quit
>>
>> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor stdio
>> -incoming "exec: gzip -c -d STATEFILE.gz"
>> (qemu) info vm-generation-id
>> 28b587fa-991b-4267-80d7-9cf28b746fe9
>>
>> guest
>> 1. doesn't get GPE notification that it must receive
>> 2. vmgenid client in Windows reads the same value
>>  42d50e8bc109c09b:6cd1dcc90984339b
>>
> Strange, this was working for me, but with a slightly different test method:
> 
>   * I use virsh save/restore

Awesome, this is actually what I should try. All my guests are managed by
libvirt (with the occasional , for development), and direct
QEMU monitor commands such as

  virsh qemu-monitor-command ovmf.rhel7 --hmp 'info vm-generation-id'

only work for me if they are reasonably non-intrusive.

>   * While I do later testing with Windows, during development I use a
> Linux kernel module I wrote that keeps track of GUID and
> notifications.  I’m happy to share this with you if interested.

Please do. If you have a public git repo somewhere, that would be
awesome. (Bonus points if the module builds out-of-tree, if the
kernel-devel package is installed.)

NB: while the set-id monitor command was part of the series, I did test
it to the extent that I checked the SCI ("ACPI interrupt") count in the
guest, in /proc/interrupts. I did see it increase, so minimally the SCI
injection was fine.

Thanks!
Laszlo

> I’ll dig into this morning.
> 
> —Ben
>

[Qemu-devel] [PATCH 0/2] block/nfs optimizations

2017-02-17 Thread Peter Lieven

Peter Lieven (2):
  block/nfs: convert to preadv / pwritev
  block/nfs: try to avoid the bounce buffer in pwritev

 block/nfs.c | 50 --
 1 file changed, 28 insertions(+), 22 deletions(-)

-- 
1.9.1

[Qemu-devel] [PATCH 2/2] block/nfs: try to avoid the bounce buffer in pwritev

2017-02-17 Thread Peter Lieven

if the passed qiov contains exactly one iov we can
pass the buffer directly.

Signed-off-by: Peter Lieven 
---
 block/nfs.c | 23 ---
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/block/nfs.c b/block/nfs.c
index ab5dcc2..bb4b75f 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -295,20 +295,27 @@ static int coroutine_fn nfs_co_pwritev(BlockDriverState 
*bs, uint64_t offset,
 NFSClient *client = bs->opaque;
 NFSRPC task;
 char *buf = NULL;
+bool my_buffer = false;
 
 nfs_co_init_task(bs, &task);
 
-buf = g_try_malloc(bytes);
-if (bytes && buf == NULL) {
-return -ENOMEM;
+if (iov->niov != 1) {
+buf = g_try_malloc(bytes);
+if (bytes && buf == NULL) {
+return -ENOMEM;
+}
+qemu_iovec_to_buf(iov, 0, buf, bytes);
+my_buffer = true;
+} else {
+buf = iov->iov[0].iov_base;
 }
 
-qemu_iovec_to_buf(iov, 0, buf, bytes);
-
 if (nfs_pwrite_async(client->context, client->fh,
  offset, bytes, buf,
  nfs_co_generic_cb, &task) != 0) {
-g_free(buf);
+if (my_buffer) {
+g_free(buf);
+}
 return -ENOMEM;
 }
 
@@ -317,7 +324,9 @@ static int coroutine_fn nfs_co_pwritev(BlockDriverState 
*bs, uint64_t offset,
 qemu_coroutine_yield();
 }
 
-g_free(buf);
+if (my_buffer) {
+g_free(buf);
+}
 
 if (task.ret != bytes) {
 return task.ret < 0 ? task.ret : -EIO;
-- 
1.9.1

[Qemu-devel] [PATCH 1/2] block/nfs: convert to preadv / pwritev

2017-02-17 Thread Peter Lieven

Signed-off-by: Peter Lieven 
---
 block/nfs.c | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/block/nfs.c b/block/nfs.c
index 689eaa7..ab5dcc2 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -256,9 +256,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void 
*data,
 nfs_co_generic_bh_cb, task);
 }
 
-static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
+static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
+  uint64_t bytes, QEMUIOVector *iov,
+  int flags)
 {
 NFSClient *client = bs->opaque;
 NFSRPC task;
@@ -267,9 +267,7 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
 task.iov = iov;
 
 if (nfs_pread_async(client->context, client->fh,
-sector_num * BDRV_SECTOR_SIZE,
-nb_sectors * BDRV_SECTOR_SIZE,
-nfs_co_generic_cb, &task) != 0) {
+offset, bytes, nfs_co_generic_cb, &task) != 0) {
 return -ENOMEM;
 }
 
@@ -290,9 +288,9 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
 return 0;
 }
 
-static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
-int64_t sector_num, int nb_sectors,
-QEMUIOVector *iov)
+static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
+   uint64_t bytes, QEMUIOVector *iov,
+   int flags)
 {
 NFSClient *client = bs->opaque;
 NFSRPC task;
@@ -300,17 +298,16 @@ static int coroutine_fn nfs_co_writev(BlockDriverState 
*bs,
 
 nfs_co_init_task(bs, &task);
 
-buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
-if (nb_sectors && buf == NULL) {
+buf = g_try_malloc(bytes);
+if (bytes && buf == NULL) {
 return -ENOMEM;
 }
 
-qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
+qemu_iovec_to_buf(iov, 0, buf, bytes);
 
 if (nfs_pwrite_async(client->context, client->fh,
- sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE,
- buf, nfs_co_generic_cb, &task) != 0) {
+ offset, bytes, buf,
+ nfs_co_generic_cb, &task) != 0) {
 g_free(buf);
 return -ENOMEM;
 }
@@ -322,7 +319,7 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
 
 g_free(buf);
 
-if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
+if (task.ret != bytes) {
 return task.ret < 0 ? task.ret : -EIO;
 }
 
@@ -856,8 +853,8 @@ static BlockDriver bdrv_nfs = {
 .bdrv_create= nfs_file_create,
 .bdrv_reopen_prepare= nfs_reopen_prepare,
 
-.bdrv_co_readv  = nfs_co_readv,
-.bdrv_co_writev = nfs_co_writev,
+.bdrv_co_preadv = nfs_co_preadv,
+.bdrv_co_pwritev= nfs_co_pwritev,
 .bdrv_co_flush_to_disk  = nfs_co_flush,
 
 .bdrv_detach_aio_context= nfs_detach_aio_context,
-- 
1.9.1

Re: [Qemu-devel] [PATCH v2 00/16] Postcopy: Hugepage support

2017-02-17 Thread Andrea Arcangeli

Hello Alexey,

On Tue, Feb 14, 2017 at 05:48:25PM +0300, Alexey Perevalov wrote:
> On Mon, Feb 13, 2017 at 06:57:22PM +0100, Andrea Arcangeli wrote:
> > Hello,
> > 
> > On Mon, Feb 13, 2017 at 08:11:06PM +0300, Alexey Perevalov wrote:
> > > Another one request.
> > > QEMU could use mem_path in hugefs with share key simultaneously
> > > (-object 
> > > memory-backend-file,id=mem,size=${mem_size},mem-path=${mem_path},share=on)
> > >  and vm
> > > in this case will start and will properly work (it will allocate memory
> > > with mmap), but in case of destination for postcopy live migration
> > > UFFDIO_COPY ioctl will fail for
> > > such region, in Arcangeli's git tree there is such prevent check
> > > (if (!vma_is_shmem(dst_vma) && dst_vma->vm_flags & VM_SHARED).
> > > Is it possible to handle such situation at qemu?
> > 
> > It'd be nice to lift this hugetlbfs !VM_SHARED restriction I agree, I
> > already asked Mike (CC'ed) why is there, because I'm afraid it's a
> > leftover from the anon version where VM_SHARED means a very different
> > thing but it was already lifted for shmem. share=on should already
> > work on top of tmpfs and also with THP on tmpfs enabled.
> > 
> > For hugetlbfs and shmem it should be generally more complicated to
> > cope with private mappings than shared ones, shared is just the native
> > form of the pseudofs without having to deal with private COWs aliases
> > so it's hard to imagine something going wrong for VM_SHARED if the
> > MAP_PRIVATE mapping already works fine. If it turns out to be
> > superfluous the check may be just turned into
> > "vma_is_anonymous(dst_vma) && dst_vma->vm_flags & VM_SHARED".
> 
> Great, as I know  -netdev type=vhost-user requires share=on in
> -object memory-backend in ovs-dpdk scenario
> http://wiki.qemu-project.org/Documentation/vhost-user-ovs-dpdk

share=on should work now with current aa.git userfault branch, and the
support is already included in -mm, it should all get merged upstream
in kernel 4.11.

Could you test the current aa.git userfault branch to verify postcopy
live migration works fine on hugetlbfs share=on?

Thanks!
Andrea

Re: [Qemu-devel] Fix build break during configuration on musl-libc based Linux systems.

2017-02-17 Thread Chad Joan

Wow, that is some quick turn-around.  Well done!

My thoughts:

   - I find this summary info very helpful!  On behalf of the N people
   trying to heal paper cuts: thank you!
   - I still recommend an example snippet for shell/bash interaction that
   demonstrates the workflow you expect from a first-time contributor.  It
   should be populated with commonly used values (ex: mailing list
   addresses).  I don't expect this to happen fast like the summary info; I
   suspect someone is going to have to pretend they are submitting a patch,
   write down what they do, and then think about how to present this.
   - Regarding the signature: IIRC, setting up certificates on a machine
   using gpg can be quite time consuming and learning-intensive if you've
   never needed to do it before.  Having that example will go a long way to
   help with this.  There is still a possible pain-point: you might write one
   line of git code in the example, and it is easy for you due to your
   workflow, but it could be hours of fiddling for someone who has never done
   it before.  If I'm wrong, show me (the hypothetical reader) how easy it is
   ;)  If I'm right (and that would be unfortunate, in this case), then it
   might be helpful if you politely ask the reader to spend X amount of
   time on it (establish accurate expectations) and then provide a link to the
   most helpful how-to article you can find on the subject.
   - The email thing is a pain-point, but I wonder if QEMU can make it
   wonderful without sacrificing much--perhaps just a few words on the page.
   Presumably, not every email client mangles patches.  Can we have a
   whitelist?  So far the whitelist has one item: git send-email.  But git
   send-email is probably not even part of the majority population's workflow,
   so if we can test and approve even /some/ of the more popular mail clients
   (ex: gmail, thunderbird, outlook, etc) for use, it would help newbies A
   LOT.  Less importantly; a blacklist could be useful too, to prevent
   unnecessary "what about my mail client?" questions and unnecessary
   redundant testing in the future.
   - I should also mention that I found the rest of the document to be very
   well-written.  Its comprehensiveness became its weakness, but that's still
   important long-term, hence why I think an alternative path with a short
   example for trivial patches is plenty sufficient: from my perspective,
   there's no need to change the rest of the text; it is already good :).

Note that I'm bothering to stick around and provide feedback, despite other
pressing life obligations.  Providing advice on submit-a-patch usability
for QEMU isn't on my schedule, but I don't have the heart to bail on this,
especially when you all are kindly listening, having high quality
discussion, and sincerely trying to improve things.  If you read between
the lines, you see the truth: I am a yak shaver!

Oh man, when I hit a topic like "use git send-email", the hair started
flying: learning new git commands, two-factor auth on gmail, U2F keys,
governance for the no-mans-land of a server, and so on, a real yak-shaving
party.  After an hour or two, my safety triggered, and I thought, "man, I
am spending way too much time perfecting this workflow that I might never
do again" and I spent a few minutes writing a message in gmail.  I
certainly don't expect QEMU devs to fix garbled patches either: that also
seems like a huge waste of valuable time (and for talented and passionate
individuals, too).  There has to be a better way!  So I hope the whitelist
idea helps, or maybe it's enough to just call awareness to this potential
improvement.

Well, that ended up being very long.  I hope this is helpful and doesn't
spend too much of your time.

Thanks for listening!

On Fri, Feb 17, 2017 at 10:34 AM, Eric Blake  wrote:

> On 02/17/2017 03:28 AM, Peter Maydell wrote:
> > On 17 February 2017 at 06:43, Fam Zheng  wrote:
> >> But your point is taken, we should make the first (or a one-shot)
> >> contribution as easy as possible.
> >
> > Yes; we could do with providing a "This page seems very long..."
> > introduction section. The absolute bare minimum requirements
> > for a submitter I think are:
> >  * Provide a Signed-off-by: line (this is a hard requirement
> >because it's how you say "I'm legally OK to contribute this
> >and am happy for it to go into QEMU")
> >  * send patch by email
> >  * read replies and act on them if you want your patch to go in
> >
> > The larger your contribution is, the more important the other
> > requirements detailed on the page are; but personally I'm
> > happy to manually fix up patches from a first-time submitter,
> > and I think most other maintainers are too.
>
> I've updated the wiki to put in that nice bullet list, prior to the
> table of contents.
>
> --
> Eric Blake   eblake redhat com+1-919-301-3266
> Libvirt virtualization library http://libvirt.org
>
>

Re: [Qemu-devel] Fix build break during configuration on musl-libc based Linux systems.

2017-02-17 Thread Peter Maydell

On 17 February 2017 at 16:54, Chad Joan  wrote:
> Regarding the signature: IIRC, setting up certificates on a machine using
> gpg can be quite time consuming and learning-intensive if you've never
> needed to do it before.

There is no requirement for gpg (except for maintainers submitting
pull requests). When we say "We need a signed-off-by" what we mean
is literally "your commit message must include a line of the form
Signed-off-by Some Person ".

I agree that gpg is terrible as a UI and we definitely don't
want it in our patch workflow for submitters ;-)

thanks
-- PMM

Re: [Qemu-devel] Fix build break during configuration on musl-libc based Linux systems.

2017-02-17 Thread Paolo Bonzini



On 17/02/2017 17:54, Chad Joan wrote:
> Regarding the signature: IIRC, setting up certificates on a machine
> using gpg can be quite time consuming and learning-intensive if you've
> never needed to do it before.  Having that example will go a long way to
> help with this.  There is still a possible pain-point: you might write
> one line of git code in the example, and it is easy for you due to your
> workflow, but it could be hours of fiddling for someone who has never
> done it before.  If I'm wrong, show me (the hypothetical reader) how
> easy it is ;)  If I'm right (and that would be unfortunate, in this
> case), then it might be helpful if you politely ask the reader to spend
> time X amount of time on it (establish accurate expectations) and then
> provide a link to the most helpful how-to article you can find on the
> subject.

GPG signing is not needed.  All you need is "git commit -s".

Adding "Signed-off-by: Chad Joan " basically is a
way to tell us "I understand I'm contributing this under the GNU GPL or
a compatible license".  It's not a cryptographic signature.

Paolo

Re: [Qemu-devel] Fix build break during configuration on musl-libc based Linux systems.

2017-02-17 Thread Peter Maydell

On 17 February 2017 at 11:20, Paolo Bonzini  wrote:
>
>
> On 17/02/2017 11:18, Peter Maydell wrote:
>> Defining _XOPEN_SOURCE is easy enough, and I think we should
>> do it unconditionally. We should check what effect this has
>> on the BSD hosts though I guess. (You could argue that we
>> should be defining _XOPEN_SOURCE anyway for the benefit of
>> the non-glibc BSD/Solaris/etc platforms.)
>
> Sounds good, then I think we should define it to 700 just like glibc does.

Unfortunately this idea turns out to break OSX compiles,
because on OSX saying _XOPEN_SOURCE=anything disables
all the non-X/Open APIs (which you get by default, and
some of which like mkdtemp we use).

Looking at the manpage for Solaris
https://www.freebsd.org/cgi/man.cgi?query=standards&apropos=0&sektion=0&manpath=SunOS+5.8&format=html
it seems to also go with "_XOPEN_SOURCE means *only* those
functions", though its mechanism for saying "and the other
stuff too" is different (define __EXTENSIONS__).

So perhaps we'd better stick with "only define this for
Linux hosts".

thanks
-- PMM

Re: [Qemu-devel] Fix build break during configuration on musl-libc based Linux systems.

2017-02-17 Thread Chad Joan

How wonderful!  Problem solved.  Now I think that just having an example
could kill the misconception forever ;)

On Fri, Feb 17, 2017 at 11:57 AM, Paolo Bonzini  wrote:

>
>
> On 17/02/2017 17:54, Chad Joan wrote:
> > Regarding the signature: IIRC, setting up certificates on a machine
> > using gpg can be quite time consuming and learning-intensive if you've
> > never needed to do it before.  Having that example will go a long way to
> > help with this.  There is still a possible pain-point: you might write
> > one line of git code in the example, and it is easy for you due to your
> > workflow, but it could be hours of fiddling for someone who has never
> > done it before.  If I'm wrong, show me (the hypothetical reader) how
> > easy it is ;)  If I'm right (and that would be unfortunate, in this
> > case), then it might be helpful if you politely ask the reader to spend
> > time X amount of time on it (establish accurate expectations) and then
> > provide a link to the most helpful how-to article you can find on the
> > subject.
>
> GPG signing is not needed.  All you need is "git commit -s".
>
> Adding "Signed-off-by: Chad Joan " basically is a
> way to tell us "I understand I'm contributing this under the GNU GPL or
> a compatible license".  It's not a cryptographic signature.
>
> Paolo
>

Re: [Qemu-devel] Fix build break during configuration on musl-libc based Linux systems.

2017-02-17 Thread Peter Maydell

On 17 February 2017 at 16:54, Chad Joan  wrote:
> so if we can test and
> approve even /some/ of the more popular mail clients (ex: gmail,
> thunderbird, outlook, etc) for use, it would help newbies A LOT.

Pretty sure we've seen mangled emails from all of those.
The problem is that most email clients will automatically
wrap long lines, which is fine for text but breaks an
inline patch. Changing trailing whitespace in patches is
also a common issue.

The documentation in "git help format-patch" has a section
"MUA-SPECIFIC HINTS" which says that gmail's web interface
is definitely no good, and Thunderbird defaults to the wrong
thing and requires a bunch of config changes which you'd then
have to switch back to your preferences for normal mail when
you're done sending patches. Outlook is so hopeless for
patch mail it isn't even listed :-)

The kernel docs have a longer list of mail clients with
notes about suitability:
https://kernel.org/doc/html/latest/process/email-clients.html
but the set of "just works" clients is very small.

thanks
-- PMM

Re: [Qemu-devel] Fix build break during configuration on musl-libc based Linux systems.

2017-02-17 Thread Eric Blake

On 02/17/2017 10:54 AM, Chad Joan wrote:
> Wow, that is some quick turn-around.  Well done!
> 
> My thoughts:
> 
>- I find this summary info very helpful!  On behalf of the N people
>trying to heal paper cuts: thank you!
>- I still recommend an example snippet for shell/bash interaction that
>demonstrates the workflow you expect from a first-time contributor.  It
>should be populated with commonly used values (ex: mailing list
>addresses).  I don't expect this to happen fast like the summary info; I
>suspect someone is going to have to pretend they are submitting a patch,
>write down what they do, and then think about how to present this.

For a quickie patch, I make my edits, then 'git commit -a -s' and 'git
send-email -1' - but that works because I've already set up git hooks to
auto-cc the list, and I already debugged my 'git send-email' setup years
ago.  So yeah, doing it from a completely virgin environment could
benefit from a complete command line that reproduces what I take for
granted in my normal environment.  And some email setups are a lot
friendlier than others (I personally do not use gmail, but will readily
admit that it is probably a lot easier to set up my work emails to use
Red Hat's SMTP servers than it is for a gmail contributor to get their
email setup working in a way that does not mangle patches).

>- Regarding the signature: IIRC, setting up certificates on a machine
>using gpg can be quite time consuming and learning-intensive if you've
>never needed to do it before.

But nothing requires you to set up a certificate to submit a patch.  I'm
not sure which piece of the documentation got you steered in that
direction, but gpg signing of patches is only required of maintainers,
not contributors (or maybe you're hinting at the extra effort required
to set up gmail as a valid 'git send-email' target, to which I have no
experience, but which starts to leave the realm of qemu-specific
instructions into something where it would be better to link to a good
git setup tutorial, if one exists).

>  Having that example will go a long way to
>help with this.

>- I should also mention that I found the rest of the document to be very
>well-written.  Its comprehensiveness became its weakness, but that's still
>important long-term, hence why I think an alternative path with a short
>example for trivial patches is plenty sufficient: from my perspective,
>there's no need to change the rest of the text; it is already good :).

Thanks for that feedback.  It's often hard for a core contributor to
remember what it was like to submit their first patch years ago, and
having a fresh take on the matter from a new contributor is well worth
the reminders.  It's also nice to hear this as a compliment, and not
just a complaint.

> 
> Note that I'm bothering to stick around and provide feedback, despite other
> pressing life obligations.  Providing advice on submit-a-patch usability
> for QEMU isn't on my schedule, but I don't have the heart to bail on this,
> especially when you all are kindly listening, having high quality
> discussion, and sincerely trying to improve things.  If you read between
> the lines, you see the truth: I am a yak shaver!

At the risk of pushing too hard, you could always turn your (good!)
suggestions into concrete wording, or even request a wiki account to
make the changes yourself. But even if that is beyond your planned level
of involvement, I do hope that various readers will be able to act on
the suggestions in this mail to improve things.

> 
> Oh man, when I hit a topic like "use git send-email", the hair started
> flying: learning new git commands, two-factor auth on gmail, U2F keys,
> governance for the no-mans-land of a server, and so on, a real yak-shaving
> party.  After an hour or two, my safety triggered, and I thought, "man, I
> am spending way too much time perfecting this workflow that I might never
> do again" and I spent a few minutes writing a message in gmail.  I
> certainly don't expect QEMU devs to fix garbled patches either: that also
> seems like a huge waste of valuable time (and for talented and passionate
> individuals, too).  There has to be a better way!  So I hope the whitelist
> idea helps, or maybe it's enough to just call awareness to this potential
> improvement.

Again, part of the problem may be that gmail is not really suited to the
ideal patch flow, and so that's going to be a pain point for any such
contributor.  Submitting patches as an attachment is harder than the
inline version you get with 'git send-email', but it is one of those
one-off manual fixups that a maintainer can overlook, as long as it
really is a one-off thing and not a repeated pattern.  And yes, we
should document that use of an attachment is the most likely to avoid
mangling the patch if you don't have 'git send-email' working, even if
it is harder for the maintainer to apply such a patch.

-- 
Eric Blake   e

Re: [Qemu-devel] [PATCH v7 00/17] VT-d: vfio enablement and misc enhances

2017-02-17 Thread Alex Williamson

On Tue,  7 Feb 2017 16:28:02 +0800
Peter Xu  wrote:

> This is v7 of vt-d vfio enablement series.
[snip]
> =
> Test Done
> =
> 
> Build test passed for x86_64/arm/ppc64.
> 
> Simply tested with x86_64, assigning two PCI devices to a single VM,
> boot the VM using:
> 
> bin=x86_64-softmmu/qemu-system-x86_64
> $bin -M q35,accel=kvm,kernel-irqchip=split -m 1G \
>  -device intel-iommu,intremap=on,eim=off,caching-mode=on \
>  -netdev user,id=net0,hostfwd=tcp::-:22 \
>  -device virtio-net-pci,netdev=net0 \
>  -device vfio-pci,host=03:00.0 \
>  -device vfio-pci,host=02:00.0 \
>  -trace events=".trace.vfio" \
>  /var/lib/libvirt/images/vm1.qcow2
> 
> pxdev:bin [vtd-vfio-enablement]# cat .trace.vfio
> vtd_page_walk*
> vtd_replay*
> vtd_inv_desc*
> 
> Then, in the guest, run the following tool:
> 
>   
> https://github.com/xzpeter/clibs/blob/master/gpl/userspace/vfio-bind-group/vfio-bind-group.c
> 
> With parameter:
> 
>   ./vfio-bind-group 00:03.0 00:04.0
> 
> Check host side trace log, I can see pages are replayed and mapped in
> 00:04.0 device address space, like:
> 
> ...
> vtd_replay_ce_valid replay valid context device 00:04.00 hi 0x401 lo 
> 0x38fe1001
> vtd_page_walk Page walk for ce (0x401, 0x38fe1001) iova range 0x0 - 
> 0x80
> vtd_page_walk_level Page walk (base=0x38fe1000, level=3) iova range 0x0 - 
> 0x80
> vtd_page_walk_level Page walk (base=0x35d31000, level=2) iova range 0x0 - 
> 0x4000
> vtd_page_walk_level Page walk (base=0x34979000, level=1) iova range 0x0 - 
> 0x20
> vtd_page_walk_one Page walk detected map level 0x1 iova 0x0 -> gpa 0x22dc3000 
> mask 0xfff perm 3
> vtd_page_walk_one Page walk detected map level 0x1 iova 0x1000 -> gpa 
> 0x22e25000 mask 0xfff perm 3
> vtd_page_walk_one Page walk detected map level 0x1 iova 0x2000 -> gpa 
> 0x22e12000 mask 0xfff perm 3
> vtd_page_walk_one Page walk detected map level 0x1 iova 0x3000 -> gpa 
> 0x22e2d000 mask 0xfff perm 3
> vtd_page_walk_one Page walk detected map level 0x1 iova 0x4000 -> gpa 
> 0x12a49000 mask 0xfff perm 3
> vtd_page_walk_one Page walk detected map level 0x1 iova 0x5000 -> gpa 
> 0x129bb000 mask 0xfff perm 3
> vtd_page_walk_one Page walk detected map level 0x1 iova 0x6000 -> gpa 
> 0x128db000 mask 0xfff perm 3
> vtd_page_walk_one Page walk detected map level 0x1 iova 0x7000 -> gpa 
> 0x12a8 mask 0xfff perm 3
> vtd_page_walk_one Page walk detected map level 0x1 iova 0x8000 -> gpa 
> 0x12a7e000 mask 0xfff perm 3
> vtd_page_walk_one Page walk detected map level 0x1 iova 0x9000 -> gpa 
> 0x12b22000 mask 0xfff perm 3
> vtd_page_walk_one Page walk detected map level 0x1 iova 0xa000 -> gpa 
> 0x12b41000 mask 0xfff perm 3
> ...

Hi Peter,

I'm trying to make use of this, with your vtd-vfio-enablement-v7 branch
(HEAD 0c1c4e738095).  I'm assigning an 82576 PF to a VM.  It works with
iommu=pt, but if I remove that option, the device does not work and
vfio_iommu_map_notify is never called.  Any suggestions?  My
commandline is below.  Thanks,

Alex

/usr/local/bin/qemu-system-x86_64 \
-name guest=l1,debug-threads=on -S \
-machine 
pc-q35-2.9,accel=kvm,usb=off,dump-guest-core=off,kernel-irqchip=split \
-cpu host -m 10240 -realtime mlock=off -smp 
4,sockets=1,cores=2,threads=2 \
-no-user-config -nodefaults -monitor stdio -rtc base=utc,driftfix=slew \
-global kvm-pit.lost_tick_policy=discard -no-hpet -no-shutdown \
-global ICH9-LPC.disable_s3=1 -global ICH9-LPC.disable_s4=1 \
-boot strict=on \
-device ioh3420,port=0x10,chassis=1,id=pci.1,bus=pcie.0,addr=0x2 \
-device i82801b11-bridge,id=pci.2,bus=pcie.0,addr=0x1e \
-device pci-bridge,chassis_nr=3,id=pci.3,bus=pci.2,addr=0x0 \
-device ioh3420,port=0x18,chassis=4,id=pci.4,bus=pcie.0,addr=0x3 \
-device ioh3420,port=0x20,chassis=5,id=pci.5,bus=pcie.0,addr=0x4 \
-device ioh3420,port=0x28,chassis=6,id=pci.6,bus=pcie.0,addr=0x5 \
-device ioh3420,port=0x30,chassis=7,id=pci.7,bus=pcie.0,addr=0x6 \
-device ioh3420,port=0x38,chassis=8,id=pci.8,bus=pcie.0,addr=0x7 \
-device ich9-usb-ehci1,id=usb,bus=pcie.0,addr=0x1d.0x7 \
-device 
ich9-usb-uhci1,masterbus=usb.0,firstport=0,bus=pcie.0,multifunction=on,addr=0x1d
 \
-device 
ich9-usb-uhci2,masterbus=usb.0,firstport=2,bus=pcie.0,addr=0x1d.0x1 \
-device 
ich9-usb-uhci3,masterbus=usb.0,firstport=4,bus=pcie.0,addr=0x1d.0x2 \
-device virtio-serial-pci,id=virtio-serial0,bus=pci.4,addr=0x0 \
-drive 
file=/dev/vg_s20/lv_l1,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native
 \
-device 
virtio-blk-pci,scsi=off,bus=pci.5,addr=0x0,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1
 \
-netdev user,id=hostnet0 \
-device 
virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:c2:62:30,bus=pci.1,addr=0x0 
\
-device usb-tablet,id=input0,bus=usb.0,port=1 \
-vnc :0 -vga std \
-devic

Re: [Qemu-devel] [PATCH v12 21/24] target-arm: don't generate WFE/YIELD calls for MTTCG

2017-02-17 Thread Pranith Kumar

On Mon, Feb 13, 2017 at 7:10 AM, Alex Bennée  wrote:
> The WFE and YIELD instructions are really only hints and in TCG's case
> they were useful to move the scheduling on from one vCPU to the next. In
> the parallel context (MTTCG) this just causes an unnecessary cpu_exit
> and contention of the BQL.
>

I think something similar needs to be done for 'pause' on x86.

-- 
Pranith

Re: [Qemu-devel] [PATCH v15 08/25] block: introduce auto-loading bitmaps

2017-02-17 Thread Denis V. Lunev

On 02/17/2017 04:34 PM, Kevin Wolf wrote:
> Am 17.02.2017 um 14:22 hat Denis V. Lunev geschrieben:
>> On 02/17/2017 03:48 PM, Kevin Wolf wrote:
>>> Am 17.02.2017 um 13:40 hat Vladimir Sementsov-Ogievskiy geschrieben:
 17.02.2017 15:09, Kevin Wolf wrote:
> Am 17.02.2017 um 12:46 hat Vladimir Sementsov-Ogievskiy geschrieben:
>> 16.02.2017 14:49, Kevin Wolf wrote:
>>> Am 16.02.2017 um 12:25 hat Kevin Wolf geschrieben:
 Am 15.02.2017 um 11:10 hat Vladimir Sementsov-Ogievskiy geschrieben:
> Auto loading bitmaps are bitmaps stored in the disk image, which 
> should
> be loaded when the image is opened and become BdrvDirtyBitmaps for the
> corresponding drive.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> Reviewed-by: John Snow 
> Reviewed-by: Max Reitz 
 Why do we need a new BlockDriver callback and special code for it in
 bdrv_open_common()? The callback is only ever called immediately after
 .bdrv_open/.bdrv_file_open, so can't the drivers just do this 
 internally
 in their .bdrv_open implementation? Even more so because qcow2 is the
 only driver that supports this callback.
>>> Actually, don't we have to call this in qcow2_invalidate_cache()?
>>> Currently, I think, after a migration, the autoload bitmaps aren't
>>> loaded.
>>>
>>> By moving the qcow2_load_autoloading_dirty_bitmaps() call to
>>> qcow2_open(), this would be fixed.
>>>
>>> Kevin
>> Bitmap should not be reloaded on any intermediate qcow2-open's,
>> reopens, etc. It should be loaded once, on bdrv_open, to not create
>> extra collisions (between in-memory bitmap and its stored version).
>> That was the idea.
>>
>> For bitmaps migration there are separate series, we shouldn't load
>> bitmap from file on migration, as its version in the file is
>> outdated.
> That's not what your series is doing, though. It loads the bitmaps when
 Actually, they will not be loaded as they will have IN_USE flag.

> migration starts and doesn't reload them when migration completes, even
> though they are stale. Migration with shared storage would just work
> without an extra series if you did these things in the correct places.
>
> As a reminder, this is how migration with shared storage works (or
> should work with your series):
>
> 1. Start destination qemu instance. This calls bdrv_open() with
>BDRV_O_INACTIVE. We can read in some metadata, though we don't need
>much more than the image size at this point. Writing to the image is
>still impossible.
>
> 2. Start migration on the source, while the VM is still writing to the
>image, rendering the cached metadata from step 1 stale.
>
> 3. Migration completes:
>
> a. Stop the VM
>
> b. Inactivate all images in the source qemu. This is where all
>metadata needs to be written back to the image file, including
>bitmaps. No writes to the image are possible after this point
>because BDRV_O_INACTIVE is set.
>
> c. Invalidate the caches in the destination qemu, i.e. reload
>everything from the file that could have changed since step 1,
>including bitmaps. BDRV_O_INACTIVE is cleared, making the image
>ready for writes.
>
> d. Resume the VM on the destination
>
> 4. Exit the source qemu process, which involves bdrv_close(). Note that
>at this point, no writing to the image file is possible any more,
>it's the destination qemu process that owns the image file now.
>
> Your series loads and stores bitmaps in steps 1 and 4. This means that
 Actually - not. in 1 bitmaps are "in use", in 4 INACTIVE is set (and
 it is checked), nothing is stored.

> they are stale on the destination when migration completes, and that
> bdrv_close() wants to write to an image file that it doesn't own any
> more, which will cause an assertion failure. If you instead move things
> to steps 3b and 3c, it will just work.
 Hmm, I understand the idea.. But this will interfere with postcopy
 bitmap migration. So if we really need this, there should be some
 additional control flags or capabilities.. The problem of your
 approach is that bitmap actually migrated in the short state when
 source and destination are stopped, it may take time, as bitmaps may
 be large.
>>> You can always add optimisations, but this is the basic lifecycle
>>> process of block devices in qemu, so it would be good to adhere to it.
>>> So far there are no other pieces of information that are ignored in
>>> bdrv_invalidate()/bdrv_inactivate() and instead only handled in
>>> bdrv_open()/bdrv_close(). It's a matter of consistency, too.
>>>
>>> And not having to add special cases for specific fe

[Qemu-devel] [PATCH] Changing error message of QMP 'migrate_set_downtime' to seconds

2017-02-17 Thread Daniel Henrique Barboza

The previous error message was displaying the values in milliseconds,
being misleading with the command that accepts the value in seconds:

{ "execute": "migrate_set_downtime", "arguments": {"value": 3000}}
{"error": {"class": "GenericError", "desc": "Parameter 'downtime_limit'
expects an integer in the range of 0 to 2000000 milliseconds"}}

This patch changes it to '2000 seconds' to keep consistency with
the expected parameter.

Signed-off-by: Daniel Henrique Barboza 
---
 migration/migration.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index c6ae69d..2dc63b1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -49,6 +49,9 @@
  * for sending the last part */
 #define DEFAULT_MIGRATE_SET_DOWNTIME 300
 
+/* Maximum migrate downtime set to 2000*1000 miliseconds */
+#define MAX_MIGRATE_SET_DOWNTIME (2000 * 1000)
+
 /* Default compression thread count */
 #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
 /* Default decompression thread count, usually decompression is at
@@ -843,10 +846,11 @@ void qmp_migrate_set_parameters(MigrationParameters 
*params, Error **errp)
 return;
 }
 if (params->has_downtime_limit &&
-(params->downtime_limit < 0 || params->downtime_limit > 200)) {
+(params->downtime_limit < 0 ||
+ params->downtime_limit > MAX_MIGRATE_SET_DOWNTIME)) {
 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"downtime_limit",
-   "an integer in the range of 0 to 200 milliseconds");
+   "an integer in the range of 0 to 2000 seconds");
 return;
 }
 if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
-- 
2.9.3

Re: [Qemu-devel] [PATCH] Changing error message of QMP 'migrate_set_downtime' to seconds

2017-02-17 Thread Paolo Bonzini



On 17/02/2017 18:26, Daniel Henrique Barboza wrote:
> The previous error message was displaying the values in miliseconds,
> being misleading with the command that accepts the value in seconds:
> 
> { "execute": "migrate_set_downtime", "arguments": {"value": 3000}}
> {"error": {"class": "GenericError", "desc": "Parameter 'downtime_limit'
> expects an integer in the range of 0 to 2000000 milliseconds"}}
> 
> This patch changes it to '2000 seconds' to keep consistency with
> the expected parameter.
> 
> Signed-off-by: Daniel Henrique Barboza 
> ---
>  migration/migration.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index c6ae69d..2dc63b1 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -49,6 +49,9 @@
>   * for sending the last part */
>  #define DEFAULT_MIGRATE_SET_DOWNTIME 300
>  
> +/* Maximum migrate downtime set to 2000*1000 miliseconds */
> +#define MAX_MIGRATE_SET_DOWNTIME (2000 * 1000)
> +
>  /* Default compression thread count */
>  #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
>  /* Default decompression thread count, usually decompression is at
> @@ -843,10 +846,11 @@ void qmp_migrate_set_parameters(MigrationParameters 
> *params, Error **errp)
>  return;
>  }
>  if (params->has_downtime_limit &&
> -(params->downtime_limit < 0 || params->downtime_limit > 200)) {
> +(params->downtime_limit < 0 ||
> + params->downtime_limit > MAX_MIGRATE_SET_DOWNTIME)) {
>  error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
> "downtime_limit",
> -   "an integer in the range of 0 to 200 milliseconds");
> +   "an integer in the range of 0 to 2000 seconds");

Perhaps you could use %d and set  MAX_MIGRATE_SET_DOWNTIME to 2000?
Though perhaps the migration maintainers are okay with the patch as is.

Paolo

>  return;
>  }
>  if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
>

Re: [Qemu-devel] Fix build break during configuration on musl-libc based Linux systems.

2017-02-17 Thread John Snow

On 02/17/2017 10:34 AM, Eric Blake wrote:
> On 02/17/2017 03:28 AM, Peter Maydell wrote:
>> On 17 February 2017 at 06:43, Fam Zheng  wrote:
>>> But your point is taken, we should make the first (or a one-shot)
>>> contribution as easy as possible.
>>
>> Yes; we could do with providing a "This page seems very long..."
>> introduction section. The absolute bare minimum requirements
>> for a submitter I think are:
>>  * Provide a Signed-off-by: line (this is a hard requirement
>>because it's how you say "I'm legally OK to contribute this
>>and am happy for it to go into QEMU")
>>  * send patch by email
>>  * read replies and act on them if you want your patch to go in
>>
>> The larger your contribution is, the more important the other
>> requirements detailed on the page are; but personally I'm
>> happy to manually fix up patches from a first-time submitter,
>> and I think most other maintainers are too.
> 
> I've updated the wiki to put in that nice bullet list, prior to the
> table of contents.
> 

Is there a way we can make the quickstart exist in a pop-box?

Also, any way to wrap the text on the rest of the introductory article?
If you fullscreen this on a widescreen monitor you have to chase the
letters to the far corners of the earth.

Formatting the article more nicely might help its readability. It's also
worth noting that just /reading the outline/ gives you enough advice,
reading the details is actually not strictly necessary.

--js

Re: [Qemu-devel] [PATCH] Add missing fp_access_check() to aarch64 crypto instructions

2017-02-17 Thread Peter Maydell

On 17 February 2017 at 17:58, Nick Reilly  wrote:
> The aarch64 crypto instructions for AES and SHA are missing the
> check for if the FPU is enabled.
>
> Signed-off-by: Nick Reilly 

Oops, nice catch. Applied to target-arm.next, thanks.

-- PMM

Re: [Qemu-devel] [PATCH v8 4/8] ACPI: Add Virtual Machine Generation ID support

2017-02-17 Thread Ben Warren


> On Feb 17, 2017, at 8:03 AM, Laszlo Ersek  wrote:
> 
> On 02/17/17 16:33, Ben Warren wrote:
>> 
>>> On Feb 17, 2017, at 2:43 AM, Igor Mammedov >> 
>>> >> wrote:
>>> 
>>> On Thu, 16 Feb 2017 15:15:36 -0800
>>> b...@skyportsystems.com  
>>> > wrote:
>>> 
 From: Ben Warren mailto:b...@skyportsystems.com> 
 >>
 
 This implements the VM Generation ID feature by passing a 128-bit
 GUID to the guest via a fw_cfg blob.
 Any time the GUID changes, an ACPI notify event is sent to the guest
 
 The user interface is a simple device with one parameter:
 - guid (string, must be "auto" or in UUID format
  xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
>>> I've given it some testing with WS2012R2 and v4 patches for Seabios,
>>> 
>>> Windows is able to read initial GUID allocation and writeback
>>> seems to work somehow:
>>> 
>>> (qemu) info vm-generation-id
>>> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
>>> 
>>> vmgenid client in Windows reads it as 2 following 64bit integers:
>>> 42d50e8bc109c09b:6cd1dcc90984339b
>>> 
>>> However update path/restore from snapshot doesn't work;
>>> here is how I've tested it:
>>> 
>>> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor stdio
>>> (qemu) info vm-generation-id
>>> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
>>> (qemu) stop
>>> (qemu) migrate "exec:gzip -c > STATEFILE.gz"
>>> (qemu) quit
>>> 
>>> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor stdio
>>> -incoming "exec: gzip -c -d STATEFILE.gz"
>>> (qemu) info vm-generation-id
>>> 28b587fa-991b-4267-80d7-9cf28b746fe9
>>> 
>>> guest
>>> 1. doesn't get GPE notification that it must receive
>>> 2. vmgenid client in Windows reads the same value
>>> 42d50e8bc109c09b:6cd1dcc90984339b
>>> 
>> Strange, this was working for me, but with a slightly different test method:
>> 
>>  * I use virsh save/restore
> 
> Awesome, this actually what I should try. All my guests are managed by
> libvirt (with the occasional , for development), and direct
> QEMU monitor commands such as
> 
>  virsh qemu-monitor-command ovmf.rhel7 --hmp 'info vm-generation-id'
> 
> only work for me if they are reasonably non-intrusive.
> 
>>  * While I do later testing with Windows, during development I use a
>>Linux kernel module I wrote that keeps track of GUID and
>>notifications.  I’m happy to share this with you if interested.
> 
> Please do. If you have a public git repo somewhere, that would be
> awesome. (Bonus points if the module builds out-of-tree, if the
> kernel-devel package is installed.)
> 
Here you go:
https://github.com/ben-skyportsystems/vmgenid-test 


I don’t know if something like this would ever be accepted into the Linux 
kernel, but it has been invaluable to me, and I’d like to see it somewhere 
better.

> NB: while the set-id monitor command was part of the series, I did test
> it to the extent that I checked the SCI ("ACPI interrupt") count in the
> guest, in /proc/interrupts. I did see it increase, so minimally the SCI
> injection was fine.
> 
> Thanks!
> Laszlo
> 
>> I’ll dig into this morning.
>> 
>> —Ben
>> 



smime.p7s
Description: S/MIME cryptographic signature

[Qemu-devel] [RFC 0/3] generalize parsing of cpu_model

2017-02-17 Thread Igor Mammedov


Some callers call CPUClass->parse_features manually to convert
'-cpu cpufoo,featurestr' string to cpu type and featurestr
into a set of global properties. And then do controlled
cpu creation with setting properties and completing it with realize.
That's a lot of code duplication as they practically
reimplement the same parsing logic.

Some don't and use cpu_generic_init() instead which does
the same parsing along with creation/realizing cpu within one
wrapper.

And some, trying to switch to controlled cpu creation,
implement object_new()/set properties/realize steps
but forget feature parsing logic, which leads to 'bugs'
commit 00909b585 (hw/arm/integratorcp: Support specifying features via -cpu)


This series moves -cpu option parsing to generic machine code
that removes a little of code duplication and makes cpus creation
process more unified.

PS:
As I don't have time to rewrite this part of QEMU, being busy
rewriting yet another part of QEMU,
THIS SERIES IS UNFINISHED AND SERVES TO SHOW THE IDEA OF HOW IT SHOULD
BE DONE. FEEL FREE TO PICK UP AND COMPLETE THESE PATCHES
TO HANDLE ALL BOARDS (series does it only for virt-arm/spapr/pc)

It compiles and pc machine even starts but otherwise is untested.

CC: Eduardo Habkost 
CC: patc...@linaro.org
CC: Peter Maydell 
CC: Marcel Apfelbaum 
CC: Paolo Bonzini 
CC: David Gibson 
CC: qemu-...@nongnu.org
CC: qemu-...@nongnu.org
CC: "Michael S. Tsirkin" 

Igor Mammedov (3):
  machine: call machine init from wrapper
  machine: generalize handling of default cpu_model
  machine: generilize cpu_model parsing

 include/hw/boards.h  |  5 +
 include/hw/ppc/ppc.h |  2 --
 hw/arm/virt.c| 46 ++-
 hw/core/machine.c| 55 
 hw/i386/pc.c | 42 +--
 hw/ppc/ppc.c | 25 
 hw/ppc/spapr.c   | 14 +++--
 vl.c |  2 +-
 8 files changed, 92 insertions(+), 99 deletions(-)

-- 
2.7.4

[Qemu-devel] [RFC 3/3] machine: generilize cpu_model parsing

2017-02-17 Thread Igor Mammedov

Parse cpu_model string into cpu_type and
[=-]foo features in common machine code
instead of doing the same on every board.

TODO:
patch handles only virt-arm/spapr/pc boards,
but to avoid bisection breakage it should take
care of all boards.

Signed-off-by: Igor Mammedov 
---
 include/hw/boards.h  |  3 +++
 include/hw/ppc/ppc.h |  2 --
 hw/arm/virt.c| 36 
 hw/core/machine.c| 44 +++-
 hw/i386/pc.c | 24 +++-
 hw/ppc/ppc.c | 25 -
 hw/ppc/spapr.c   |  3 +--
 7 files changed, 54 insertions(+), 83 deletions(-)

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 9f2dbfd..3374a49 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -136,12 +136,14 @@ struct MachineClass {
 bool rom_file_has_mr;
 int minimum_page_bits;
 bool has_hotpluggable_cpus;
+const char *base_cpu_type;
 
 HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
DeviceState *dev);
 unsigned (*cpu_index_to_socket_id)(unsigned cpu_index);
 const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
 const char *(*default_cpu_model)(MachineState *machine);
+bool (*cpu_model_valid)(MachineState *machine, const char *cpu_model);
 };
 
 /**
@@ -182,6 +184,7 @@ struct MachineState {
 char *kernel_cmdline;
 char *initrd_filename;
 const char *cpu_model;
+const char *cpu_typename;
 AccelState *accelerator;
 CPUArchIdList *possible_cpus;
 };
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index 4e7fe11..ff0ac30 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -105,6 +105,4 @@ enum {
 
 /* ppc_booke.c */
 void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags);
-
-void ppc_cpu_parse_features(const char *cpu_model);
 #endif
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 8380540..d767200 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -169,7 +169,7 @@ static const char *valid_cpus[] = {
 "host",
 };
 
-static bool cpuname_valid(const char *cpu)
+static bool cpuname_valid(MachineState *machine, const char *cpu)
 {
 int i;
 
@@ -1244,12 +1244,6 @@ static void machvirt_init(MachineState *machine)
 MemoryRegion *secure_sysmem = NULL;
 int n, virt_max_cpus;
 MemoryRegion *ram = g_new(MemoryRegion, 1);
-const char *cpu_model = machine->cpu_model;
-char **cpustr;
-ObjectClass *oc;
-const char *typename;
-CPUClass *cc;
-Error *err = NULL;
 bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);
 
 /* We can probe only here because during property set
@@ -1268,14 +1262,6 @@ static void machvirt_init(MachineState *machine)
 }
 }
 
-/* Separate the actual CPU model name from any appended features */
-cpustr = g_strsplit(cpu_model, ",", 2);
-
-if (!cpuname_valid(cpustr[0])) {
-error_report("mach-virt: CPU %s not supported", cpustr[0]);
-exit(1);
-}
-
 /* If we have an EL3 boot ROM then the assumption is that it will
  * implement PSCI itself, so disable QEMU's internal implementation
  * so it doesn't get in the way. Instead of starting secondary
@@ -1342,22 +1328,6 @@ static void machvirt_init(MachineState *machine)
 
 create_fdt(vms);
 
-oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]);
-if (!oc) {
-error_report("Unable to find CPU definition");
-exit(1);
-}
-typename = object_class_get_name(oc);
-
-/* convert -smp CPU options specified by the user into global props */
-cc = CPU_CLASS(oc);
-cc->parse_features(typename, cpustr[1], &err);
-g_strfreev(cpustr);
-if (err) {
-error_report_err(err);
-exit(1);
-}
-
 mc->possible_cpu_arch_ids(machine);
 for (n = 0; n < machine->possible_cpus->len; n++) {
 Object *cpuobj;
@@ -1367,7 +1337,7 @@ static void machvirt_init(MachineState *machine)
 break;
 }
 
-cpuobj = object_new(typename);
+cpuobj = object_new(machine->cpu_typename);
 object_property_set_int(cpuobj, 
machine->possible_cpus->cpus[n].arch_id,
 "mp-affinity", NULL);
 
@@ -1583,6 +1553,8 @@ static void virt_machine_class_init(ObjectClass *oc, void 
*data)
 mc->minimum_page_bits = 12;
 mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
 mc->default_cpu_model = virt_default_cpu_model;
+mc->cpu_model_valid = cpuname_valid;
+mc->base_cpu_type = TYPE_ARM_CPU;
 }
 
 static const TypeInfo virt_machine_info = {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 2a954f0..42923b1 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -575,8 +575,12 @@ bool machine_mem_merge(MachineState *machine)
 return machine->mem_merge;
 }
 
-void machine_run_board_init(MachineState *machine)
+static void machine_parse_cpu_model(MachineState *m

[Qemu-devel] [RFC 2/3] machine: generalize handling of default cpu_model

2017-02-17 Thread Igor Mammedov

Currently all boards have open-coded default cpu_model
selection. In most cases it's just a string and in others
it's a 'function'. Add a machine callback
that returns the default cpu_model and make boards return
it as a const string.

That allows moving detection of a non-specified cpu_model
(i.e. a missing CLI '-cpu' option)
to generic machine code, and would allow generalizing
the parsing of cpu features in a follow-up patch.

TODO:
 complete conversion for all boards

Signed-off-by: Igor Mammedov 
---
 include/hw/boards.h |  1 +
 hw/arm/virt.c   | 10 ++
 hw/core/machine.c   |  7 +++
 hw/i386/pc.c| 18 ++
 hw/ppc/spapr.c  | 11 +++
 5 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 04f5352..9f2dbfd 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -141,6 +141,7 @@ struct MachineClass {
DeviceState *dev);
 unsigned (*cpu_index_to_socket_id)(unsigned cpu_index);
 const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
+const char *(*default_cpu_model)(MachineState *machine);
 };
 
 /**
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a98cb91..8380540 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1252,10 +1252,6 @@ static void machvirt_init(MachineState *machine)
 Error *err = NULL;
 bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);
 
-if (!cpu_model) {
-cpu_model = "cortex-a15";
-}
-
 /* We can probe only here because during property set
  * KVM is not available yet
  */
@@ -1564,6 +1560,11 @@ static const CPUArchIdList 
*virt_possible_cpu_arch_ids(MachineState *ms)
 return ms->possible_cpus;
 }
 
+static const char *virt_default_cpu_model(MachineState *machine)
+{
+return "cortex-a15";
+}
+
 static void virt_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -1581,6 +1582,7 @@ static void virt_machine_class_init(ObjectClass *oc, void 
*data)
 /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */
 mc->minimum_page_bits = 12;
 mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
+mc->default_cpu_model = virt_default_cpu_model;
 }
 
 static const TypeInfo virt_machine_info = {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index fe82529..2a954f0 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -578,6 +578,13 @@ bool machine_mem_merge(MachineState *machine)
 void machine_run_board_init(MachineState *machine)
 {
 MachineClass *machine_class = MACHINE_GET_CLASS(machine);
+
+/* Force all boards to provide default_cpu_model callback */
+assert(machine_class->default_cpu_model);
+if (machine->cpu_model == NULL) {
+machine->cpu_model = machine_class->default_cpu_model(machine);
+}
+
 machine_class->init(machine);
 }
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index a8660d4..0073469 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1149,14 +1149,6 @@ void pc_cpus_init(PCMachineState *pcms)
 MachineClass *mc = MACHINE_GET_CLASS(pcms);
 
 /* init CPUs */
-if (machine->cpu_model == NULL) {
-#ifdef TARGET_X86_64
-machine->cpu_model = "qemu64";
-#else
-machine->cpu_model = "qemu32";
-#endif
-}
-
 model_pieces = g_strsplit(machine->cpu_model, ",", 2);
 if (!model_pieces[0]) {
 error_report("Invalid/empty CPU model name");
@@ -2296,6 +2288,15 @@ static void x86_nmi(NMIState *n, int cpu_index, Error 
**errp)
 }
 }
 
+static const char *pc_default_cpu_model(MachineState *machine)
+{
+#ifdef TARGET_X86_64
+return "qemu64";
+#else
+return "qemu32";
+#endif
+}
+
 static void pc_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -2324,6 +2325,7 @@ static void pc_machine_class_init(ObjectClass *oc, void 
*data)
 mc->default_boot_order = "cad";
 mc->hot_add_cpu = pc_hot_add_cpu;
 mc->max_cpus = 255;
+mc->default_cpu_model = pc_default_cpu_model;
 mc->reset = pc_machine_reset;
 hc->pre_plug = pc_machine_device_pre_plug_cb;
 hc->plug = pc_machine_device_plug_cb;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6f37288..8f30765 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1920,10 +1920,6 @@ static void ppc_spapr_init(MachineState *machine)
 }
 
 /* init CPUs */
-if (machine->cpu_model == NULL) {
-machine->cpu_model = kvm_enabled() ? "host" : smc->tcg_default_cpu;
-}
-
 ppc_cpu_parse_features(machine->cpu_model);
 
 spapr_init_cpus(spapr);
@@ -2861,6 +2857,12 @@ static void spapr_phb_placement(sPAPRMachineState 
*spapr, uint32_t index,
 *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
 }
 
+static const char *spapr_default_cpu_model(MachineState *machine)
+{
+sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
+return machine->cpu_model = kv

Re: [Qemu-devel] [PATCH v8 4/8] ACPI: Add Virtual Machine Generation ID support

2017-02-17 Thread Michael S. Tsirkin

On Fri, Feb 17, 2017 at 10:34:29AM -0800, Ben Warren wrote:
> 
> On Feb 17, 2017, at 8:03 AM, Laszlo Ersek  wrote:
> 
> On 02/17/17 16:33, Ben Warren wrote:
> 
> 
> 
> On Feb 17, 2017, at 2:43 AM, Igor Mammedov  > wrote:
> 
> On Thu, 16 Feb 2017 15:15:36 -0800
> b...@skyportsystems.com  wrote:
> 
> 
> From: Ben Warren  mailto:b...@skyportsystems.com>>
> 
> This implements the VM Generation ID feature by passing a
> 128-bit
> GUID to the guest via a fw_cfg blob.
> Any time the GUID changes, an ACPI notify event is sent to the
> guest
> 
> The user interface is a simple device with one parameter:
> - guid (string, must be "auto" or in UUID format
>  ----)
> 
> I've given it some testing with WS2012R2 and v4 patches for
> Seabios,
> 
> Windows is able to read initial GUID allocation and writeback
> seems to work somehow:
> 
> (qemu) info vm-generation-id
> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
> 
> vmgenid client in Windows reads it as 2 following 64bit integers:
> 42d50e8bc109c09b:6cd1dcc90984339b
> 
> However update path/restore from snapshot doesn't work;
> here is how I've tested it:
> 
> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor
> stdio
> (qemu) info vm-generation-id
> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
> (qemu) stop
> (qemu) migrate "exec:gzip -c > STATEFILE.gz"
> (qemu) quit
> 
> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor
> stdio
> -incoming "exec: gzip -c -d STATEFILE.gz"
> (qemu) info vm-generation-id
> 28b587fa-991b-4267-80d7-9cf28b746fe9
> 
> guest
> 1. doesn't get GPE notification that it must receive
> 2. vmgenid client in Windows reads the same value
> 42d50e8bc109c09b:6cd1dcc90984339b
> 
> 
> Strange, this was working for me, but with a slightly different test
> method:
> 
>  * I use virsh save/restore
> 
> 
> Awesome, this is actually what I should try. All my guests are managed by
> libvirt (with the occasional , for development), and direct
> QEMU monitor commands such as
> 
>  virsh qemu-monitor-command ovmf.rhel7 --hmp 'info vm-generation-id'
> 
> only work for me if they are reasonably non-intrusive.
> 
> 
>  * While I do later testing with Windows, during development I use a
>Linux kernel module I wrote that keeps track of GUID and
>notifications.  I’m happy to share this with you if interested.
> 
> 
> Please do. If you have a public git repo somewhere, that would be
> awesome. (Bonus points if the module builds out-of-tree, if the
> kernel-devel package is installed.)
> 
> 
> Here you go:
> https://github.com/ben-skyportsystems/vmgenid-test
> 
> I don’t know if something like this would ever be accepted into the Linux
> kernel, but it has been invaluable to me, and I’d like to see it somewhere
> better.

I think the main issue here is that there's no blocking
interface to wait for change events.



Also ioremap_nocache is definitely the wrong thing to do
since the spec says
It must not be in the same 4-kilobyte page as any memory that is
expected to be mapped by a page table entry with caching disabled.

Finally, I think it makes sense to add an mmap call to this driver.
Basically add some kind of interface telling guest that gen id does not
share a 4K page with any other structure. Maybe just a special HID
value?  Or a special method we can test for.  Then it's safe for guest
to map this page read-only into userspace memory. It should have an
interface to report the offset to userspace. Userspace can then get the
ID without a system call by doing

ptr = mmap(...)
offset = ioctl(... GET_OFFSET ...)
guid = *(ptr + offset)

Windows does not seem to have this ability but it might be
a significant performance enhancement IMHO.

> 
> NB: while the set-id monitor command was part of the series, I did test
> it to the extent that I checked the SCI ("ACPI interrupt") count in the
> guest, in /proc/interrupts. I did see it increase, so minimally the SCI
> injection was fine.
> 
> Thanks!
> Laszlo
> 
> 
> I’ll dig into this morning.
> 
> —Ben
> 
> 
>

[Qemu-devel] [RFC 1/3] machine: call machine init from wrapper

2017-02-17 Thread Igor Mammedov

Add a machine_run_board_init() wrapper that only calls
machine init for now; in follow-up patches
it will be used to run generic code that should run
before machine init.

Signed-off-by: Igor Mammedov 
---
 include/hw/boards.h | 1 +
 hw/core/machine.c   | 6 ++
 vl.c| 2 +-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 269d0ba..04f5352 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -32,6 +32,7 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, 
Object *owner,
 MachineClass *find_default_machine(void);
 extern MachineState *current_machine;
 
+void machine_run_board_init(MachineState *machine);
 bool machine_usb(MachineState *machine);
 bool machine_kernel_irqchip_allowed(MachineState *machine);
 bool machine_kernel_irqchip_required(MachineState *machine);
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 0699750..fe82529 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -575,6 +575,12 @@ bool machine_mem_merge(MachineState *machine)
 return machine->mem_merge;
 }
 
+void machine_run_board_init(MachineState *machine)
+{
+MachineClass *machine_class = MACHINE_GET_CLASS(machine);
+machine_class->init(machine);
+}
+
 static void machine_class_finalize(ObjectClass *klass, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(klass);
diff --git a/vl.c b/vl.c
index 93406ba..9af4462 100644
--- a/vl.c
+++ b/vl.c
@@ -4484,7 +4484,7 @@ int main(int argc, char **argv, char **envp)
 current_machine->boot_order = boot_order;
 current_machine->cpu_model = cpu_model;
 
-machine_class->init(current_machine);
+machine_run_board_init(current_machine);
 
 realtime_init();
 
-- 
2.7.4

Re: [Qemu-devel] [PATCH] Changing error message of QMP 'migrate_set_downtime' to seconds

2017-02-17 Thread Daniel Henrique Barboza




On 02/17/2017 03:37 PM, Paolo Bonzini wrote:


On 17/02/2017 18:26, Daniel Henrique Barboza wrote:

The previous error message displayed the values in milliseconds,
which is misleading for a command that accepts the value in seconds:

{ "execute": "migrate_set_downtime", "arguments": {"value": 3000}}
{"error": {"class": "GenericError", "desc": "Parameter 'downtime_limit'
expects an integer in the range of 0 to 200 milliseconds"}}

This patch changes it to '2000 seconds' to keep consistency with
the expected parameter.

Signed-off-by: Daniel Henrique Barboza 
---
  migration/migration.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index c6ae69d..2dc63b1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -49,6 +49,9 @@
   * for sending the last part */
  #define DEFAULT_MIGRATE_SET_DOWNTIME 300
  
+/* Maximum migrate downtime set to 2000*1000 miliseconds */

+#define MAX_MIGRATE_SET_DOWNTIME (2000 * 1000)
+
  /* Default compression thread count */
  #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
  /* Default decompression thread count, usually decompression is at
@@ -843,10 +846,11 @@ void qmp_migrate_set_parameters(MigrationParameters 
*params, Error **errp)
  return;
  }
  if (params->has_downtime_limit &&
-(params->downtime_limit < 0 || params->downtime_limit > 200)) {
+(params->downtime_limit < 0 ||
+ params->downtime_limit > MAX_MIGRATE_SET_DOWNTIME)) {
  error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
 "downtime_limit",
-   "an integer in the range of 0 to 200 milliseconds");
+   "an integer in the range of 0 to 2000 seconds");

Perhaps you could use %d and set  MAX_MIGRATE_SET_DOWNTIME to 2000?
Though perhaps the migration maintainers are okay with the patch as is.


I did that at first but I got errors on "error_setg" about the extra 
parameter.
I even considered using sprintf to format the string but I was afraid it
would be a little overkill.


Daniel



Paolo


  return;
  }
  if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {

[Qemu-devel] [PATCH v2] Makefile: Put VERSION info into version.texi rather than using -D

2017-02-17 Thread Peter Maydell

Unfortunately some older versions of makeinfo don't correctly
handle the -D command line option and fail to set the variable.
This then causes them to complain
 docs/qemu-ga-ref.texi:41: warning: undefined flag: VERSION

Work around this by doing as the autotools do, and writing
the information into a version.texi file which we then
include from the .texi files that need it.

Signed-off-by: Peter Maydell 
Reviewed-by: Eric Blake 
---
Changes v1->v2: add .gitignore entry
---
 Makefile   | 17 ++---
 .gitignore |  1 +
 docs/qemu-ga-ref.texi  |  2 ++
 docs/qemu-qmp-ref.texi |  2 ++
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index 830fa5a..1c4c04f 100644
--- a/Makefile
+++ b/Makefile
@@ -516,7 +516,7 @@ distclean: clean
rm -f qemu-doc.vr qemu-doc.txt
rm -f config.log
rm -f linux-headers/asm
-   rm -f qemu-ga-qapi.texi qemu-qapi.texi
+   rm -f qemu-ga-qapi.texi qemu-qapi.texi version.texi
rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
@@ -663,21 +663,24 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
 
 # documentation
 MAKEINFO=makeinfo
-MAKEINFOFLAGS=--no-split --number-sections -D 'VERSION $(VERSION)'
-TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'
+MAKEINFOFLAGS=--no-split --number-sections
+TEXIFLAG=$(if $(V),,--quiet)
 
-%.html: %.texi
+version.texi: $(SRC_PATH)/VERSION
+   $(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
+
+%.html: %.texi version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers 
\
--html $< -o $@,"GEN","$@")
 
-%.info: %.texi
+%.info: %.texi version.texi
$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
 
-%.txt: %.texi
+%.txt: %.texi version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers 
\
--plaintext $< -o $@,"GEN","$@")
 
-%.pdf: %.texi
+%.pdf: %.texi version.texi
$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o 
$@,"GEN","$@")
 
 qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
diff --git a/.gitignore b/.gitignore
index c563dc1..2849d75 100644
--- a/.gitignore
+++ b/.gitignore
@@ -107,6 +107,7 @@ docs/qemu-ga-ref.info*
 docs/qemu-qmp-ref.info*
 /qemu-ga-qapi.texi
 /qemu-qapi.texi
+/version.texi
 *.tps
 .stgit-*
 cscope.*
diff --git a/docs/qemu-ga-ref.texi b/docs/qemu-ga-ref.texi
index 87cc8d0..ddb76ce 100644
--- a/docs/qemu-ga-ref.texi
+++ b/docs/qemu-ga-ref.texi
@@ -1,6 +1,8 @@
 \input texinfo
 @setfilename qemu-ga-ref.info
 
+@include version.texi
+
 @exampleindent 0
 @paragraphindent 0
 
diff --git a/docs/qemu-qmp-ref.texi b/docs/qemu-qmp-ref.texi
index 818e525..0a00569 100644
--- a/docs/qemu-qmp-ref.texi
+++ b/docs/qemu-qmp-ref.texi
@@ -1,6 +1,8 @@
 \input texinfo
 @setfilename qemu-qmp-ref.info
 
+@include version.texi
+
 @exampleindent 0
 @paragraphindent 0
 
-- 
2.7.4

Re: [Qemu-devel] [RFC 0/3] generalize parsing of cpu_model

2017-02-17 Thread Peter Maydell

On 17 February 2017 at 18:56, Igor Mammedov  wrote:
> Some callers call CPUClass->parse_features manually to convert
> a '-cpu cpufoo,featurestr' string to a cpu type and the featurestr
> into a set of global properties. And then they do controlled
> cpu creation, setting properties and completing it with realize.
> That's a lot of code duplication as they practically
> reimplement the same parsing logic.
>
> Some don't and use cpu_generic_init() instead which does
> the same parsing along with creation/realizing cpu within one
> wrapper.
>
> And some, trying to switch to controlled cpu creation,
> implement the object_new()/set properties/realize steps
> but forget the feature parsing logic, which leads to 'bugs', see
> commit 00909b585 (hw/arm/integratorcp: Support specifying features via -cpu)
>
>
> This series moves -cpu option parsing to generic machine code,
> which removes a little code duplication and makes the cpu creation
> process more unified.

This API seems a little awkward for the SoC case, where
the board model doesn't actually know what the default
CPU model or the valid CPU models are, because it just
wants to say "create me a BCM2836 SoC" and let the SoC
object deal with determining whether it's always a cortex-a15
or if it might allow some user configurability either in
cpu choices or in optional flags.
Any thoughts about that use case?

(The stm32f205 SoC object has a "cpu-model" QOM property
that the board sets, but I think that's as much because
we somewhat awkwardly need to pass it into armv7m_init()
as a deliberate design choice.)

thanks
-- PMM

Re: [Qemu-devel] [PATCH 0/4] cpu: Implement cpu_generic_new()

2017-02-17 Thread Igor Mammedov

On Mon, 13 Feb 2017 14:28:15 +
Peter Maydell  wrote:

> This patchset adds a new function cpu_generic_new()
> which is similar to cpu_generic_init() except that it
> does not realize the created CPU object. This means that
> board code can do a "new cpu; set QOM properties; realize"
> sequence without having to do all the work of splitting
> the CPU model string and calling parse_features by hand.
> 
> Patch 2 clarifies a TODO comment, hopefully correctly,
> based on an email conversation I had with Eduardo a
> little while back.
> 
> Patches 3 and 4 change the ARM boards which currently
> call parse_features by hand to use the new function.
> 
> 
> If there's consensus that this is the right general
> direction to go in, then I think that some other
> architectures could also make cleanups to use this:
>  * cpu_s390x_create() is almost exactly this function,
>give or take some fine detail of error handling
>  * ppc_cpu_parse_features is almost the same thing,
>except that it doesn't actually create the CPU object,
>it only calls parse_features
>  * hw/i386/pc.c does a manual parse_features
> 
> I'm not strongly attached to this particular approach
> (though it seems like a reasonable one, especially given
> the proliferation of different arch-specific helpers
> listed above and the bugs in boards which don't call
> parse_features when they should), but I would like us to
> figure out and document what the right way for a board
> to create and configure its CPU objects is...

This series looks like a step back: it adds yet another way
to create a CPU and makes the code even more inconsistent,
instead of removing the TODO item by doing proper generalization.
So I sort of object to it.

I've just posted an RFC which shows the idea of how generalization of
cpu_model/features parsing should be implemented.

However, I don't have the cycles to complete it; only
virt-arm/spapr/pc are converted as examples.
Whoever picks the task up should complete it for
all boards to make the code consistent.

> 
> 
> Michael Davidsaver (1):
>   cpu: add cpu_generic_new()
> 
> Peter Maydell (3):
>   cpu: Clarify TODO comment in cpu_generic_new()
>   hw/arm/integrator: Use new cpu_generic_new()
>   hw/arm/virt: Use new cpu_generic_new()
> 
>  include/qom/cpu.h | 17 +
>  hw/arm/integratorcp.c | 22 ++
>  hw/arm/virt.c | 24 ++--
>  qom/cpu.c | 37 ++---
>  4 files changed, 47 insertions(+), 53 deletions(-)
>

Re: [Qemu-devel] [PATCH v2] Makefile: Put VERSION info into version.texi rather than using -D

2017-02-17 Thread Marc-André Lureau



- Original Message -
> Unfortunately some older versions of makeinfo don't correctly
> handle the -D command line option and fail to set the variable.
> This then causes them to complain
>  docs/qemu-ga-ref.texi:41: warning: undefined flag: VERSION
> 
> Work around this by doing as the autotools do, and writing
> the information into a version.texi file which we then
> include from the .texi files that need it.
> 
> Signed-off-by: Peter Maydell 
> Reviewed-by: Eric Blake 

Reviewed-by: Marc-André Lureau 

> ---
> Changes v1->v2: add .gitignore entry
> ---
>  Makefile   | 17 ++---
>  .gitignore |  1 +
>  docs/qemu-ga-ref.texi  |  2 ++
>  docs/qemu-qmp-ref.texi |  2 ++
>  4 files changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/Makefile b/Makefile
> index 830fa5a..1c4c04f 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -516,7 +516,7 @@ distclean: clean
>   rm -f qemu-doc.vr qemu-doc.txt
>   rm -f config.log
>   rm -f linux-headers/asm
> - rm -f qemu-ga-qapi.texi qemu-qapi.texi
> + rm -f qemu-ga-qapi.texi qemu-qapi.texi version.texi
>   rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
>   rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
>   rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
> @@ -663,21 +663,24 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
>  
>  # documentation
>  MAKEINFO=makeinfo
> -MAKEINFOFLAGS=--no-split --number-sections -D 'VERSION $(VERSION)'
> -TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'
> +MAKEINFOFLAGS=--no-split --number-sections
> +TEXIFLAG=$(if $(V),,--quiet)
>  
> -%.html: %.texi
> +version.texi: $(SRC_PATH)/VERSION
> + $(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
> +
> +%.html: %.texi version.texi
>   $(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers 
> \
>   --html $< -o $@,"GEN","$@")
>  
> -%.info: %.texi
> +%.info: %.texi version.texi
>   $(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
>  
> -%.txt: %.texi
> +%.txt: %.texi version.texi
>   $(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers 
> \
>   --plaintext $< -o $@,"GEN","$@")
>  
> -%.pdf: %.texi
> +%.pdf: %.texi version.texi
>   $(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o
>   $@,"GEN","$@")
>  
>  qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
> diff --git a/.gitignore b/.gitignore
> index c563dc1..2849d75 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -107,6 +107,7 @@ docs/qemu-ga-ref.info*
>  docs/qemu-qmp-ref.info*
>  /qemu-ga-qapi.texi
>  /qemu-qapi.texi
> +/version.texi
>  *.tps
>  .stgit-*
>  cscope.*
> diff --git a/docs/qemu-ga-ref.texi b/docs/qemu-ga-ref.texi
> index 87cc8d0..ddb76ce 100644
> --- a/docs/qemu-ga-ref.texi
> +++ b/docs/qemu-ga-ref.texi
> @@ -1,6 +1,8 @@
>  \input texinfo
>  @setfilename qemu-ga-ref.info
>  
> +@include version.texi
> +
>  @exampleindent 0
>  @paragraphindent 0
>  
> diff --git a/docs/qemu-qmp-ref.texi b/docs/qemu-qmp-ref.texi
> index 818e525..0a00569 100644
> --- a/docs/qemu-qmp-ref.texi
> +++ b/docs/qemu-qmp-ref.texi
> @@ -1,6 +1,8 @@
>  \input texinfo
>  @setfilename qemu-qmp-ref.info
>  
> +@include version.texi
> +
>  @exampleindent 0
>  @paragraphindent 0
>  
> --
> 2.7.4
> 
>

Re: [Qemu-devel] [PATCH 5/6] target-ppc: support for 32-bit carry and overflow

2017-02-17 Thread Richard Henderson


On 02/17/2017 03:47 PM, Nikunj A Dadhania wrote:

Why do you want to extract these bits?


Convenient to copy that to XER later.


Ideally you make the most common operation cheapest, and the more rare 
operation more expensive.  That said, I suppose even using ADDO is rare in the 
first place.  So it probably doesn't much matter.



r~

Re: [Qemu-devel] [PATCH] Changing error message of QMP 'migrate_set_downtime' to seconds

2017-02-17 Thread Eric Blake

On 02/17/2017 01:01 PM, Daniel Henrique Barboza wrote:

>>> 200)) {
>>> +(params->downtime_limit < 0 ||
>>> + params->downtime_limit > MAX_MIGRATE_SET_DOWNTIME)) {
>>>   error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
>>>  "downtime_limit",
>>> -   "an integer in the range of 0 to 200
>>> milliseconds");
>>> +   "an integer in the range of 0 to 2000 seconds");
>> Perhaps you could use %d and set  MAX_MIGRATE_SET_DOWNTIME to 2000?
>> Though perhaps the migration maintainers are okay with the patch as is.
> 
> I did that at first but I got errors on "error_setg" about the extra
> parameter.

Ah, right, because QERR_INVALID_PARAMETER_VALUE is a macro that expands
to a fixed printf-style format string where you have to know how many
exact arguments it further expects.  The only way around that is to
open-code the error message you want, instead of forcing the use of the
awkward macro.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org

signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [PATCH 0/3] do not use aio_context_acquire/release in AIO-based drivers

2017-02-17 Thread Paolo Bonzini

aio_context_acquire/release will only go away once the block layer
becomes thread-safe, but we can already move to other finer-grained
mutexes wherever possible.

These three drivers don't use coroutines, hence a QemuMutex is a fine
primitive to use for protecting any per-BDS data in the libraries
they use.  The QemuMutex must protect any fd handlers or bottom halves,
and also the BlockDriver callbacks which were implicitly being called
under aio_context_acquire.

Paolo

Paolo Bonzini (3):
  curl: do not use aio_context_acquire/release
  nfs: do not use aio_context_acquire/release
  iscsi: do not use aio_context_acquire/release

 block/curl.c  | 24 ++---
 block/iscsi.c | 83 +--
 block/nfs.c   | 20 +++---
 3 files changed, 95 insertions(+), 32 deletions(-)

-- 
2.9.3

[Qemu-devel] [PATCH 1/3] curl: do not use aio_context_acquire/release

2017-02-17 Thread Paolo Bonzini

Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect BDRVCURLState access with a QemuMutex.

Signed-off-by: Paolo Bonzini 
---
 block/curl.c | 24 +++-
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/block/curl.c b/block/curl.c
index 2939cc7..e83dcd8 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -135,6 +135,7 @@ typedef struct BDRVCURLState {
 char *cookie;
 bool accept_range;
 AioContext *aio_context;
+QemuMutex mutex;
 char *username;
 char *password;
 char *proxyusername;
@@ -333,6 +334,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, 
size_t len,
 return FIND_RET_NONE;
 }
 
+/* Called with s->mutex held.  */
 static void curl_multi_check_completion(BDRVCURLState *s)
 {
 int msgs_in_queue;
@@ -374,7 +376,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
 continue;
 }
 
+qemu_mutex_unlock(&s->mutex);
 acb->common.cb(acb->common.opaque, -EPROTO);
+qemu_mutex_lock(&s->mutex);
 qemu_aio_unref(acb);
 state->acb[i] = NULL;
 }
@@ -386,6 +390,7 @@ static void curl_multi_check_completion(BDRVCURLState *s)
 }
 }
 
+/* Called with s->mutex held.  */
 static void curl_multi_do_locked(CURLState *s)
 {
 CURLSocket *socket, *next_socket;
@@ -409,19 +414,19 @@ static void curl_multi_do(void *arg)
 {
 CURLState *s = (CURLState *)arg;
 
-aio_context_acquire(s->s->aio_context);
+qemu_mutex_lock(&s->s->mutex);
 curl_multi_do_locked(s);
-aio_context_release(s->s->aio_context);
+qemu_mutex_unlock(&s->s->mutex);
 }
 
 static void curl_multi_read(void *arg)
 {
 CURLState *s = (CURLState *)arg;
 
-aio_context_acquire(s->s->aio_context);
+qemu_mutex_lock(&s->s->mutex);
 curl_multi_do_locked(s);
 curl_multi_check_completion(s->s);
-aio_context_release(s->s->aio_context);
+qemu_mutex_unlock(&s->s->mutex);
 }
 
 static void curl_multi_timeout_do(void *arg)
@@ -434,11 +439,11 @@ static void curl_multi_timeout_do(void *arg)
 return;
 }
 
-aio_context_acquire(s->aio_context);
+qemu_mutex_lock(&s->mutex);
 curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
 
 curl_multi_check_completion(s);
-aio_context_release(s->aio_context);
+qemu_mutex_unlock(&s->mutex);
 #else
 abort();
 #endif
@@ -771,6 +776,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, 
int flags,
 curl_easy_cleanup(state->curl);
 state->curl = NULL;
 
+qemu_mutex_init(&s->mutex);
 curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
 
 qemu_opts_del(opts);
@@ -801,12 +807,11 @@ static void curl_readv_bh_cb(void *p)
 CURLAIOCB *acb = p;
 BlockDriverState *bs = acb->common.bs;
 BDRVCURLState *s = bs->opaque;
-AioContext *ctx = bdrv_get_aio_context(bs);
 
 size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
 size_t end;
 
-aio_context_acquire(ctx);
+qemu_mutex_lock(&s->mutex);
 
 // In case we have the requested data already (e.g. read-ahead),
 // we can just call the callback and be done.
@@ -854,7 +859,7 @@ static void curl_readv_bh_cb(void *p)
 curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
 
 out:
-aio_context_release(ctx);
+qemu_mutex_unlock(&s->mutex);
 if (ret != -EINPROGRESS) {
 acb->common.cb(acb->common.opaque, ret);
 qemu_aio_unref(acb);
@@ -883,6 +888,7 @@ static void curl_close(BlockDriverState *bs)
 
 DPRINTF("CURL: Close\n");
 curl_detach_aio_context(bs);
+qemu_mutex_destroy(&s->mutex);
 
 g_free(s->cookie);
 g_free(s->url);
-- 
2.9.3

[Qemu-devel] [PATCH 3/3] iscsi: do not use aio_context_acquire/release

2017-02-17 Thread Paolo Bonzini

Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect libiscsi calls with a QemuMutex.  Callbacks are invoked
using bottom halves, so we don't even have to drop it around
callback invocations.

Signed-off-by: Paolo Bonzini 
---
 block/iscsi.c | 83 +--
 1 file changed, 64 insertions(+), 19 deletions(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index 2561be9..e483f6d 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -58,6 +58,7 @@ typedef struct IscsiLun {
 int events;
 QEMUTimer *nop_timer;
 QEMUTimer *event_timer;
+QemuMutex mutex;
 struct scsi_inquiry_logical_block_provisioning lbp;
 struct scsi_inquiry_block_limits bl;
 unsigned char *zeroblock;
@@ -252,6 +253,7 @@ static int iscsi_translate_sense(struct scsi_sense *sense)
 return ret;
 }
 
+/* Called (via iscsi_service) with QemuMutex held.  */
 static void
 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
 void *command_data, void *opaque)
@@ -352,6 +354,7 @@ static const AIOCBInfo iscsi_aiocb_info = {
 static void iscsi_process_read(void *arg);
 static void iscsi_process_write(void *arg);
 
+/* Called with QemuMutex held.  */
 static void
 iscsi_set_events(IscsiLun *iscsilun)
 {
@@ -395,10 +398,10 @@ iscsi_process_read(void *arg)
 IscsiLun *iscsilun = arg;
 struct iscsi_context *iscsi = iscsilun->iscsi;
 
-aio_context_acquire(iscsilun->aio_context);
+qemu_mutex_lock(&iscsilun->mutex);
 iscsi_service(iscsi, POLLIN);
 iscsi_set_events(iscsilun);
-aio_context_release(iscsilun->aio_context);
+qemu_mutex_unlock(&iscsilun->mutex);
 }
 
 static void
@@ -407,10 +410,10 @@ iscsi_process_write(void *arg)
 IscsiLun *iscsilun = arg;
 struct iscsi_context *iscsi = iscsilun->iscsi;
 
-aio_context_acquire(iscsilun->aio_context);
+qemu_mutex_lock(&iscsilun->mutex);
 iscsi_service(iscsi, POLLOUT);
 iscsi_set_events(iscsilun);
-aio_context_release(iscsilun->aio_context);
+qemu_mutex_unlock(&iscsilun->mutex);
 }
 
 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
@@ -589,6 +592,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t 
sector_num, int nb_sectors,
 uint64_t lba;
 uint32_t num_sectors;
 bool fua = flags & BDRV_REQ_FUA;
+int r = 0;
 
 if (fua) {
 assert(iscsilun->dpofua);
@@ -604,6 +608,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t 
sector_num, int nb_sectors,
 lba = sector_qemu2lun(sector_num, iscsilun);
 num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
 iscsi_co_init_iscsitask(iscsilun, &iTask);
+qemu_mutex_lock(&iscsilun->mutex);
 retry:
 if (iscsilun->use_16_for_rw) {
 #if LIBISCSI_API_VERSION >= (20160603)
@@ -640,7 +645,9 @@ retry:
 #endif
 while (!iTask.complete) {
 iscsi_set_events(iscsilun);
+qemu_mutex_unlock(&iscsilun->mutex);
 qemu_coroutine_yield();
+qemu_mutex_lock(&iscsilun->mutex);
 }
 
 if (iTask.task != NULL) {
@@ -655,12 +662,15 @@ retry:
 
 if (iTask.status != SCSI_STATUS_GOOD) {
 iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
-return iTask.err_code;
+r = iTask.err_code;
+goto out_unlock;
 }
 
 iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
 
-return 0;
+out_unlock:
+qemu_mutex_unlock(&iscsilun->mutex);
+return r;
 }
 
 
@@ -693,18 +703,21 @@ static int64_t coroutine_fn 
iscsi_co_get_block_status(BlockDriverState *bs,
 goto out;
 }
 
+qemu_mutex_lock(&iscsilun->mutex);
 retry:
 if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
   sector_qemu2lun(sector_num, iscsilun),
   8 + 16, iscsi_co_generic_cb,
   &iTask) == NULL) {
 ret = -ENOMEM;
-goto out;
+goto out_unlock;
 }
 
 while (!iTask.complete) {
 iscsi_set_events(iscsilun);
+qemu_mutex_unlock(&iscsilun->mutex);
 qemu_coroutine_yield();
+qemu_mutex_lock(&iscsilun->mutex);
 }
 
 if (iTask.do_retry) {
@@ -721,20 +734,20 @@ retry:
  * because the device is busy or the cmd is not
  * supported) we pretend all blocks are allocated
  * for backwards compatibility */
-goto out;
+goto out_unlock;
 }
 
 lbas = scsi_datain_unmarshall(iTask.task);
 if (lbas == NULL) {
 ret = -EIO;
-goto out;
+goto out_unlock;
 }
 
 lbasd = &lbas->descriptors[0];
 
 if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
 ret = -EIO;
-goto out;
+goto out_unlock;
 }
 
 *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
@@ -756,6 +769,8 @@ retry:
 if (*pnum > nb_sectors) {
 *pn

Re: [Qemu-devel] [PATCH 3/5] migration/vmstate: fix array of ptr with nullptrs

2017-02-17 Thread Dr. David Alan Gilbert

* Halil Pasic (pa...@linux.vnet.ibm.com) wrote:
> Make VMS_ARRAY_OF_POINTER cope with null pointers. Previously the
> reward for trying to migrate an array with some null pointers in it was
> an illegal memory access, that is a swift and painless death of the
> process.  Let's make vmstate cope with this scenario.
> 
> The general approach is, when we encounter a null pointer (element),
> instead of following the pointer to save/load the data behind it, we
> save/load a placeholder. This way we can detect if we expected a null
> pointer at the load side but not null data was saved instead.
> 
> Signed-off-by: Halil Pasic 
> Reviewed-by: Guenther Hutzl 
> 
> ---
> We will need this to load/save some on demand created state in the
> (s390x) channel subsystem (see ChannelSubSys.css in hw/s390x/css.c for
> an example).
> ---
>  include/migration/vmstate.h |  4 
>  migration/vmstate.c | 33 +++--
>  2 files changed, 35 insertions(+), 2 deletions(-)
> 
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index 63e7b02..f2dbf84 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -253,6 +253,10 @@ extern const VMStateInfo vmstate_info_uint16;
>  extern const VMStateInfo vmstate_info_uint32;
>  extern const VMStateInfo vmstate_info_uint64;
>  
> +/** Put this in the stream when migrating a null pointer.*/
> +#define VMS_NULLPTR_MARKER (0x30U) /* '0' */
> +extern const VMStateInfo vmstate_info_nullptr;
> +
>  extern const VMStateInfo vmstate_info_float64;
>  extern const VMStateInfo vmstate_info_cpudouble;
>  
> diff --git a/migration/vmstate.c b/migration/vmstate.c
> index 836a7a4..cb81cef 100644
> --- a/migration/vmstate.c
> +++ b/migration/vmstate.c
> @@ -117,7 +117,11 @@ int vmstate_load_state(QEMUFile *f, const 
> VMStateDescription *vmsd,
>  if (field->flags & VMS_ARRAY_OF_POINTER) {
>  curr_elem = *(void **)curr_elem;
>  }
> -if (field->flags & VMS_STRUCT) {
> +if (!curr_elem) {
> +/* if null pointer check placeholder and do not follow */
> +assert(field->flags & VMS_ARRAY_OF_POINTER);

That can return an error instead of asserting.

> +vmstate_info_nullptr.get(f, curr_elem, size, NULL);

You've ignored the return value of the get; that should be assigned, i.e.  ret = vmstate_info_nullptr.get(...)

> +} else if (field->flags & VMS_STRUCT) {
>  ret = vmstate_load_state(f, field->vmsd, curr_elem,
>   field->vmsd->version_id);
>  } else {
> @@ -332,7 +336,11 @@ void vmstate_save_state(QEMUFile *f, const 
> VMStateDescription *vmsd,
>  assert(curr_elem);
>  curr_elem = *(void **)curr_elem;
>  }
> -if (field->flags & VMS_STRUCT) {
> +if (!curr_elem) {
> +/* if null pointer write placeholder and do not follow */
> +assert(field->flags & VMS_ARRAY_OF_POINTER);
> +vmstate_info_nullptr.put(f, curr_elem, size, NULL, NULL);
> +} else if (field->flags & VMS_STRUCT) {
>  vmstate_save_state(f, field->vmsd, curr_elem, 
> vmdesc_loop);
>  } else {
>  field->info->put(f, curr_elem, size, field, vmdesc_loop);
> @@ -747,6 +755,27 @@ const VMStateInfo vmstate_info_uint64 = {
>  .put  = put_uint64,
>  };
>  
> +static int get_nullptr(QEMUFile *f, void *pv, size_t size, VMStateField 
> *field)
> +
> +{
> +return qemu_get_byte(f) == VMS_NULLPTR_MARKER ? 0 : -EINVAL;
> +}
> +
> +static int put_nullptr(QEMUFile *f, void *pv, size_t size,
> +VMStateField *field, QJSON *vmdesc)
> +
> +{
> +assert(pv == NULL);
> +qemu_put_byte(f, VMS_NULLPTR_MARKER);
> +return 0;
> +}

Again that assert could just turn into a return -EINVAL

Dave

> +
> +const VMStateInfo vmstate_info_nullptr = {
> +.name = "uint64",
> +.get  = get_nullptr,
> +.put  = put_nullptr,
> +};
> +
>  /* 64 bit unsigned int. See that the received value is the same than the one
> in the field */
>  
> -- 
> 2.8.4
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[Qemu-devel] [PATCH 2/3] nfs: do not use aio_context_acquire/release

2017-02-17 Thread Paolo Bonzini

Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect libnfs calls with a QemuMutex.  Callbacks are invoked
using bottom halves, so we don't even have to drop it around
callback invocations.

Signed-off-by: Paolo Bonzini 
---
 block/nfs.c | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/block/nfs.c b/block/nfs.c
index 08b43dd..4eddcee 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -54,6 +54,7 @@ typedef struct NFSClient {
 int events;
 bool has_zero_init;
 AioContext *aio_context;
+QemuMutex mutex;
 blkcnt_t st_blocks;
 bool cache_used;
 NFSServer *server;
@@ -191,6 +192,7 @@ static void nfs_parse_filename(const char *filename, QDict 
*options,
 static void nfs_process_read(void *arg);
 static void nfs_process_write(void *arg);
 
+/* Called with QemuMutex held.  */
 static void nfs_set_events(NFSClient *client)
 {
 int ev = nfs_which_events(client->context);
@@ -209,20 +211,20 @@ static void nfs_process_read(void *arg)
 {
 NFSClient *client = arg;
 
-aio_context_acquire(client->aio_context);
+qemu_mutex_lock(&client->mutex);
 nfs_service(client->context, POLLIN);
 nfs_set_events(client);
-aio_context_release(client->aio_context);
+qemu_mutex_unlock(&client->mutex);
 }
 
 static void nfs_process_write(void *arg)
 {
 NFSClient *client = arg;
 
-aio_context_acquire(client->aio_context);
+qemu_mutex_lock(&client->mutex);
 nfs_service(client->context, POLLOUT);
 nfs_set_events(client);
-aio_context_release(client->aio_context);
+qemu_mutex_unlock(&client->mutex);
 }
 
 static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
@@ -242,6 +244,7 @@ static void nfs_co_generic_bh_cb(void *opaque)
 aio_co_wake(task->co);
 }
 
+/* Called (via nfs_service) with QemuMutex held.  */
 static void
 nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
   void *private_data)
@@ -273,6 +276,7 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
 nfs_co_init_task(bs, &task);
 task.iov = iov;
 
+qemu_mutex_lock(&client->mutex);
 if (nfs_pread_async(client->context, client->fh,
 sector_num * BDRV_SECTOR_SIZE,
 nb_sectors * BDRV_SECTOR_SIZE,
@@ -281,6 +285,7 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
 }
 
 nfs_set_events(client);
+qemu_mutex_unlock(&client->mutex);
 while (!task.complete) {
 qemu_coroutine_yield();
 }
@@ -314,6 +319,7 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
 
 qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
 
+qemu_mutex_lock(&client->mutex);
 if (nfs_pwrite_async(client->context, client->fh,
  sector_num * BDRV_SECTOR_SIZE,
  nb_sectors * BDRV_SECTOR_SIZE,
@@ -323,6 +329,7 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
 }
 
 nfs_set_events(client);
+qemu_mutex_unlock(&client->mutex);
 while (!task.complete) {
 qemu_coroutine_yield();
 }
@@ -343,12 +350,14 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
 
 nfs_co_init_task(bs, &task);
 
+qemu_mutex_lock(&client->mutex);
 if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
 &task) != 0) {
 return -ENOMEM;
 }
 
 nfs_set_events(client);
+qemu_mutex_unlock(&client->mutex);
 while (!task.complete) {
 qemu_coroutine_yield();
 }
@@ -434,6 +443,7 @@ static void nfs_file_close(BlockDriverState *bs)
 {
 NFSClient *client = bs->opaque;
 nfs_client_close(client);
+qemu_mutex_destroy(&client->mutex);
 }
 
 static NFSServer *nfs_config(QDict *options, Error **errp)
@@ -641,6 +651,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict 
*options, int flags,
 if (ret < 0) {
 return ret;
 }
+qemu_mutex_init(&client->mutex);
 bs->total_sectors = ret;
 ret = 0;
 return ret;
@@ -696,6 +707,7 @@ static int nfs_has_zero_init(BlockDriverState *bs)
 return client->has_zero_init;
 }
 
+/* Called (via nfs_service) with QemuMutex held.  */
 static void
 nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
void *private_data)
-- 
2.9.3

[Qemu-devel] [PULL 02/23] virtio: Report real progress in VQ aio poll handler

2017-02-17 Thread Michael S. Tsirkin

From: Fam Zheng 

In virtio_queue_host_notifier_aio_poll, not all "!virtio_queue_empty()"
cases are making true progress.

Currently the offending one is virtio-scsi event queue, whose handler
does nothing if no event is pending. As a result aio_poll() will spin on
the "non-empty" VQ and take 100% host CPU.

Fix this by reporting actual progress from virtio queue aio handlers.

Reported-by: Ed Swierk 
Signed-off-by: Fam Zheng 
Tested-by: Ed Swierk 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/virtio/virtio-blk.h  |  2 +-
 include/hw/virtio/virtio-scsi.h |  6 +++---
 include/hw/virtio/virtio.h  |  4 ++--
 hw/block/dataplane/virtio-blk.c |  4 ++--
 hw/block/virtio-blk.c   | 12 ++--
 hw/scsi/virtio-scsi-dataplane.c | 14 +++---
 hw/scsi/virtio-scsi.c   | 14 +++---
 hw/virtio/virtio.c  | 15 +--
 8 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 9734b4c..d3c8a6f 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -80,6 +80,6 @@ typedef struct MultiReqBuffer {
 bool is_write;
 } MultiReqBuffer;
 
-void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq);
+bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq);
 
 #endif
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index 7375196..f536f77 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -126,9 +126,9 @@ void virtio_scsi_common_realize(DeviceState *dev, Error 
**errp,
 VirtIOHandleOutput cmd);
 
 void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp);
-void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq);
-void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq);
-void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq);
+bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq);
+bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq);
+bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq);
 void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req);
 void virtio_scsi_free_req(VirtIOSCSIReq *req);
 void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 525da24..0863a25 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -154,6 +154,7 @@ void virtio_error(VirtIODevice *vdev, const char *fmt, ...) 
GCC_FMT_ATTR(2, 3);
 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name);
 
 typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *);
+typedef bool (*VirtIOHandleAIOOutput)(VirtIODevice *, VirtQueue *);
 
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
 VirtIOHandleOutput handle_output);
@@ -284,8 +285,7 @@ bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
 void virtio_queue_host_notifier_read(EventNotifier *n);
 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
-void (*fn)(VirtIODevice *,
-   VirtQueue *));
+VirtIOHandleAIOOutput 
handle_output);
 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
 VirtQueue *virtio_vector_next_queue(VirtQueue *vq);
 
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index d1f9f63..5556f0e 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -147,7 +147,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
 g_free(s);
 }
 
-static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
+static bool virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
 VirtQueue *vq)
 {
 VirtIOBlock *s = (VirtIOBlock *)vdev;
@@ -155,7 +155,7 @@ static void 
virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
 assert(s->dataplane);
 assert(s->dataplane_started);
 
-virtio_blk_handle_vq(s, vq);
+return virtio_blk_handle_vq(s, vq);
 }
 
 /* Context: QEMU global mutex held */
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 702eda8..baaa195 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -581,10 +581,11 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, 
MultiReqBuffer *mrb)
 return 0;
 }
 
-void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
+bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
 {
 VirtIOBlockReq *req;
 MultiReqBuffer mrb = {};
+bool progress = false;
 
 blk_io_plug(s->blk);
 
@@ -592,6 +593,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *v

[Qemu-devel] [PULL 12/23] virtio: use VRingMemoryRegionCaches for avail and used rings

2017-02-17 Thread Michael S. Tsirkin

From: Paolo Bonzini 

The virtio-net change is necessary because it uses virtqueue_fill
and virtqueue_flush instead of the more convenient virtqueue_push.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Paolo Bonzini 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/net/virtio-net.c |  14 +-
 hw/virtio/virtio.c  | 132 ++--
 2 files changed, 109 insertions(+), 37 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 354a19e..c321680 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1130,7 +1130,8 @@ static int receive_filter(VirtIONet *n, const uint8_t 
*buf, int size)
 return 0;
 }
 
-static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 
size_t size)
+static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
+  size_t size)
 {
 VirtIONet *n = qemu_get_nic_opaque(nc);
 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
@@ -1233,6 +1234,17 @@ static ssize_t virtio_net_receive(NetClientState *nc, 
const uint8_t *buf, size_t
 return size;
 }
 
+static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
+  size_t size)
+{
+ssize_t r;
+
+rcu_read_lock();
+r = virtio_net_receive_rcu(nc, buf, size);
+rcu_read_unlock();
+return r;
+}
+
 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
 
 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index cdafcec..c08e50f 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -173,6 +173,7 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 virtio_init_region_cache(vdev, n);
 }
 
+/* Called within rcu_read_lock().  */
 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 MemoryRegionCache *cache, int i)
 {
@@ -184,88 +185,110 @@ static void vring_desc_read(VirtIODevice *vdev, 
VRingDesc *desc,
 virtio_tswap16s(vdev, &desc->next);
 }
 
+/* Called within rcu_read_lock().  */
 static inline uint16_t vring_avail_flags(VirtQueue *vq)
 {
-hwaddr pa;
-pa = vq->vring.avail + offsetof(VRingAvail, flags);
-return virtio_lduw_phys(vq->vdev, pa);
+VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+hwaddr pa = offsetof(VRingAvail, flags);
+return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 }
 
+/* Called within rcu_read_lock().  */
 static inline uint16_t vring_avail_idx(VirtQueue *vq)
 {
-hwaddr pa;
-pa = vq->vring.avail + offsetof(VRingAvail, idx);
-vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
+VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+hwaddr pa = offsetof(VRingAvail, idx);
+vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, 
pa);
 return vq->shadow_avail_idx;
 }
 
+/* Called within rcu_read_lock().  */
 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
 {
-hwaddr pa;
-pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
-return virtio_lduw_phys(vq->vdev, pa);
+VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+hwaddr pa = offsetof(VRingAvail, ring[i]);
+return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 }
 
+/* Called within rcu_read_lock().  */
 static inline uint16_t vring_get_used_event(VirtQueue *vq)
 {
 return vring_avail_ring(vq, vq->vring.num);
 }
 
+/* Called within rcu_read_lock().  */
 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
 int i)
 {
-hwaddr pa;
+VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+hwaddr pa = offsetof(VRingUsed, ring[i]);
 virtio_tswap32s(vq->vdev, &uelem->id);
 virtio_tswap32s(vq->vdev, &uelem->len);
-pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
-address_space_write(vq->vdev->dma_as, pa, MEMTXATTRS_UNSPECIFIED,
-   (void *)uelem, sizeof(VRingUsedElem));
+address_space_write_cached(&caches->used, pa, uelem, 
sizeof(VRingUsedElem));
+address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
 }
 
+/* Called within rcu_read_lock().  */
 static uint16_t vring_used_idx(VirtQueue *vq)
 {
-hwaddr pa;
-pa = vq->vring.used + offsetof(VRingUsed, idx);
-return virtio_lduw_phys(vq->vdev, pa);
+VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+hwaddr pa = offsetof(VRingUsed, idx);
+return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 }
 
+/* Called within rcu_read_lock().  */
 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
 {
-hwaddr pa;
-pa = vq->vring.used + offsetof(VRingUsed, idx);
-virtio_stw_phys(vq->vdev, pa, val);
+VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+hwaddr

[Qemu-devel] [PULL 11/23] virtio: check for vring setup in virtio_queue_update_used_idx

2017-02-17 Thread Michael S. Tsirkin

From: Paolo Bonzini 

If the vring has not been set up, it is not necessary for vring_used_idx
to do anything (as is already the case when the caller is virtio_load).
This is harmless for now, but it will be a problem when the
MemoryRegionCache has not been set up.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index d62509d..cdafcec 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2156,7 +2156,9 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, 
int n, uint16_t idx)
 
 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
 {
-vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
+if (vdev->vq[n].vring.desc) {
+vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
+}
 }
 
 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
-- 
MST

[Qemu-devel] [PULL 05/23] virtio: add virtio_*_phys_cached

2017-02-17 Thread Michael S. Tsirkin

From: Paolo Bonzini 

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Paolo Bonzini 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/virtio/virtio-access.h | 52 +++
 1 file changed, 52 insertions(+)

diff --git a/include/hw/virtio/virtio-access.h 
b/include/hw/virtio/virtio-access.h
index 91ae14d..2e92074 100644
--- a/include/hw/virtio/virtio-access.h
+++ b/include/hw/virtio/virtio-access.h
@@ -156,6 +156,58 @@ static inline uint16_t virtio_tswap16(VirtIODevice *vdev, 
uint16_t s)
 #endif
 }
 
+static inline uint16_t virtio_lduw_phys_cached(VirtIODevice *vdev,
+   MemoryRegionCache *cache,
+   hwaddr pa)
+{
+if (virtio_access_is_big_endian(vdev)) {
+return lduw_be_phys_cached(cache, pa);
+}
+return lduw_le_phys_cached(cache, pa);
+}
+
+static inline uint32_t virtio_ldl_phys_cached(VirtIODevice *vdev,
+  MemoryRegionCache *cache,
+  hwaddr pa)
+{
+if (virtio_access_is_big_endian(vdev)) {
+return ldl_be_phys_cached(cache, pa);
+}
+return ldl_le_phys_cached(cache, pa);
+}
+
+static inline uint64_t virtio_ldq_phys_cached(VirtIODevice *vdev,
+  MemoryRegionCache *cache,
+  hwaddr pa)
+{
+if (virtio_access_is_big_endian(vdev)) {
+return ldq_be_phys_cached(cache, pa);
+}
+return ldq_le_phys_cached(cache, pa);
+}
+
+static inline void virtio_stw_phys_cached(VirtIODevice *vdev,
+  MemoryRegionCache *cache,
+  hwaddr pa, uint16_t value)
+{
+if (virtio_access_is_big_endian(vdev)) {
+stw_be_phys_cached(cache, pa, value);
+} else {
+stw_le_phys_cached(cache, pa, value);
+}
+}
+
+static inline void virtio_stl_phys_cached(VirtIODevice *vdev,
+  MemoryRegionCache *cache,
+  hwaddr pa, uint32_t value)
+{
+if (virtio_access_is_big_endian(vdev)) {
+stl_be_phys_cached(cache, pa, value);
+} else {
+stl_le_phys_cached(cache, pa, value);
+}
+}
+
 static inline void virtio_tswap16s(VirtIODevice *vdev, uint16_t *s)
 {
 *s = virtio_tswap16(vdev, *s);
-- 
MST

Re: [Qemu-devel] [PATCH 15/17] iotests: add default node-name

2017-02-17 Thread Dr. David Alan Gilbert

* Fam Zheng (f...@redhat.com) wrote:
> On Fri, 02/17 16:36, Vladimir Sementsov-Ogievskiy wrote:
> > 17.02.2017 15:21, Fam Zheng wrote:
> > > On Fri, 02/17 13:20, Vladimir Sementsov-Ogievskiy wrote:
> > > > 16.02.2017 16:48, Fam Zheng wrote:
> > > > > On Mon, 02/13 12:54, Vladimir Sementsov-Ogievskiy wrote:
> > > > > > When testing migration, auto-generated by qemu node-names differs in
> > > > > > source and destination qemu and migration fails. After this patch,
> > > > > > auto-generated by iotest nodenames will be the same.
> > > > > What should be done in libvirt to make sure the node-names are 
> > > > > matching
> > > > > correctly at both sides?
> > > > Hmm, just set node names appropriately?
> > > But I think the problem is that node names are not configurable from 
> > > libvirt
> > > today, and then the migration will fail. Should the device name take 
> > > precedence
> > > in the code, to make it easier?
> > 
> > libvirt can use same parameters as I in this patch..
> 
> If I'm not mistaken, libvirt can be patched to explicitly set the same node
> names in the QEMU command line, but that is some extra work to do there. My
> point is if device names are used during migration, when available, this patch
> and the libvirt change is not necessary.

Always best to check with libvirt guys to see what makes sense for them;
ccing in jdenemar.

Dave

> Fam
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[Qemu-devel] [PULL 21/23] intel_iommu: convert dbg macros to traces for inv

2017-02-17 Thread Michael S. Tsirkin

From: Peter Xu 

The VT-d code is still using the static DEBUG_INTEL_IOMMU macro. That's not
good, and we should end the days when we need to recompile the code
before getting useful debugging information for VT-d. Time to switch to
the trace system. This is the first patch to do it.

Signed-off-by: Peter Xu 
Reviewed-by: Jason Wang 
Reviewed-by: David Gibson 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu.c | 95 +--
 hw/i386/trace-events  | 18 ++
 2 files changed, 56 insertions(+), 57 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 0c94b79..08e43b6 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
 #include "sysemu/kvm.h"
 #include "hw/i386/apic_internal.h"
 #include "kvm_i386.h"
+#include "trace.h"
 
 /*#define DEBUG_INTEL_IOMMU*/
 #ifdef DEBUG_INTEL_IOMMU
@@ -474,22 +475,19 @@ static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
 /* Set the IWC field and try to generate an invalidation completion interrupt 
*/
 static void vtd_generate_completion_event(IntelIOMMUState *s)
 {
-VTD_DPRINTF(INV, "completes an invalidation wait command with "
-"Interrupt Flag");
 if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
-VTD_DPRINTF(INV, "there is a previous interrupt condition to be "
-"serviced by software, "
-"new invalidation event is not generated");
+trace_vtd_inv_desc_wait_irq("One pending, skip current");
 return;
 }
 vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
 if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
-VTD_DPRINTF(INV, "IM filed in IECTL_REG is set, new invalidation "
-"event is not generated");
+trace_vtd_inv_desc_wait_irq("IM in IECTL_REG is set, "
+"new event not generated");
 return;
 } else {
 /* Generate the interrupt event */
+trace_vtd_inv_desc_wait_irq("Generating complete event");
 vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
 }
@@ -923,6 +921,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState 
*s)
 
 static void vtd_context_global_invalidate(IntelIOMMUState *s)
 {
+trace_vtd_inv_desc_cc_global();
 s->context_cache_gen++;
 if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
 vtd_reset_context_cache(s);
@@ -962,9 +961,11 @@ static void vtd_context_device_invalidate(IntelIOMMUState 
*s,
 uint16_t mask;
 VTDBus *vtd_bus;
 VTDAddressSpace *vtd_as;
-uint16_t devfn;
+uint8_t bus_n, devfn;
 uint16_t devfn_it;
 
+trace_vtd_inv_desc_cc_devices(source_id, func_mask);
+
 switch (func_mask & 3) {
 case 0:
 mask = 0;   /* No bits in the SID field masked */
@@ -980,16 +981,16 @@ static void vtd_context_device_invalidate(IntelIOMMUState 
*s,
 break;
 }
 mask = ~mask;
-VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16
-" mask %"PRIu16, source_id, mask);
-vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
+
+bus_n = VTD_SID_TO_BUS(source_id);
+vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
 if (vtd_bus) {
 devfn = VTD_SID_TO_DEVFN(source_id);
 for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
 vtd_as = vtd_bus->dev_as[devfn_it];
 if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
-VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16,
-devfn_it);
+trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
+ VTD_PCI_FUNC(devfn_it));
 vtd_as->context_cache_entry.context_cache_gen = 0;
 }
 }
@@ -1302,9 +1303,7 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, 
VTDInvDesc *inv_desc)
 {
 if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
 (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
-VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Invalidation "
-"Wait Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
-inv_desc->hi, inv_desc->lo);
+trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
 return false;
 }
 if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
@@ -1316,21 +1315,18 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, 
VTDInvDesc *inv_desc)
 
 /* FIXME: need to be masked with HAW? */
 dma_addr_t status_addr = inv_desc->hi;
-VTD_DPRINTF(INV, "status data 0x%x, status addr 0x%"PRIx64,
-status_data, status_addr);
+trace_vtd_inv_desc_wait_sw(st

[Qemu-devel] [PULL 13/23] virtio: Fix no interrupt when not creating msi controller

2017-02-17 Thread Michael S. Tsirkin

For ARM virt machine, if we use virt-2.7 which will not create ITS node,
the virtio-net can not receive interrupts so it can't get ip address
through dhcp.
This fixes commit 83d768b(virtio: set ISR on dataplane notifications).

Signed-off-by: Shannon Zhao 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index c08e50f..23483c7 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1584,6 +1584,12 @@ void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue 
*vq)
 event_notifier_set(&vq->guest_notifier);
 }
 
+static void virtio_irq(VirtQueue *vq)
+{
+virtio_set_isr(vq->vdev, 0x1);
+virtio_notify_vector(vq->vdev, vq->vector);
+}
+
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
 bool should_notify;
@@ -1596,8 +1602,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
 }
 
 trace_virtio_notify(vdev, vq);
-virtio_set_isr(vq->vdev, 0x1);
-virtio_notify_vector(vdev, vq->vector);
+virtio_irq(vq);
 }
 
 void virtio_notify_config(VirtIODevice *vdev)
@@ -2240,7 +2245,7 @@ static void 
virtio_queue_guest_notifier_read(EventNotifier *n)
 {
 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
 if (event_notifier_test_and_clear(n)) {
-virtio_notify_vector(vq->vdev, vq->vector);
+virtio_irq(vq);
 }
 }
 
-- 
MST

[Qemu-devel] [PULL 07/23] exec: make address_space_cache_destroy idempotent

2017-02-17 Thread Michael S. Tsirkin

From: Paolo Bonzini 

Clear cache->mr so that address_space_cache_destroy does nothing
the second time it is called.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Paolo Bonzini 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 exec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/exec.c b/exec.c
index 6fa337b..865a1e8 100644
--- a/exec.c
+++ b/exec.c
@@ -3166,6 +3166,7 @@ void address_space_cache_destroy(MemoryRegionCache *cache)
 xen_invalidate_map_cache_entry(cache->ptr);
 }
 memory_region_unref(cache->mr);
+cache->mr = NULL;
 }
 
 /* Called from RCU critical section.  This function has the same
-- 
MST

[Qemu-devel] [PULL 23/23] intel_iommu: vtd_slpt_level_shift check level

2017-02-17 Thread Michael S. Tsirkin

From: Peter Xu 

This helps in debugging an incorrect level being passed in.

Reviewed-by: Jason Wang 
Signed-off-by: Peter Xu 
Reviewed-by: David Gibson 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index ad304f6..22d8226 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -168,6 +168,7 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, 
gpointer value,
 /* The shift of an addr for a certain level of paging structure */
 static inline uint32_t vtd_slpt_level_shift(uint32_t level)
 {
+assert(level != 0);
 return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
 }
 
-- 
MST

[Qemu-devel] [PULL 01/23] pci/pcie: don't assume cap id 0 is reserved

2017-02-17 Thread Michael S. Tsirkin

VFIO actually wants to create a capability with ID == 0.
This is done to make guest drivers skip the given capability.
pcie_add_capability then trips up on this capability
when looking for end of capability list.

To support this use-case, it's easy enough to switch to
e.g. 0xffffffff for these comparisons - we can be sure
it will never match a 16-bit capability ID.

Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Peter Xu 
Reviewed-by: Alex Williamson 
---
 hw/pci/pcie.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index cbd4bb4..f4dd177 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -610,7 +610,8 @@ bool pcie_cap_is_arifwd_enabled(const PCIDevice *dev)
  * uint16_t ext_cap_size
  */
 
-static uint16_t pcie_find_capability_list(PCIDevice *dev, uint16_t cap_id,
+/* Passing a cap_id value > 0x will return 0 and put end of list in prev */
+static uint16_t pcie_find_capability_list(PCIDevice *dev, uint32_t cap_id,
   uint16_t *prev_p)
 {
 uint16_t prev = 0;
@@ -679,9 +680,11 @@ void pcie_add_capability(PCIDevice *dev,
 } else {
 uint16_t prev;
 
-/* 0 is reserved cap id. use internally to find the last capability
-   in the linked list */
-next = pcie_find_capability_list(dev, 0, &prev);
+/*
+ * 0x is not a valid cap id (it's a 16 bit field). use
+ * internally to find the last capability in the linked list.
+ */
+next = pcie_find_capability_list(dev, 0x, &prev);
 
 assert(prev >= PCI_CONFIG_SPACE_SIZE);
 assert(next == 0);
-- 
MST

[Qemu-devel] [PULL 09/23] virtio: add MemoryListener to cache ring translations

2017-02-17 Thread Michael S. Tsirkin

From: Paolo Bonzini 

The cached translations are RCU-protected to allow efficient use
when processing virtqueues.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Paolo Bonzini 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/virtio/virtio.h |   1 +
 hw/virtio/virtio.c | 105 +++--
 2 files changed, 103 insertions(+), 3 deletions(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 0863a25..15efcf2 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -85,6 +85,7 @@ struct VirtIODevice
 uint32_t generation;
 int nvectors;
 VirtQueue *vq;
+MemoryListener listener;
 uint16_t device_id;
 bool vm_running;
 bool broken; /* device in invalid state, needs reset */
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 71e41f6..b75cb52 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -60,6 +60,13 @@ typedef struct VRingUsed
 VRingUsedElem ring[0];
 } VRingUsed;
 
+typedef struct VRingMemoryRegionCaches {
+struct rcu_head rcu;
+MemoryRegionCache desc;
+MemoryRegionCache avail;
+MemoryRegionCache used;
+} VRingMemoryRegionCaches;
+
 typedef struct VRing
 {
 unsigned int num;
@@ -68,6 +75,7 @@ typedef struct VRing
 hwaddr desc;
 hwaddr avail;
 hwaddr used;
+VRingMemoryRegionCaches *caches;
 } VRing;
 
 struct VirtQueue
@@ -104,6 +112,51 @@ struct VirtQueue
 QLIST_ENTRY(VirtQueue) node;
 };
 
+static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
+{
+if (!caches) {
+return;
+}
+
+address_space_cache_destroy(&caches->desc);
+address_space_cache_destroy(&caches->avail);
+address_space_cache_destroy(&caches->used);
+g_free(caches);
+}
+
+static void virtio_init_region_cache(VirtIODevice *vdev, int n)
+{
+VirtQueue *vq = &vdev->vq[n];
+VRingMemoryRegionCaches *old = vq->vring.caches;
+VRingMemoryRegionCaches *new;
+hwaddr addr, size;
+int event_size;
+
+event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 
2 : 0;
+
+addr = vq->vring.desc;
+if (!addr) {
+return;
+}
+new = g_new0(VRingMemoryRegionCaches, 1);
+size = virtio_queue_get_desc_size(vdev, n);
+address_space_cache_init(&new->desc, vdev->dma_as,
+ addr, size, false);
+
+size = virtio_queue_get_used_size(vdev, n) + event_size;
+address_space_cache_init(&new->used, vdev->dma_as,
+ vq->vring.used, size, true);
+
+size = virtio_queue_get_avail_size(vdev, n) + event_size;
+address_space_cache_init(&new->avail, vdev->dma_as,
+ vq->vring.avail, size, false);
+
+atomic_rcu_set(&vq->vring.caches, new);
+if (old) {
+call_rcu(old, virtio_free_region_cache, rcu);
+}
+}
+
 /* virt queue functions */
 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 {
@@ -117,6 +170,7 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 vring->used = vring_align(vring->avail +
   offsetof(VRingAvail, ring[vring->num]),
   vring->align);
+virtio_init_region_cache(vdev, n);
 }
 
 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
@@ -1264,6 +1318,7 @@ void virtio_queue_set_rings(VirtIODevice *vdev, int n, 
hwaddr desc,
 vdev->vq[n].vring.desc = desc;
 vdev->vq[n].vring.avail = avail;
 vdev->vq[n].vring.used = used;
+virtio_init_region_cache(vdev, n);
 }
 
 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
@@ -1984,9 +2039,6 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int 
version_id)
 void virtio_cleanup(VirtIODevice *vdev)
 {
 qemu_del_vm_change_state_handler(vdev->vmstate);
-g_free(vdev->config);
-g_free(vdev->vq);
-g_free(vdev->vector_queues);
 }
 
 static void virtio_vmstate_change(void *opaque, int running, RunState state)
@@ -2248,6 +2300,19 @@ void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, 
const char *fmt, ...)
 }
 }
 
+static void virtio_memory_listener_commit(MemoryListener *listener)
+{
+VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
+int i;
+
+for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+if (vdev->vq[i].vring.num == 0) {
+break;
+}
+virtio_init_region_cache(vdev, i);
+}
+}
+
 static void virtio_device_realize(DeviceState *dev, Error **errp)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -2270,6 +2335,9 @@ static void virtio_device_realize(DeviceState *dev, Error 
**errp)
 error_propagate(errp, err);
 return;
 }
+
+vdev->listener.commit = virtio_memory_listener_commit;
+memory_listener_register(&vdev->listener, vdev->dma_as);
 }
 
 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
@@ -2292,6 +2360,36 @@ static void virtio_device_unrealize(DeviceState *dev,

[Qemu-devel] [PULL 00/23] virtio, pci: fixes, features

2017-02-17 Thread Michael S. Tsirkin

The following changes since commit ad584d37f2a86b392c25f3f00cc1f1532676c2d1:

  Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging 
(2017-02-16 17:46:52 +0000)

are available in the git repository at:

  git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to 7e58326ad7e79b8c5dbcc6f24e9dc1523d84c11b:

  intel_iommu: vtd_slpt_level_shift check level (2017-02-17 21:52:31 +0200)


virtio, pci: fixes, features

virtio is using region caches for performance
iommu support for IOTLBs
misc fixes

Signed-off-by: Michael S. Tsirkin 


Aviv Ben-David (1):
  intel_iommu: add "caching-mode" option

Fam Zheng (1):
  virtio: Report real progress in VQ aio poll handler

Haozhong Zhang (1):
  docs: add document to explain the usage of vNVDIMM

Michael S. Tsirkin (2):
  pci/pcie: don't assume cap id 0 is reserved
  virtio: Fix no interrupt when not creating msi controller

Paolo Bonzini (9):
  memory: make memory_listener_unregister idempotent
  virtio: add virtio_*_phys_cached
  virtio: use address_space_map/unmap to access descriptors
  exec: make address_space_cache_destroy idempotent
  virtio: use MemoryRegionCache to access descriptors
  virtio: add MemoryListener to cache ring translations
  virtio: use VRingMemoryRegionCaches for descriptor ring
  virtio: check for vring setup in virtio_queue_update_used_idx
  virtio: use VRingMemoryRegionCaches for avail and used rings

Peter Xu (9):
  pcie: simplify pcie_add_capability()
  vfio: trace map/unmap for notify as well
  vfio: introduce vfio_get_vaddr()
  vfio: allow to notify unmap for very large region
  intel_iommu: simplify irq region translation
  intel_iommu: renaming gpa to iova where proper
  intel_iommu: convert dbg macros to traces for inv
  intel_iommu: convert dbg macros to trace for trans
  intel_iommu: vtd_slpt_level_shift check level

 docs/nvdimm.txt   | 124 +
 hw/i386/intel_iommu_internal.h|   1 +
 include/exec/memory.h |   2 +
 include/hw/i386/intel_iommu.h |   2 +
 include/hw/virtio/virtio-access.h |  52 ++
 include/hw/virtio/virtio-blk.h|   2 +-
 include/hw/virtio/virtio-scsi.h   |   6 +-
 include/hw/virtio/virtio.h|   5 +-
 exec.c|   1 +
 hw/block/dataplane/virtio-blk.c   |   4 +-
 hw/block/virtio-blk.c |  12 +-
 hw/i386/intel_iommu.c | 238 ++---
 hw/net/virtio-net.c   |  14 +-
 hw/pci/pcie.c |  23 +--
 hw/scsi/virtio-scsi-dataplane.c   |  14 +-
 hw/scsi/virtio-scsi.c |  14 +-
 hw/vfio/common.c  |  65 ---
 hw/virtio/virtio.c| 364 ++
 memory.c  |   5 +
 hw/i386/trace-events  |  28 +++
 hw/vfio/trace-events  |   2 +-
 21 files changed, 702 insertions(+), 276 deletions(-)
 create mode 100644 docs/nvdimm.txt

Re: [Qemu-devel] [PATCH v8 4/8] ACPI: Add Virtual Machine Generation ID support

2017-02-17 Thread Laszlo Ersek

On 02/17/17 17:03, Laszlo Ersek wrote:
> On 02/17/17 16:33, Ben Warren wrote:
>>
>>> On Feb 17, 2017, at 2:43 AM, Igor Mammedov >> > wrote:
>>>
>>> On Thu, 16 Feb 2017 15:15:36 -0800
>>> b...@skyportsystems.com  wrote:
>>>
 From: Ben Warren mailto:b...@skyportsystems.com>>

 This implements the VM Generation ID feature by passing a 128-bit
 GUID to the guest via a fw_cfg blob.
 Any time the GUID changes, an ACPI notify event is sent to the guest

 The user interface is a simple device with one parameter:
 - guid (string, must be "auto" or in UUID format
   xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
>>> I've given it some testing with WS2012R2 and v4 patches for Seabios,
>>>
>>> Windows is able to read initial GUID allocation and writeback
>>> seems to work somehow:
>>>
>>> (qemu) info vm-generation-id
>>> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
>>>
>>> vmgenid client in Windows reads it as 2 following 64bit integers:
>>> 42d50e8bc109c09b:6cd1dcc90984339b
>>>
>>> However update path/restore from snapshot doesn't
>>> here is as I've tested it:
>>>
>>> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor stdio
>>> (qemu) info vm-generation-id
>>> c109c09b-0e8b-42d5-9b33-8409c9dcd16c
>>> (qemu) stop
>>> (qemu) migrate "exec:gzip -c > STATEFILE.gz"
>>> (qemu) quit
>>>
>>> qemu-system-x86_64 -device vmgenid,id=testvgid,guid=auto -monitor stdio
>>> -incoming "exec: gzip -c -d STATEFILE.gz"
>>> (qemu) info vm-generation-id
>>> 28b587fa-991b-4267-80d7-9cf28b746fe9
>>>
>>> guest
>>> 1. doesn't get GPE notification that it must receive
>>> 2. vmgenid client in Windows reads the same value
>>>  42d50e8bc109c09b:6cd1dcc90984339b
>>>
>> Strange, this was working for me, but with a slightly different test method:
>>
>>   * I use virsh save/restore
> 
> Awesome, this is actually what I should try. All my guests are managed by
> libvirt (with the occasional , for development), and direct
> QEMU monitor commands such as
> 
>   virsh qemu-monitor-command ovmf.rhel7 --hmp 'info vm-generation-id'
> 
> only work for me if they are reasonably non-intrusive.
> 
>>   * While I do later testing with Windows, during development I use a
>> Linux kernel module I wrote that keeps track of GUID and
>> notifications.  I’m happy to share this with you if interested.
> 
> Please do. If you have a public git repo somewhere, that would be
> awesome. (Bonus points if the module builds out-of-tree, if the
> kernel-devel package is installed.)
> 
> NB: while the set-id monitor command was part of the series, I did test
> it to the extent that I checked the SCI ("ACPI interrupt") count in the
> guest, in /proc/interrupts. I did see it increase, so minimally the SCI
> injection was fine.

So, I did some testing with a RHEL-7 guest. I passed '-device
vmgenid=auto' to QEMU using the  element in the domain XML.

(1) I started the guest normally, and grepped /proc/interrupts for
"acpi". Zero interrupts on either VCPU.

(2) Dumped the guest RAM to a file with "virsh dump ... --memory-only",
opened it with crash, and listed the 16 GUID bytes at the offset that
the firmware (OVMF) reported at startup.

(3) cycled through "virsh managedsave" and "virsh start"

(4) grepped /proc/interrupts again for "acpi". One interrupt had been
delivered to one of the VCPUs, all others were zero.

(5) Repeated step (2). The bytes listed this time were different.

(6) Issued "virsh qemu-monitor-command ovmf.rhel7 --hmp 'info
vm-generation-id'", and compared the output against the bytes dumped
(with crash) from guest memory, in step 5. They were a match.

So, to me it seems like the SCI is injected, and the memory contents are
changed.

---*---

Windows Server 2012 R2 test:

(7) booted the guest similarly with '-device vmgenid=auto' via
 in the domain XML.

(8) Initial check from the host side:

$ virsh qemu-monitor-command ovmf.win2012r2.q35 \
--hmp 'info vm-generation-id'
a3f7c334-7dc4-4694-8b8f-abf52abb072f

(9) Verifying the same from within, using Vadim's program (note: I
logged into the VM with ssh, using Cygwin's SSHD in the guest):

$ ./vmgenid.exe
VmCounterValue: 46947dc4a3f7c334:2f07bb2af5ab8f8b
0x34 0xc3 0xf7 0xa3 0xc4 0x7d 0x94 0x46 0x8b 0x8f 0xab 0xf5 0x2a 0xbb
0x07 0x2f

This is a match, so the initial setup works. (Look only at the raw byte
dump in the second line -- it matches the Little Endian UUID
representation as specified in the SMBIOS spec!)

(10) Logged out of the guest (with ssh), cycled through "virsh
managedsave" and "virsh start" for the domain, logged back in.

(11) in the guest:

$ ./vmgenid.exe
VmCounterValue: 4a12296b382162da:6c00d1a52699b7bd
0xda 0x62 0x21 0x38 0x6b 0x29 0x12 0x4a 0xbd 0xb7 0x99 0x26 0xa5 0xd1
0x00 0x6c

(12) on the host:

$ virsh qemu-monitor-command ovmf.win2012r2.q35 \
  --hmp 'info vm-generation-id'
382162da-296b-4a12-bdb7-9926a5d1006c

This is again a match. (Again, look only at the raw byte dump from
vm

[Qemu-devel] [PULL 15/23] vfio: trace map/unmap for notify as well

2017-02-17 Thread Michael S. Tsirkin

From: Peter Xu 

We trace its range, but we don't know whether it's a MAP or an UNMAP. Let's
dump that as well.

Acked-by: Alex Williamson 
Reviewed-by: David Gibson 
Signed-off-by: Peter Xu 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/vfio/common.c | 3 ++-
 hw/vfio/trace-events | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 801578b..174f351 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -305,7 +305,8 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 void *vaddr;
 int ret;
 
-trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
+trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
+iova, iova + iotlb->addr_mask);
 
 if (iotlb->target_as != &address_space_memory) {
 error_report("Wrong target AS \"%s\", only system memory is allowed",
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 8de8281..2561c6d 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -84,7 +84,7 @@ vfio_pci_igd_lpc_bridge_enabled(const char *name) "%s"
 # hw/vfio/common.c
 vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, 
unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)"
 vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t 
data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64
-vfio_iommu_map_notify(uint64_t iova_start, uint64_t iova_end) "iommu map @ 
%"PRIx64" - %"PRIx64
+vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) 
"iommu %s @ %"PRIx64" - %"PRIx64
 vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING 
region_add %"PRIx64" - %"PRIx64
 vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add 
[iommu] %"PRIx64" - %"PRIx64
 vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void 
*vaddr) "region_add [ram] %"PRIx64" - %"PRIx64" [%p]"
-- 
MST

[Qemu-devel] [PATCH V2 2/7] cputlb: move get_page_addr_code

2017-02-17 Thread fred . konrad

From: KONRAD Frederic 

This just moves the code before VICTIM_TLB_HIT macro definition
so we can use it.

Signed-off-by: KONRAD Frederic 
---
 cputlb.c | 72 
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 665caea..b3a5f47 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -452,42 +452,6 @@ static void report_bad_exec(CPUState *cpu, target_ulong 
addr)
 log_cpu_state_mask(LOG_GUEST_ERROR, cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 }
 
-/* NOTE: this function can trigger an exception */
-/* NOTE2: the returned address is not exactly the physical address: it
- * is actually a ram_addr_t (in system mode; the user mode emulation
- * version of this function returns a guest virtual address).
- */
-tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
-{
-int mmu_idx, index, pd;
-void *p;
-MemoryRegion *mr;
-CPUState *cpu = ENV_GET_CPU(env);
-CPUIOTLBEntry *iotlbentry;
-
-index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-mmu_idx = cpu_mmu_index(env, true);
-if (unlikely(env->tlb_table[mmu_idx][index].addr_code !=
- (addr & TARGET_PAGE_MASK))) {
-cpu_ldub_code(env, addr);
-}
-iotlbentry = &env->iotlb[mmu_idx][index];
-pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
-mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
-if (memory_region_is_unassigned(mr)) {
-CPUClass *cc = CPU_GET_CLASS(cpu);
-
-if (cc->do_unassigned_access) {
-cc->do_unassigned_access(cpu, addr, false, true, 0, 4);
-} else {
-report_bad_exec(cpu, addr);
-exit(1);
-}
-}
-p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend);
-return qemu_ram_addr_from_host_nofail(p);
-}
-
 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
  target_ulong addr, uintptr_t retaddr, int size)
 {
@@ -554,6 +518,42 @@ static bool victim_tlb_hit(CPUArchState *env, size_t 
mmu_idx, size_t index,
   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
  (ADDR) & TARGET_PAGE_MASK)
 
+/* NOTE: this function can trigger an exception */
+/* NOTE2: the returned address is not exactly the physical address: it
+ * is actually a ram_addr_t (in system mode; the user mode emulation
+ * version of this function returns a guest virtual address).
+ */
+tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
+{
+int mmu_idx, index, pd;
+void *p;
+MemoryRegion *mr;
+CPUState *cpu = ENV_GET_CPU(env);
+CPUIOTLBEntry *iotlbentry;
+
+index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+mmu_idx = cpu_mmu_index(env, true);
+if (unlikely(env->tlb_table[mmu_idx][index].addr_code !=
+ (addr & TARGET_PAGE_MASK))) {
+cpu_ldub_code(env, addr);
+}
+iotlbentry = &env->iotlb[mmu_idx][index];
+pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
+mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
+if (memory_region_is_unassigned(mr)) {
+CPUClass *cc = CPU_GET_CLASS(cpu);
+
+if (cc->do_unassigned_access) {
+cc->do_unassigned_access(cpu, addr, false, true, 0, 4);
+} else {
+report_bad_exec(cpu, addr);
+exit(1);
+}
+}
+p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend);
+return qemu_ram_addr_from_host_nofail(p);
+}
+
 /* Probe for whether the specified guest write access is permitted.
  * If it is not permitted then an exception will be taken in the same
  * way as if this were a real write access (and we will not return).
-- 
1.8.3.1

[Qemu-devel] [PULL 03/23] docs: add document to explain the usage of vNVDIMM

2017-02-17 Thread Michael S. Tsirkin

From: Haozhong Zhang 

Signed-off-by: Haozhong Zhang 
Reviewed-by: Xiao Guangrong 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 docs/nvdimm.txt | 124 
 1 file changed, 124 insertions(+)
 create mode 100644 docs/nvdimm.txt

diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
new file mode 100644
index 000..2d9f8c0
--- /dev/null
+++ b/docs/nvdimm.txt
@@ -0,0 +1,124 @@
+QEMU Virtual NVDIMM
+===================
+
+This document explains the usage of virtual NVDIMM (vNVDIMM) feature
+which is available since QEMU v2.6.0.
+
+The current QEMU only implements the persistent memory mode of vNVDIMM
+device and not the block window mode.
+
+Basic Usage
+-----------
+
+The storage of a vNVDIMM device in QEMU is provided by the memory
+backend (i.e. memory-backend-file and memory-backend-ram). A simple
+way to create a vNVDIMM device at startup time is done via the
+following command line options:
+
+ -machine pc,nvdimm
+ -m $RAM_SIZE,slots=$N,maxmem=$MAX_SIZE
+ -object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE
+ -device nvdimm,id=nvdimm1,memdev=mem1
+
+Where,
+
+ - the "nvdimm" machine option enables vNVDIMM feature.
+
+ - "slots=$N" should be equal to or larger than the total amount of
+   normal RAM devices and vNVDIMM devices, e.g. $N should be >= 2 here.
+
+ - "maxmem=$MAX_SIZE" should be equal to or larger than the total size
+   of normal RAM devices and vNVDIMM devices, e.g. $MAX_SIZE should be
+   >= $RAM_SIZE + $NVDIMM_SIZE here.
+
+ - "object 
memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE"
+   creates a backend storage of size $NVDIMM_SIZE on a file $PATH. All
+   accesses to the virtual NVDIMM device go to the file $PATH.
+
+   "share=on/off" controls the visibility of guest writes. If
+   "share=on", then guest writes will be applied to the backend
+   file. If another guest uses the same backend file with option
+   "share=on", then above writes will be visible to it as well. If
+   "share=off", then guest writes won't be applied to the backend
+   file and thus will be invisible to other guests.
+
+ - "device nvdimm,id=nvdimm1,memdev=mem1" creates a virtual NVDIMM
+   device whose storage is provided by above memory backend device.
+
+Multiple vNVDIMM devices can be created if multiple pairs of "-object"
+and "-device" are provided.
+
+For above command line options, if the guest OS has the proper NVDIMM
+driver, it should be able to detect a NVDIMM device which is in the
+persistent memory mode and whose size is $NVDIMM_SIZE.
+
+Note:
+
+1. Prior to QEMU v2.8.0, if memory-backend-file is used and the actual
+   backend file size is not equal to the size given by "size" option,
+   QEMU will truncate the backend file by ftruncate(2), which will
+   corrupt the existing data in the backend file, especially for the
+   shrink case.
+
+   QEMU v2.8.0 and later check the backend file size and the "size"
+   option. If they do not match, QEMU will report errors and abort in
+   order to avoid the data corruption.
+
+2. QEMU v2.6.0 only puts a basic alignment requirement on the "size"
+   option of memory-backend-file, e.g. 4KB alignment on x86.  However,
+   QEMU v2.7.0 puts an additional alignment requirement, which may
+   require a larger value than the basic one, e.g. 2MB on x86. This
+   change breaks the usage of memory-backend-file that only satisfies
+   the basic alignment.
+
+   QEMU v2.8.0 and later remove the additional alignment on non-s390x
+   architectures, so the broken memory-backend-file can work again.
+
+Label
+-----
+
+QEMU v2.7.0 and later implement the label support for vNVDIMM devices.
+To enable label on vNVDIMM devices, users can simply add
+"label-size=$SZ" option to "-device nvdimm", e.g.
+
+ -device nvdimm,id=nvdimm1,memdev=mem1,label-size=128K
+
+Note:
+
+1. The minimal label size is 128KB.
+
+2. QEMU v2.7.0 and later store labels at the end of backend storage.
+   If a memory backend file, which was previously used as the backend
+   of a vNVDIMM device without labels, is now used for a vNVDIMM
+   device with label, the data in the label area at the end of file
+   will be inaccessible to the guest. If any useful data (e.g. the
+   meta-data of the file system) was stored there, the latter usage
+   may result in guest data corruption (e.g. breakage of guest file
+   system).
+
+Hotplug
+-------
+
+QEMU v2.8.0 and later implement the hotplug support for vNVDIMM
+devices. Similarly to the RAM hotplug, the vNVDIMM hotplug is
+accomplished by two monitor commands "object_add" and "device_add".
+
+For example, the following commands add another 4GB vNVDIMM device to
+the guest:
+
+ (qemu) object_add 
memory-backend-file,id=mem2,share=on,mem-path=new_nvdimm.img,size=4G
+ (qemu) device_add nvdimm,id=nvdimm2,memdev=mem2
+
+Note:
+
+1. Each hotplugged vNVDIMM device consumes one memory slot. Users
+   should always ensure th

[Qemu-devel] [PATCH V2 4/7] exec: allow to get a pointer for some mmio memory region

2017-02-17 Thread fred . konrad

From: KONRAD Frederic 

This introduces a special callback which allows code to be run from some MMIO
devices.

A SysBusDevice with a MemoryRegion which implements the request_ptr callback
will be notified when the guest tries to execute code from its offset. It will
then be able to, e.g., pre-load some code from an SPI device or ask for a
pointer from an external simulator, etc.

When the pointer or the data in it is no longer valid, the device has to
invalidate it.

Signed-off-by: KONRAD Frederic 

RFC -> V1:
  * Use mmio-interface instead of directly creating the subregion.
---
 cputlb.c  |  7 +++
 include/exec/memory.h | 35 +++
 memory.c  | 57 +++
 3 files changed, 99 insertions(+)

diff --git a/cputlb.c b/cputlb.c
index 846341e..9077247 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -545,6 +545,13 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, 
target_ulong addr)
 if (memory_region_is_unassigned(mr)) {
 CPUClass *cc = CPU_GET_CLASS(cpu);
 
+if (memory_region_request_mmio_ptr(mr, addr)) {
+/* A MemoryRegion is potentially added so re-run the
+ * get_page_addr_code.
+ */
+return get_page_addr_code(env, addr);
+}
+
 if (cc->do_unassigned_access) {
 cc->do_unassigned_access(cpu, addr, false, true, 0, 4);
 } else {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 987f925..36b0eec 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -120,6 +120,15 @@ struct MemoryRegionOps {
 uint64_t data,
 unsigned size,
 MemTxAttrs attrs);
+/* Instruction execution pre-callback:
+ * @addr is the address of the access relative to the @mr.
+ * @size is the size of the area returned by the callback.
+ * @offset is the location of the pointer inside @mr.
+ *
+ * Returns a pointer to a location which contains guest code.
+ */
+void *(*request_ptr)(void *opaque, hwaddr addr, unsigned *size,
+ unsigned *offset);
 
 enum device_endian endianness;
 /* Guest-visible constraints: */
@@ -1253,6 +1262,32 @@ void memory_global_dirty_log_stop(void);
 void mtree_info(fprintf_function mon_printf, void *f, bool flatview);
 
 /**
+ * memory_region_request_mmio_ptr: request a pointer to an mmio
+ * MemoryRegion. If it is possible map a RAM MemoryRegion with this pointer.
+ * When the device wants to invalidate the pointer it will call
+ * memory_region_invalidate_mmio_ptr.
+ *
+ * @mr: #MemoryRegion to check
+ * @addr: address within that region
+ *
+ * Returns true on success, false otherwise.
+ */
+bool memory_region_request_mmio_ptr(MemoryRegion *mr, hwaddr addr);
+
+/**
+ * memory_region_invalidate_mmio_ptr: invalidate the pointer to an mmio
+ * previously requested.
+ * In the end that means that if something wants to execute from this area it
+ * will need to request the pointer again.
+ *
+ * @mr: #MemoryRegion associated to the pointer.
+ * @addr: address within that region
+ * @size: size of that area.
+ */
+void memory_region_invalidate_mmio_ptr(MemoryRegion *mr, hwaddr offset,
+   unsigned size);
+
+/**
  * memory_region_dispatch_read: perform a read directly to the specified
  * MemoryRegion.
  *
diff --git a/memory.c b/memory.c
index 6c58373..a605250 100644
--- a/memory.c
+++ b/memory.c
@@ -30,6 +30,8 @@
 #include "exec/ram_addr.h"
 #include "sysemu/kvm.h"
 #include "sysemu/sysemu.h"
+#include "hw/misc/mmio_interface.h"
+#include "hw/qdev-properties.h"
 
 //#define DEBUG_UNASSIGNED
 
@@ -2375,6 +2377,61 @@ void memory_listener_unregister(MemoryListener *listener)
 QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as);
 }
 
+bool memory_region_request_mmio_ptr(MemoryRegion *mr, hwaddr addr)
+{
+void *host;
+unsigned size = 0;
+unsigned offset = 0;
+Object *new_interface;
+
+if (!mr || !mr->ops->request_ptr) {
+return false;
+}
+
+/*
+ * Avoid an update if the request_ptr call
+ * memory_region_invalidate_mmio_ptr which seems to be likely when we use
+ * a cache.
+ */
+memory_region_transaction_begin();
+
+host = mr->ops->request_ptr(mr->opaque, addr - mr->addr, &size, &offset);
+
+if (!host || !size) {
+memory_region_transaction_commit();
+return false;
+}
+
+new_interface = object_new("mmio_interface");
+qdev_prop_set_uint64(DEVICE(new_interface), "start", offset);
+qdev_prop_set_uint64(DEVICE(new_interface), "end", offset + size - 1);
+qdev_prop_set_bit(DEVICE(new_interface), "ro", true);
+qdev_prop_set_ptr(DEVICE(new_interface), "host_ptr", host);
+qdev_prop_set_ptr(DEVICE(new_interface), "subregion", mr);
+object_property_set_bool(OBJECT(new_interface), true, "realize

[Qemu-devel] [PULL 17/23] vfio: allow to notify unmap for very large region

2017-02-17 Thread Michael S. Tsirkin

From: Peter Xu 

The Linux vfio driver supports doing VFIO_IOMMU_UNMAP_DMA for a very big
region. This can be leveraged by the QEMU IOMMU implementation to clean up
existing page mappings for an entire iova address space (by notifying
with an IOTLB with an extremely huge addr_mask). However, the current
vfio_iommu_map_notify() does not allow that. It makes sure that all the
translated addresses in the IOTLB fall into the RAM range.

The check makes sense, but it should only be a sanity check for
mapping operations, and it means little for unmap operations.

This patch moves this check into the map logic only, so that we'll get
faster unmap handling (no need to translate again), and we can also
better support unmapping a very big region when it covers non-RAM ranges
or even non-existent ranges.

Acked-by: Alex Williamson 
Signed-off-by: Peter Xu 
Reviewed-by: David Gibson 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/vfio/common.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 42c4790..f3ba9b9 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -352,11 +352,10 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 
 rcu_read_lock();
 
-if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
-goto out;
-}
-
 if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
+if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
+goto out;
+}
 /*
  * vaddr is only valid until rcu_read_unlock(). But after
  * vfio_dma_map has set up the mapping the pages will be
-- 
MST

[Qemu-devel] [PATCH V2 3/7] cputlb: fix the way get_page_addr_code fills the tlb

2017-02-17 Thread fred . konrad

From: KONRAD Frederic 

get_page_addr_code(..) does a cpu_ldub_code to fill the tlb:
This can lead to some side effects if a device is mapped at this address.

So this patch replaces the cpu_ldub_code call with a tlb_fill.

Signed-off-by: KONRAD Frederic 
---
 cputlb.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index b3a5f47..846341e 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -534,8 +534,10 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, 
target_ulong addr)
 index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
 mmu_idx = cpu_mmu_index(env, true);
 if (unlikely(env->tlb_table[mmu_idx][index].addr_code !=
- (addr & TARGET_PAGE_MASK))) {
-cpu_ldub_code(env, addr);
+ (addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK)))) {
+if (!VICTIM_TLB_HIT(addr_read, addr)) {
+tlb_fill(ENV_GET_CPU(env), addr, MMU_INST_FETCH, mmu_idx, 0);
+}
 }
 iotlbentry = &env->iotlb[mmu_idx][index];
 pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
-- 
1.8.3.1

[Qemu-devel] [PULL 04/23] memory: make memory_listener_unregister idempotent

2017-02-17 Thread Michael S. Tsirkin

From: Paolo Bonzini 

Make it easy to unregister a MemoryListener without tracking whether it
had been registered before.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 memory.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/memory.c b/memory.c
index 6c58373..ed8b5aa 100644
--- a/memory.c
+++ b/memory.c
@@ -2371,8 +2371,13 @@ void memory_listener_register(MemoryListener *listener, 
AddressSpace *as)
 
 void memory_listener_unregister(MemoryListener *listener)
 {
+if (!listener->address_space) {
+return;
+}
+
 QTAILQ_REMOVE(&memory_listeners, listener, link);
 QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as);
+listener->address_space = NULL;
 }
 
 void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
-- 
MST

[Qemu-devel] [PULL 20/23] intel_iommu: renaming gpa to iova where proper

2017-02-17 Thread Michael S. Tsirkin

From: Peter Xu 

There are lots of places in the current intel_iommu.c code that name an
"iova" as "gpa". It is really confusing to use the name "gpa" in these
places (it is very easily understood as "Guest Physical Address", which
it is not). To make the code (much) easier to read, I decided to fix
this once and for all.

No functional change is made. Only names, comments and debug messages change.

Reviewed-by: Jason Wang 
Signed-off-by: Peter Xu 
Reviewed-by: David Gibson 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu.c | 44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 86d19bb..0c94b79 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -259,7 +259,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t 
source_id,
 uint64_t *key = g_malloc(sizeof(*key));
 uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
 
-VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
+VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " iova 0x%"PRIx64
 " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte,
 domain_id);
 if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
@@ -575,12 +575,12 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, 
uint32_t index)
 return slpte;
 }
 
-/* Given a gpa and the level of paging structure, return the offset of current
- * level.
+/* Given an iova and the level of paging structure, return the offset
+ * of current level.
  */
-static inline uint32_t vtd_gpa_level_offset(uint64_t gpa, uint32_t level)
+static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level)
 {
-return (gpa >> vtd_slpt_level_shift(level)) &
+return (iova >> vtd_slpt_level_shift(level)) &
 ((1ULL << VTD_SL_LEVEL_BITS) - 1);
 }
 
@@ -628,12 +628,12 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, 
uint32_t level)
 }
 }
 
-/* Given the @gpa, get relevant @slptep. @slpte_level will be the last level
+/* Given the @iova, get relevant @slptep. @slpte_level will be the last level
  * of the translation, can be used for deciding the size of large page.
  */
-static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
-uint64_t *slptep, uint32_t *slpte_level,
-bool *reads, bool *writes)
+static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
+ uint64_t *slptep, uint32_t *slpte_level,
+ bool *reads, bool *writes)
 {
 dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
 uint32_t level = vtd_get_level_from_context_entry(ce);
@@ -642,11 +642,11 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t 
gpa, bool is_write,
 uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
 uint64_t access_right_check;
 
-/* Check if @gpa is above 2^X-1, where X is the minimum of MGAW in CAP_REG
- * and AW in context-entry.
+/* Check if @iova is above 2^X-1, where X is the minimum of MGAW
+ * in CAP_REG and AW in context-entry.
  */
-if (gpa & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
-VTD_DPRINTF(GENERAL, "error: gpa 0x%"PRIx64 " exceeds limits", gpa);
+if (iova & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
+VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova);
 return -VTD_FR_ADDR_BEYOND_MGAW;
 }
 
@@ -654,13 +654,13 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t 
gpa, bool is_write,
 access_right_check = is_write ? VTD_SL_W : VTD_SL_R;
 
 while (true) {
-offset = vtd_gpa_level_offset(gpa, level);
+offset = vtd_iova_level_offset(iova, level);
 slpte = vtd_get_slpte(addr, offset);
 
 if (slpte == (uint64_t)-1) {
 VTD_DPRINTF(GENERAL, "error: fail to access second-level paging "
-"entry at level %"PRIu32 " for gpa 0x%"PRIx64,
-level, gpa);
+"entry at level %"PRIu32 " for iova 0x%"PRIx64,
+level, iova);
 if (level == vtd_get_level_from_context_entry(ce)) {
 /* Invalid programming of context-entry */
 return -VTD_FR_CONTEXT_ENTRY_INV;
@@ -672,8 +672,8 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t 
gpa, bool is_write,
 *writes = (*writes) && (slpte & VTD_SL_W);
 if (!(slpte & access_right_check)) {
 VTD_DPRINTF(GENERAL, "error: lack of %s permission for "
-"gpa 0x%"PRIx64 " slpte 0x%"PRIx64,
-(is_write ? "write" : "read"), gpa, slpte);
+"iova 0x%"PRIx64 " slpte 0x%"PRIx64,
+(is_write ? "write" : "read"), iova, slpte);
 return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
 }

[Qemu-devel] [PATCH V2 7/7] xilinx_spips: allow mmio execution

2017-02-17 Thread fred . konrad

From: KONRAD Frederic 

This allows executing code from the LQSPI area.

When request_ptr is called, the device loads 1024 bytes from the SPI device.
This code can then be executed by the guest.

Signed-off-by: KONRAD Frederic 
---
 hw/ssi/xilinx_spips.c | 74 ++-
 1 file changed, 55 insertions(+), 19 deletions(-)

diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c
index da8adfa..e833028 100644
--- a/hw/ssi/xilinx_spips.c
+++ b/hw/ssi/xilinx_spips.c
@@ -496,6 +496,18 @@ static const MemoryRegionOps spips_ops = {
 .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static void xilinx_qspips_invalidate_mmio_ptr(XilinxQSPIPS *q)
+{
+XilinxSPIPS *s = &q->parent_obj;
+
+if (q->lqspi_cached_addr != ~0ULL) {
+/* Invalidate the current mapped mmio */
+memory_region_invalidate_mmio_ptr(&s->mmlqspi, q->lqspi_cached_addr,
+  LQSPI_CACHE_SIZE);
+q->lqspi_cached_addr = ~0ULL;
+}
+}
+
 static void xilinx_qspips_write(void *opaque, hwaddr addr,
 uint64_t value, unsigned size)
 {
@@ -505,7 +517,7 @@ static void xilinx_qspips_write(void *opaque, hwaddr addr,
 addr >>= 2;
 
 if (addr == R_LQSPI_CFG) {
-q->lqspi_cached_addr = ~0ULL;
+xilinx_qspips_invalidate_mmio_ptr(q);
 }
 }
 
@@ -517,27 +529,20 @@ static const MemoryRegionOps qspips_ops = {
 
 #define LQSPI_CACHE_SIZE 1024
 
-static uint64_t
-lqspi_read(void *opaque, hwaddr addr, unsigned int size)
+static void lqspi_load_cache(void *opaque, hwaddr addr)
 {
-int i;
 XilinxQSPIPS *q = opaque;
 XilinxSPIPS *s = opaque;
-uint32_t ret;
-
-if (addr >= q->lqspi_cached_addr &&
-addr <= q->lqspi_cached_addr + LQSPI_CACHE_SIZE - 4) {
-uint8_t *retp = &q->lqspi_buf[addr - q->lqspi_cached_addr];
-ret = cpu_to_le32(*(uint32_t *)retp);
-DB_PRINT_L(1, "addr: %08x, data: %08x\n", (unsigned)addr,
-   (unsigned)ret);
-return ret;
-} else {
-int flash_addr = (addr / num_effective_busses(s));
-int slave = flash_addr >> LQSPI_ADDRESS_BITS;
-int cache_entry = 0;
-uint32_t u_page_save = s->regs[R_LQSPI_STS] & ~LQSPI_CFG_U_PAGE;
-
+int i;
+int flash_addr = ((addr & ~(LQSPI_CACHE_SIZE - 1))
+   / num_effective_busses(s));
+int slave = flash_addr >> LQSPI_ADDRESS_BITS;
+int cache_entry = 0;
+uint32_t u_page_save = s->regs[R_LQSPI_STS] & ~LQSPI_CFG_U_PAGE;
+
+if (addr < q->lqspi_cached_addr ||
+addr > q->lqspi_cached_addr + LQSPI_CACHE_SIZE - 4) {
+xilinx_qspips_invalidate_mmio_ptr(q);
 s->regs[R_LQSPI_STS] &= ~LQSPI_CFG_U_PAGE;
 s->regs[R_LQSPI_STS] |= slave ? LQSPI_CFG_U_PAGE : 0;
 
@@ -589,12 +594,43 @@ lqspi_read(void *opaque, hwaddr addr, unsigned int size)
 xilinx_spips_update_cs_lines(s);
 
 q->lqspi_cached_addr = flash_addr * num_effective_busses(s);
+}
+}
+
+static void *lqspi_request_mmio_ptr(void *opaque, hwaddr addr, unsigned *size,
+unsigned *offset)
+{
+XilinxQSPIPS *q = opaque;
+hwaddr offset_within_the_region = addr & ~(LQSPI_CACHE_SIZE - 1);
+
+lqspi_load_cache(opaque, offset_within_the_region);
+*size = LQSPI_CACHE_SIZE;
+*offset = offset_within_the_region;
+return q->lqspi_buf;
+}
+
+static uint64_t
+lqspi_read(void *opaque, hwaddr addr, unsigned int size)
+{
+XilinxQSPIPS *q = opaque;
+uint32_t ret;
+
+if (addr >= q->lqspi_cached_addr &&
+addr <= q->lqspi_cached_addr + LQSPI_CACHE_SIZE - 4) {
+uint8_t *retp = &q->lqspi_buf[addr - q->lqspi_cached_addr];
+ret = cpu_to_le32(*(uint32_t *)retp);
+DB_PRINT_L(1, "addr: %08x, data: %08x\n", (unsigned)addr,
+   (unsigned)ret);
+return ret;
+} else {
+lqspi_load_cache(opaque, addr);
 return lqspi_read(opaque, addr, size);
 }
 }
 
 static const MemoryRegionOps lqspi_ops = {
 .read = lqspi_read,
+.request_ptr = lqspi_request_mmio_ptr,
 .endianness = DEVICE_NATIVE_ENDIAN,
 .valid = {
 .min_access_size = 1,
-- 
1.8.3.1

[Qemu-devel] [PULL 06/23] virtio: use address_space_map/unmap to access descriptors

2017-02-17 Thread Michael S. Tsirkin

From: Paolo Bonzini 

This makes little difference, but it makes the code change smaller
for the next patch that introduces MemoryRegionCache.  This is
because map/unmap are similar to MemoryRegionCache init/destroy.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Paolo Bonzini 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio.c | 103 ++---
 1 file changed, 75 insertions(+), 28 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 2461c06..6ce6a26 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -120,10 +120,9 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 }
 
 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
-hwaddr desc_pa, int i)
+uint8_t *desc_ptr, int i)
 {
-address_space_read(vdev->dma_as, desc_pa + i * sizeof(VRingDesc),
-   MEMTXATTRS_UNSPECIFIED, (void *)desc, 
sizeof(VRingDesc));
+memcpy(desc, desc_ptr + i * sizeof(VRingDesc), sizeof(VRingDesc));
 virtio_tswap64s(vdev, &desc->addr);
 virtio_tswap32s(vdev, &desc->len);
 virtio_tswap16s(vdev, &desc->flags);
@@ -408,7 +407,7 @@ enum {
 };
 
 static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
-hwaddr desc_pa, unsigned int max,
+void *desc_ptr, unsigned int max,
 unsigned int *next)
 {
 /* If this descriptor says it doesn't chain, we're done. */
@@ -426,7 +425,7 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, 
VRingDesc *desc,
 return VIRTQUEUE_READ_DESC_ERROR;
 }
 
-vring_desc_read(vdev, desc, desc_pa, *next);
+vring_desc_read(vdev, desc, desc_ptr, *next);
 return VIRTQUEUE_READ_DESC_MORE;
 }
 
@@ -434,31 +433,41 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned 
int *in_bytes,
unsigned int *out_bytes,
unsigned max_in_bytes, unsigned max_out_bytes)
 {
-unsigned int idx;
+VirtIODevice *vdev = vq->vdev;
+unsigned int max, idx;
 unsigned int total_bufs, in_total, out_total;
+void *vring_desc_ptr;
+void *indirect_desc_ptr = NULL;
+hwaddr len = 0;
 int rc;
 
 idx = vq->last_avail_idx;
-
 total_bufs = in_total = out_total = 0;
+
+max = vq->vring.num;
+len = max * sizeof(VRingDesc);
+vring_desc_ptr = address_space_map(vdev->dma_as, vq->vring.desc, &len, 
false);
+if (len < max * sizeof(VRingDesc)) {
+virtio_error(vdev, "Cannot map descriptor ring");
+goto err;
+}
+
 while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
-VirtIODevice *vdev = vq->vdev;
-unsigned int max, num_bufs, indirect = 0;
+void *desc_ptr = vring_desc_ptr;
+unsigned int num_bufs;
 VRingDesc desc;
-hwaddr desc_pa;
 unsigned int i;
 
-max = vq->vring.num;
 num_bufs = total_bufs;
 
 if (!virtqueue_get_head(vq, idx++, &i)) {
 goto err;
 }
 
-desc_pa = vq->vring.desc;
-vring_desc_read(vdev, &desc, desc_pa, i);
+vring_desc_read(vdev, &desc, desc_ptr, i);
 
 if (desc.flags & VRING_DESC_F_INDIRECT) {
+len = desc.len;
 if (desc.len % sizeof(VRingDesc)) {
 virtio_error(vdev, "Invalid size for indirect buffer table");
 goto err;
@@ -471,11 +480,17 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned 
int *in_bytes,
 }
 
 /* loop over the indirect descriptor table */
-indirect = 1;
+indirect_desc_ptr = address_space_map(vdev->dma_as, desc.addr,
+  &len, false);
+desc_ptr = indirect_desc_ptr;
+if (len < desc.len) {
+virtio_error(vdev, "Cannot map indirect buffer");
+goto err;
+}
+
 max = desc.len / sizeof(VRingDesc);
-desc_pa = desc.addr;
 num_bufs = i = 0;
-vring_desc_read(vdev, &desc, desc_pa, i);
+vring_desc_read(vdev, &desc, desc_ptr, i);
 }
 
 do {
@@ -494,17 +509,20 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned 
int *in_bytes,
 goto done;
 }
 
-rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
+rc = virtqueue_read_next_desc(vdev, &desc, desc_ptr, max, &i);
 } while (rc == VIRTQUEUE_READ_DESC_MORE);
 
 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
 goto err;
 }
 
-if (!indirect)
-total_bufs = num_bufs;
-else
+if (desc_ptr == indirect_desc_ptr) {
+address_space_unmap(vdev->dma_as, desc_ptr, len, false, 0);
+indirect_desc_ptr = NULL;
 total_bufs++;

[Qemu-devel] [PULL 08/23] virtio: use MemoryRegionCache to access descriptors

2017-02-17 Thread Michael S. Tsirkin

From: Paolo Bonzini 

For now, the cache is created on every virtqueue_pop.  Later on,
direct descriptors will be able to reuse it.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Paolo Bonzini 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/exec/memory.h |  2 ++
 hw/virtio/virtio.c| 80 +--
 2 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 987f925..6911023 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1426,6 +1426,8 @@ struct MemoryRegionCache {
 bool is_write;
 };
 
+#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mr = NULL })
+
 /* address_space_cache_init: prepare for repeated access to a physical
  * memory region
  *
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 6ce6a26..71e41f6 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -120,9 +120,10 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 }
 
 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
-uint8_t *desc_ptr, int i)
+MemoryRegionCache *cache, int i)
 {
-memcpy(desc, desc_ptr + i * sizeof(VRingDesc), sizeof(VRingDesc));
+address_space_read_cached(cache, i * sizeof(VRingDesc),
+  desc, sizeof(VRingDesc));
 virtio_tswap64s(vdev, &desc->addr);
 virtio_tswap32s(vdev, &desc->len);
 virtio_tswap16s(vdev, &desc->flags);
@@ -407,7 +408,7 @@ enum {
 };
 
 static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
-void *desc_ptr, unsigned int max,
+MemoryRegionCache *desc_cache, unsigned 
int max,
 unsigned int *next)
 {
 /* If this descriptor says it doesn't chain, we're done. */
@@ -425,7 +426,7 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, 
VRingDesc *desc,
 return VIRTQUEUE_READ_DESC_ERROR;
 }
 
-vring_desc_read(vdev, desc, desc_ptr, *next);
+vring_desc_read(vdev, desc, desc_cache, *next);
 return VIRTQUEUE_READ_DESC_MORE;
 }
 
@@ -436,24 +437,25 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned 
int *in_bytes,
 VirtIODevice *vdev = vq->vdev;
 unsigned int max, idx;
 unsigned int total_bufs, in_total, out_total;
-void *vring_desc_ptr;
-void *indirect_desc_ptr = NULL;
-hwaddr len = 0;
+MemoryRegionCache vring_desc_cache;
+MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+int64_t len = 0;
 int rc;
 
 idx = vq->last_avail_idx;
 total_bufs = in_total = out_total = 0;
 
 max = vq->vring.num;
-len = max * sizeof(VRingDesc);
-vring_desc_ptr = address_space_map(vdev->dma_as, vq->vring.desc, &len, 
false);
+len = address_space_cache_init(&vring_desc_cache, vdev->dma_as,
+   vq->vring.desc, max * sizeof(VRingDesc),
+   false);
 if (len < max * sizeof(VRingDesc)) {
 virtio_error(vdev, "Cannot map descriptor ring");
 goto err;
 }
 
 while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
-void *desc_ptr = vring_desc_ptr;
+MemoryRegionCache *desc_cache = &vring_desc_cache;
 unsigned int num_bufs;
 VRingDesc desc;
 unsigned int i;
@@ -464,10 +466,9 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int 
*in_bytes,
 goto err;
 }
 
-vring_desc_read(vdev, &desc, desc_ptr, i);
+vring_desc_read(vdev, &desc, desc_cache, i);
 
 if (desc.flags & VRING_DESC_F_INDIRECT) {
-len = desc.len;
 if (desc.len % sizeof(VRingDesc)) {
 virtio_error(vdev, "Invalid size for indirect buffer table");
 goto err;
@@ -480,9 +481,10 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int 
*in_bytes,
 }
 
 /* loop over the indirect descriptor table */
-indirect_desc_ptr = address_space_map(vdev->dma_as, desc.addr,
-  &len, false);
-desc_ptr = indirect_desc_ptr;
+len = address_space_cache_init(&indirect_desc_cache,
+   vdev->dma_as,
+   desc.addr, desc.len, false);
+desc_cache = &indirect_desc_cache;
 if (len < desc.len) {
 virtio_error(vdev, "Cannot map indirect buffer");
 goto err;
@@ -490,7 +492,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int 
*in_bytes,
 
 max = desc.len / sizeof(VRingDesc);
 num_bufs = i = 0;
-vring_desc_read(vdev, &desc, desc_ptr, i);
+vring_desc_read(vdev, &desc, desc_cache, i);
 }
 
 do {
@@ -509,16 +511,15 @@ void virtqueue_get_av

[Qemu-devel] [PATCH] tests: Use error_free_or_abort() where appropriate

2017-02-17 Thread Markus Armbruster

Done with this Coccinelle semantic patch:

@@
expression E;
@@
-g_assert(E);
-error_free(E);
+error_free_or_abort(&E);

Signed-off-by: Markus Armbruster 
---
 tests/test-qemu-opts.c  | 3 +--
 tests/test-qobject-output-visitor.c | 6 ++
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/tests/test-qemu-opts.c b/tests/test-qemu-opts.c
index a505a3e..d08e8a6 100644
--- a/tests/test-qemu-opts.c
+++ b/tests/test-qemu-opts.c
@@ -79,8 +79,7 @@ static void test_find_unknown_opts(void)
 /* should not return anything, we don't have an "unknown" option */
 list = qemu_find_opts_err("unknown", &err);
 g_assert(list == NULL);
-g_assert(err);
-error_free(err);
+error_free_or_abort(&err);
 }
 
 static void test_qemu_find_opts(void)
diff --git a/tests/test-qobject-output-visitor.c 
b/tests/test-qobject-output-visitor.c
index 4e2d79c..58019e2 100644
--- a/tests/test-qobject-output-visitor.c
+++ b/tests/test-qobject-output-visitor.c
@@ -146,8 +146,7 @@ static void 
test_visitor_out_enum_errors(TestOutputVisitorData *data,
 for (i = 0; i < ARRAY_SIZE(bad_values) ; i++) {
 err = NULL;
 visit_type_EnumOne(data->ov, "unused", &bad_values[i], &err);
-g_assert(err);
-error_free(err);
+error_free_or_abort(&err);
 visitor_reset(data);
 }
 }
@@ -251,8 +250,7 @@ static void 
test_visitor_out_struct_errors(TestOutputVisitorData *data,
 u.has_enum1 = true;
 u.enum1 = bad_values[i];
 visit_type_UserDefOne(data->ov, "unused", &pu, &err);
-g_assert(err);
-error_free(err);
+error_free_or_abort(&err);
 visitor_reset(data);
 }
 }
-- 
2.7.4

[Qemu-devel] [PATCH V2 5/7] qdev: add MemoryRegion property

2017-02-17 Thread fred . konrad

From: KONRAD Frederic 

We need to pass a pointer to a MemoryRegion for mmio_interface.
So this just adds that.

Signed-off-by: KONRAD Frederic 
---
 include/hw/qdev-properties.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 7ac3153..babb258 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -171,6 +171,8 @@ extern PropertyInfo qdev_prop_arraylen;
 DEFINE_PROP_DEFAULT(_n, _s, _f, 0, qdev_prop_blocksize, uint16_t)
 #define DEFINE_PROP_PCI_HOST_DEVADDR(_n, _s, _f) \
 DEFINE_PROP(_n, _s, _f, qdev_prop_pci_host_devaddr, PCIHostDeviceAddress)
+#define DEFINE_PROP_MEMORY_REGION(_n, _s, _f) \
+DEFINE_PROP(_n, _s, _f, qdev_prop_ptr, MemoryRegion *)
 
 #define DEFINE_PROP_END_OF_LIST()   \
 {}
-- 
1.8.3.1

[Qemu-devel] [PULL 10/23] virtio: use VRingMemoryRegionCaches for descriptor ring

2017-02-17 Thread Michael S. Tsirkin

From: Paolo Bonzini 

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Paolo Bonzini 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio.c | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index b75cb52..d62509d 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -491,25 +491,24 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned 
int *in_bytes,
 VirtIODevice *vdev = vq->vdev;
 unsigned int max, idx;
 unsigned int total_bufs, in_total, out_total;
-MemoryRegionCache vring_desc_cache;
+VRingMemoryRegionCaches *caches;
 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
 int64_t len = 0;
 int rc;
 
+rcu_read_lock();
 idx = vq->last_avail_idx;
 total_bufs = in_total = out_total = 0;
 
 max = vq->vring.num;
-len = address_space_cache_init(&vring_desc_cache, vdev->dma_as,
-   vq->vring.desc, max * sizeof(VRingDesc),
-   false);
-if (len < max * sizeof(VRingDesc)) {
+caches = atomic_rcu_read(&vq->vring.caches);
+if (caches->desc.len < max * sizeof(VRingDesc)) {
 virtio_error(vdev, "Cannot map descriptor ring");
 goto err;
 }
 
 while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
-MemoryRegionCache *desc_cache = &vring_desc_cache;
+MemoryRegionCache *desc_cache = &caches->desc;
 unsigned int num_bufs;
 VRingDesc desc;
 unsigned int i;
@@ -586,13 +585,13 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned 
int *in_bytes,
 
 done:
 address_space_cache_destroy(&indirect_desc_cache);
-address_space_cache_destroy(&vring_desc_cache);
 if (in_bytes) {
 *in_bytes = in_total;
 }
 if (out_bytes) {
 *out_bytes = out_total;
 }
+rcu_read_unlock();
 return;
 
 err:
@@ -726,7 +725,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned 
out_num, unsigned in_nu
 void *virtqueue_pop(VirtQueue *vq, size_t sz)
 {
 unsigned int i, head, max;
-MemoryRegionCache vring_desc_cache;
+VRingMemoryRegionCaches *caches;
 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
 MemoryRegionCache *desc_cache;
 int64_t len;
@@ -768,15 +767,14 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
 
 i = head;
 
-len = address_space_cache_init(&vring_desc_cache, vdev->dma_as,
-   vq->vring.desc, max * sizeof(VRingDesc),
-   false);
-if (len < max * sizeof(VRingDesc)) {
+rcu_read_lock();
+caches = atomic_rcu_read(&vq->vring.caches);
+if (caches->desc.len < max * sizeof(VRingDesc)) {
 virtio_error(vdev, "Cannot map descriptor ring");
 goto done;
 }
 
-desc_cache = &vring_desc_cache;
+desc_cache = &caches->desc;
 vring_desc_read(vdev, &desc, desc_cache, i);
 if (desc.flags & VRING_DESC_F_INDIRECT) {
 if (desc.len % sizeof(VRingDesc)) {
@@ -850,7 +848,7 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
 done:
 address_space_cache_destroy(&indirect_desc_cache);
-address_space_cache_destroy(&vring_desc_cache);
+rcu_read_unlock();
 
 return elem;
 
-- 
MST

[Qemu-devel] [PULL 14/23] pcie: simplify pcie_add_capability()

2017-02-17 Thread Michael S. Tsirkin

From: Peter Xu 

When we add PCIe extended capabilities, we should be following the rule
that we add the head extended cap (at offset 0x100) first, then the rest
of them. Meanwhile, we are always adding new capability bits at the end
of the list. Here the "next" looks meaningless in all cases since it
should always be zero (along with the "header").

Simplify the function a bit, and it looks more readable now.

Signed-off-by: Peter Xu 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/pci/pcie.c | 14 +++---
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index f4dd177..fc54bfd 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -665,32 +665,24 @@ void pcie_add_capability(PCIDevice *dev,
  uint16_t cap_id, uint8_t cap_ver,
  uint16_t offset, uint16_t size)
 {
-uint32_t header;
-uint16_t next;
-
 assert(offset >= PCI_CONFIG_SPACE_SIZE);
 assert(offset < offset + size);
 assert(offset + size <= PCIE_CONFIG_SPACE_SIZE);
 assert(size >= 8);
 assert(pci_is_express(dev));
 
-if (offset == PCI_CONFIG_SPACE_SIZE) {
-header = pci_get_long(dev->config + offset);
-next = PCI_EXT_CAP_NEXT(header);
-} else {
+if (offset != PCI_CONFIG_SPACE_SIZE) {
 uint16_t prev;
 
 /*
  * 0xffffffff is not a valid cap id (it's a 16 bit field). use
  * internally to find the last capability in the linked list.
  */
-next = pcie_find_capability_list(dev, 0xffffffff, &prev);
-
+pcie_find_capability_list(dev, 0xffffffff, &prev);
 assert(prev >= PCI_CONFIG_SPACE_SIZE);
-assert(next == 0);
 pcie_ext_cap_set_next(dev, prev, offset);
 }
-pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, next));
+pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, 0));
 
 /* Make capability read-only by default */
 memset(dev->wmask + offset, 0, size);
-- 
MST

[Qemu-devel] [PATCH V2 1/7] cputlb: cleanup get_page_addr_code to use VICTIM_TLB_HIT

2017-02-17 Thread fred . konrad

From: KONRAD Frederic 

This replaces the env1 and page_index variables with env and index
so we can use the VICTIM_TLB_HIT macro later.

Signed-off-by: KONRAD Frederic 
---
 cputlb.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 6c39927..665caea 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -457,21 +457,21 @@ static void report_bad_exec(CPUState *cpu, target_ulong 
addr)
  * is actually a ram_addr_t (in system mode; the user mode emulation
  * version of this function returns a guest virtual address).
  */
-tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
+tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
 {
-int mmu_idx, page_index, pd;
+int mmu_idx, index, pd;
 void *p;
 MemoryRegion *mr;
-CPUState *cpu = ENV_GET_CPU(env1);
+CPUState *cpu = ENV_GET_CPU(env);
 CPUIOTLBEntry *iotlbentry;
 
-page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-mmu_idx = cpu_mmu_index(env1, true);
-if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
+index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+mmu_idx = cpu_mmu_index(env, true);
+if (unlikely(env->tlb_table[mmu_idx][index].addr_code !=
  (addr & TARGET_PAGE_MASK))) {
-cpu_ldub_code(env1, addr);
+cpu_ldub_code(env, addr);
 }
-iotlbentry = &env1->iotlb[mmu_idx][page_index];
+iotlbentry = &env->iotlb[mmu_idx][index];
 pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
 mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
 if (memory_region_is_unassigned(mr)) {
@@ -484,7 +484,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, 
target_ulong addr)
 exit(1);
 }
 }
-p = (void *)((uintptr_t)addr + 
env1->tlb_table[mmu_idx][page_index].addend);
+p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend);
 return qemu_ram_addr_from_host_nofail(p);
 }
 
-- 
1.8.3.1

< 1 2 3 >

101 - 200 of 264 matches

Mail list logo